From 04d909f9daae85f62bca8f0d7ad58f340e116873 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 16 Feb 2020 11:40:30 -0800 Subject: [PATCH] [X86] Add more avx512 instructions to llvm-mca resource tests --- .../llvm-mca/X86/Generic/resources-avx512.s | 346 +++++++++++- .../llvm-mca/X86/Generic/resources-avx512bw.s | 213 ++++++- .../X86/Generic/resources-avx512bwvl.s | 314 +++++++++- .../llvm-mca/X86/Generic/resources-avx512dq.s | 94 ++- .../X86/Generic/resources-avx512dqvl.s | 78 ++- .../llvm-mca/X86/Generic/resources-avx512vl.s | 534 +++++++++++++++++- .../X86/SkylakeServer/resources-avx512.s | 333 ++++++++++- .../X86/SkylakeServer/resources-avx512bw.s | 213 ++++++- .../X86/SkylakeServer/resources-avx512bwvl.s | 314 +++++++++- .../X86/SkylakeServer/resources-avx512dq.s | 94 ++- .../X86/SkylakeServer/resources-avx512dqvl.s | 78 ++- .../X86/SkylakeServer/resources-avx512vl.s | 534 +++++++++++++++++- 12 files changed, 3133 insertions(+), 12 deletions(-) diff --git a/test/tools/llvm-mca/X86/Generic/resources-avx512.s b/test/tools/llvm-mca/X86/Generic/resources-avx512.s index d232058859c..b832d7af351 100644 --- a/test/tools/llvm-mca/X86/Generic/resources-avx512.s +++ b/test/tools/llvm-mca/X86/Generic/resources-avx512.s @@ -1,6 +1,15 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s +kandw %k0, %k1, %k2 +kandnw %k0, %k1, %k2 +korw %k0, %k1, %k2 +kxnorw %k0, %k1, %k2 +kxorw %k0, %k1, %k2 +kshiftlw $2, %k1, %k2 +kshiftrw $2, %k1, %k2 +kunpckbw %k0, %k1, %k2 + vaddpd %zmm16, %zmm17, %zmm19 vaddpd (%rax), %zmm17, %zmm19 vaddpd (%rax){1to8}, %zmm17, %zmm19 @@ -21,6 +30,33 @@ vaddps %zmm16, %zmm17, %zmm19 {z}{k1} vaddps (%rax), %zmm17, %zmm19 {z}{k1} vaddps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vcmppd $0, %zmm0, %zmm1, %k2 +vcmppd $0, (%rax), %zmm1, %k2 +vcmppd $0, (%rax){1to8}, %zmm1, %k2 +vcmppd $0, %zmm0, %zmm1, %k2 {k3} +vcmppd $0,
(%rax), %zmm1, %k2 {k3} +vcmppd $0, (%rax){1to8}, %zmm1, %k2 {k3} + +vcmpps $0, %zmm0, %zmm1, %k2 +vcmpps $0, (%rax), %zmm1, %k2 +vcmpps $0, (%rax){1to16}, %zmm1, %k2 +vcmpps $0, %zmm0, %zmm1, %k2 {k3} +vcmpps $0, (%rax), %zmm1, %k2 {k3} +vcmpps $0, (%rax){1to16}, %zmm1, %k2 {k3} + +vcmpsd $0, %xmm0, %xmm1, %k2 +vcmpsd $0, (%rax), %xmm1, %k2 +vcmpsd $0, %xmm0, %xmm1, %k2 {k3} +vcmpsd $0, (%rax), %xmm1, %k2 {k3} + +vcmpss $0, %xmm0, %xmm1, %k2 +vcmpss $0, (%rax), %xmm1, %k2 +vcmpss $0, %xmm0, %xmm1, %k2 {k3} +vcmpss $0, (%rax), %xmm1, %k2 {k3} + +vcomiss %xmm16, %xmm17 +vcomiss (%rax), %xmm17 + vcvtdq2pd %ymm16, %zmm19 vcvtdq2pd (%rax), %zmm19 vcvtdq2pd (%rax){1to8}, %zmm19 @@ -81,6 +117,11 @@ vdivps %zmm16, %zmm17, %zmm19 {z}{k1} vdivps (%rax), %zmm17, %zmm19 {z}{k1} vdivps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vgatherdpd (%rax,%ymm1,2), %zmm2 {k1} +vgatherdps (%rax,%zmm1,2), %zmm2 {k1} +vgatherqpd (%rax,%zmm1,2), %zmm2 {k1} +vgatherqps (%rax,%zmm1,2), %ymm2 {k1} + vmaxpd %zmm16, %zmm17, %zmm19 vmaxpd (%rax), %zmm17, %zmm19 vmaxpd (%rax){1to8}, %zmm17, %zmm19 @@ -181,6 +222,62 @@ vpaddq %zmm16, %zmm17, %zmm19 {z}{k1} vpaddq (%rax), %zmm17, %zmm19 {z}{k1} vpaddq (%rax){1to8}, %zmm17, %zmm19 {z}{k1} +vpcmpd $0, %zmm0, %zmm1, %k2 +vpcmpd $0, (%rax), %zmm1, %k2 +vpcmpd $0, (%rax){1to16}, %zmm1, %k2 +vpcmpd $0, %zmm0, %zmm1, %k2 {k3} +vpcmpd $0, (%rax), %zmm1, %k2 {k3} +vpcmpd $0, (%rax){1to16}, %zmm1, %k2 {k3} + +vpcmpeqd %zmm0, %zmm1, %k2 +vpcmpeqd (%rax), %zmm1, %k2 +vpcmpeqd (%rax){1to16}, %zmm1, %k2 +vpcmpeqd %zmm0, %zmm1, %k2 {k3} +vpcmpeqd (%rax), %zmm1, %k2 {k3} +vpcmpeqd (%rax){1to16}, %zmm1, %k2 {k3} + +vpcmpeqq %zmm0, %zmm1, %k2 +vpcmpeqq (%rax), %zmm1, %k2 +vpcmpeqq (%rax){1to8}, %zmm1, %k2 +vpcmpeqq %zmm0, %zmm1, %k2 {k3} +vpcmpeqq (%rax), %zmm1, %k2 {k3} +vpcmpeqq (%rax){1to8}, %zmm1, %k2 {k3} + +vpcmpgtd %zmm0, %zmm1, %k2 +vpcmpgtd (%rax), %zmm1, %k2 +vpcmpgtd (%rax){1to16}, %zmm1, %k2 +vpcmpgtd %zmm0, %zmm1, %k2 {k3} +vpcmpgtd (%rax), %zmm1, %k2 {k3} 
+vpcmpgtd (%rax){1to16}, %zmm1, %k2 {k3} + +vpcmpgtq %zmm0, %zmm1, %k2 +vpcmpgtq (%rax), %zmm1, %k2 +vpcmpgtq (%rax){1to8}, %zmm1, %k2 +vpcmpgtq %zmm0, %zmm1, %k2 {k3} +vpcmpgtq (%rax), %zmm1, %k2 {k3} +vpcmpgtq (%rax){1to8}, %zmm1, %k2 {k3} + +vpcmpq $0, %zmm0, %zmm1, %k2 +vpcmpq $0, (%rax), %zmm1, %k2 +vpcmpq $0, (%rax){1to8}, %zmm1, %k2 +vpcmpq $0, %zmm0, %zmm1, %k2 {k3} +vpcmpq $0, (%rax), %zmm1, %k2 {k3} +vpcmpq $0, (%rax){1to8}, %zmm1, %k2 {k3} + +vpcmpud $0, %zmm0, %zmm1, %k2 +vpcmpud $0, (%rax), %zmm1, %k2 +vpcmpud $0, (%rax){1to16}, %zmm1, %k2 +vpcmpud $0, %zmm0, %zmm1, %k2 {k3} +vpcmpud $0, (%rax), %zmm1, %k2 {k3} +vpcmpud $0, (%rax){1to16}, %zmm1, %k2 {k3} + +vpcmpuq $0, %zmm0, %zmm1, %k2 +vpcmpuq $0, (%rax), %zmm1, %k2 +vpcmpuq $0, (%rax){1to8}, %zmm1, %k2 +vpcmpuq $0, %zmm0, %zmm1, %k2 {k3} +vpcmpuq $0, (%rax), %zmm1, %k2 {k3} +vpcmpuq $0, (%rax){1to8}, %zmm1, %k2 {k3} + vpgatherdq (%rax,%ymm1,2), %zmm2 {k1} vpgatherdd (%rax,%zmm1,2), %zmm2 {k1} vpgatherqq (%rax,%zmm1,2), %zmm2 {k1} @@ -460,6 +557,34 @@ vpsubq %zmm16, %zmm17, %zmm19 {z}{k1} vpsubq (%rax), %zmm17, %zmm19 {z}{k1} vpsubq (%rax){1to8}, %zmm17, %zmm19 {z}{k1} +vptestmd %zmm0, %zmm1, %k2 +vptestmd (%rax), %zmm1, %k2 +vptestmd (%rax){1to16}, %zmm1, %k2 +vptestmd %zmm0, %zmm1, %k2 {k3} +vptestmd (%rax), %zmm1, %k2 {k3} +vptestmd (%rax){1to16}, %zmm1, %k2 {k3} + +vptestmq %zmm0, %zmm1, %k2 +vptestmq (%rax), %zmm1, %k2 +vptestmq (%rax){1to8}, %zmm1, %k2 +vptestmq %zmm0, %zmm1, %k2 {k3} +vptestmq (%rax), %zmm1, %k2 {k3} +vptestmq (%rax){1to8}, %zmm1, %k2 {k3} + +vptestnmd %zmm0, %zmm1, %k2 +vptestnmd (%rax), %zmm1, %k2 +vptestnmd (%rax){1to16}, %zmm1, %k2 +vptestnmd %zmm0, %zmm1, %k2 {k3} +vptestnmd (%rax), %zmm1, %k2 {k3} +vptestnmd (%rax){1to16}, %zmm1, %k2 {k3} + +vptestnmq %zmm0, %zmm1, %k2 +vptestnmq (%rax), %zmm1, %k2 +vptestnmq (%rax){1to8}, %zmm1, %k2 +vptestnmq %zmm0, %zmm1, %k2 {k3} +vptestnmq (%rax), %zmm1, %k2 {k3} +vptestnmq (%rax){1to8}, %zmm1, %k2 {k3} + vsubpd %zmm16, %zmm17, 
%zmm19 vsubpd (%rax), %zmm17, %zmm19 vsubpd (%rax){1to8}, %zmm17, %zmm19 @@ -480,6 +605,9 @@ vsubps %zmm16, %zmm17, %zmm19 {z}{k1} vsubps (%rax), %zmm17, %zmm19 {z}{k1} vsubps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vucomiss %xmm16, %xmm17 +vucomiss (%rax), %xmm17 + vunpckhpd %zmm16, %zmm17, %zmm19 vunpckhpd (%rax), %zmm17, %zmm19 vunpckhpd (%rax){1to8}, %zmm17, %zmm19 @@ -529,6 +657,14 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.33 kandw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kandnw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 korw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kxnorw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kxorw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kshiftlw $2, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kshiftrw $2, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kunpckbw %k0, %k1, %k2 # CHECK-NEXT: 1 3 1.00 vaddpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 10 1.00 * vaddpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 10 1.00 * vaddpd (%rax){1to8}, %zmm17, %zmm19 @@ -547,6 +683,28 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vaddps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vaddps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqps %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqps %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * 
vcmpeqps (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqsd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqsd (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqsd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqsd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqss %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqss (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqss %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqss (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcomiss %xmm16, %xmm17 +# CHECK-NEXT: 2 8 1.00 * vcomiss (%rax), %xmm17 # CHECK-NEXT: 2 4 1.00 vcvtdq2pd %ymm16, %zmm19 # CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax), %zmm19 # CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax){1to8}, %zmm19 @@ -601,6 +759,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 3 29 28.00 vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 36 28.00 * vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 4 36 28.00 * vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 5 0.50 * vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1 5 0.50 * vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1 5 0.50 * vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: 1 5 0.50 * vgatherqps (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: 1 3 1.00 vmaxpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 10 1.00 * vmaxpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 10 1.00 * vmaxpd (%rax){1to8}, %zmm17, %zmm19 @@ -691,6 +853,54 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 1 0.50 vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# 
CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 1 
0.50 vpcmpequd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: 1 5 0.50 * vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 1 5 0.50 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} @@ -941,6 +1151,30 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 1 0.50 vpsubq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vptestmd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestmq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * 
vptestnmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: 1 3 1.00 vsubpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 10 1.00 * vsubpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 10 1.00 * vsubpd (%rax){1to8}, %zmm17, %zmm19 @@ -959,6 +1193,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vsubps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vsubps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vucomiss %xmm16, %xmm17 +# CHECK-NEXT: 2 8 1.00 * vucomiss (%rax), %xmm17 # CHECK-NEXT: 1 1 1.00 vunpckhpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vunpckhpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vunpckhpd (%rax){1to8}, %zmm17, %zmm19 @@ -1008,10 +1244,18 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1506.00 129.00 144.00 - 270.00 155.00 155.00 +# CHECK-NEXT: - 1506.00 138.67 201.67 - 306.67 188.00 188.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kandw %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kandnw %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - korw %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kxnorw %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kxorw %k0, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kshiftlw $2, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kshiftrw $2, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kunpckbw %k0, %k1, %k2 # CHECK-NEXT: - - - 
1.00 - - - - vaddpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddpd (%rax){1to8}, %zmm17, %zmm19 @@ -1030,6 +1274,28 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vaddps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqps %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqps %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqsd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqsd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqsd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqsd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqss %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqss (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqss %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqss (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - - - vcomiss %xmm16, %xmm17 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcomiss (%rax), 
%xmm17 # CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtdq2pd %ymm16, %zmm19 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtdq2pd (%rax), %zmm19 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtdq2pd (%rax){1to8}, %zmm19 @@ -1084,6 +1350,10 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - 28.00 2.50 - - 0.50 - - vdivps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - 28.00 2.50 - - 0.50 0.50 0.50 vdivps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdpd (%rax,%ymm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherdps (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherqpd (%rax,%zmm1,2), %zmm2 {%k1} +# CHECK-NEXT: - - - - - - 0.50 0.50 vgatherqps (%rax,%zmm1,2), %ymm2 {%k1} # CHECK-NEXT: - - - 1.00 - - - - vmaxpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vmaxpd (%rax){1to8}, %zmm17, %zmm19 @@ -1174,6 +1444,54 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd 
(%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq 
(%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - - - - - 0.50 0.50 vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} @@ -1424,6 +1742,30 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmq %zmm0, %zmm1, %k2 
+# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - - 1.00 - - - - vsubpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubpd (%rax){1to8}, %zmm17, %zmm19 @@ -1442,6 +1784,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vsubps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vucomiss %xmm16, %xmm17 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vucomiss (%rax), %xmm17 # CHECK-NEXT: - - - - - 1.00 - - 
vunpckhpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpckhpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vunpckhpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s b/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s index 145192932f5..be70d494bf5 100644 --- a/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s +++ b/test/tools/llvm-mca/X86/Generic/resources-avx512bw.s @@ -1,6 +1,25 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s +kaddd %k0, %k1, %k2 +kaddq %k0, %k1, %k2 +kandd %k0, %k1, %k2 +kandq %k0, %k1, %k2 +kandnd %k0, %k1, %k2 +kandnq %k0, %k1, %k2 +kord %k0, %k1, %k2 +korq %k0, %k1, %k2 +kxnord %k0, %k1, %k2 +kxnorq %k0, %k1, %k2 +kxord %k0, %k1, %k2 +kxorq %k0, %k1, %k2 +kshiftld $2, %k1, %k2 +kshiftlq $2, %k1, %k2 +kshiftrd $2, %k1, %k2 +kshiftrq $2, %k1, %k2 +kunpckdq %k0, %k1, %k2 +kunpckwd %k0, %k1, %k2 + vpabsb %zmm16, %zmm19 vpabsb (%rax), %zmm19 vpabsb %zmm16, %zmm19 {k1} @@ -29,6 +48,46 @@ vpaddw (%rax), %zmm17, %zmm19 {k1} vpaddw %zmm16, %zmm17, %zmm19 {z}{k1} vpaddw (%rax), %zmm17, %zmm19 {z}{k1} +vpcmpb $0, %zmm0, %zmm1, %k2 +vpcmpb $0, (%rax), %zmm1, %k2 +vpcmpb $0, %zmm0, %zmm1, %k2 {k3} +vpcmpb $0, (%rax), %zmm1, %k2 {k3} + +vpcmpeqb %zmm0, %zmm1, %k2 +vpcmpeqb (%rax), %zmm1, %k2 +vpcmpeqb %zmm0, %zmm1, %k2 {k3} +vpcmpeqb (%rax), %zmm1, %k2 {k3} + +vpcmpeqw %zmm0, %zmm1, %k2 +vpcmpeqw (%rax), %zmm1, %k2 +vpcmpeqw %zmm0, %zmm1, %k2 {k3} +vpcmpeqw (%rax), %zmm1, %k2 {k3} + +vpcmpgtb %zmm0, %zmm1, %k2 +vpcmpgtb (%rax), %zmm1, %k2 +vpcmpgtb %zmm0, %zmm1, %k2 {k3} +vpcmpgtb (%rax), %zmm1, %k2 {k3} + +vpcmpgtw %zmm0, %zmm1, %k2 +vpcmpgtw (%rax), %zmm1, %k2 +vpcmpgtw %zmm0, %zmm1, %k2 {k3} +vpcmpgtw (%rax), %zmm1, %k2 {k3} + +vpcmpub $0, %zmm0, %zmm1, %k2 +vpcmpub $0, (%rax), %zmm1, %k2 +vpcmpub $0, %zmm0, %zmm1, %k2 {k3} +vpcmpub 
$0, (%rax), %zmm1, %k2 {k3} + +vpcmpuw $0, %zmm0, %zmm1, %k2 +vpcmpuw $0, (%rax), %zmm1, %k2 +vpcmpuw $0, %zmm0, %zmm1, %k2 {k3} +vpcmpuw $0, (%rax), %zmm1, %k2 {k3} + +vpcmpw $0, %zmm0, %zmm1, %k2 +vpcmpw $0, (%rax), %zmm1, %k2 +vpcmpw $0, %zmm0, %zmm1, %k2 {k3} +vpcmpw $0, (%rax), %zmm1, %k2 {k3} + vpermw %zmm16, %zmm17, %zmm19 vpermw (%rax), %zmm17, %zmm19 vpermw %zmm16, %zmm17, %zmm19 {k1} @@ -71,6 +130,26 @@ vpsubw (%rax), %zmm17, %zmm19 {k1} vpsubw %zmm16, %zmm17, %zmm19 {z}{k1} vpsubw (%rax), %zmm17, %zmm19 {z}{k1} +vptestmb %zmm0, %zmm1, %k2 +vptestmb (%rax), %zmm1, %k2 +vptestmb %zmm0, %zmm1, %k2 {k3} +vptestmb (%rax), %zmm1, %k2 {k3} + +vptestmw %zmm0, %zmm1, %k2 +vptestmw (%rax), %zmm1, %k2 +vptestmw %zmm0, %zmm1, %k2 {k3} +vptestmw (%rax), %zmm1, %k2 {k3} + +vptestnmb %zmm0, %zmm1, %k2 +vptestnmb (%rax), %zmm1, %k2 +vptestnmb %zmm0, %zmm1, %k2 {k3} +vptestnmb (%rax), %zmm1, %k2 {k3} + +vptestnmw %zmm0, %zmm1, %k2 +vptestnmw (%rax), %zmm1, %k2 +vptestnmw %zmm0, %zmm1, %k2 {k3} +vptestnmw (%rax), %zmm1, %k2 {k3} + vpunpckhbw %zmm16, %zmm17, %zmm19 vpunpckhbw (%rax), %zmm17, %zmm19 vpunpckhbw %zmm16, %zmm17, %zmm19 {k1} @@ -108,6 +187,24 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.33 kaddd %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kaddq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kandd %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kandq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kandnd %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kandnq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kord %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 korq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kxnord %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kxnorq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kxord %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kxorq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kshiftld $2, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kshiftlq $2, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kshiftrd $2, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kshiftrq $2, 
%k1, %k2 +# CHECK-NEXT: 1 1 1.00 kunpckdq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kunpckwd %k0, %k1, %k2 # CHECK-NEXT: 1 1 0.50 vpabsb %zmm16, %zmm19 # CHECK-NEXT: 2 8 0.50 * vpabsb (%rax), %zmm19 # CHECK-NEXT: 1 1 0.50 vpabsb %zmm16, %zmm19 {%k1} @@ -132,6 +229,38 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vpaddw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1 1 0.50 vpaddw %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequw %zmm0, %zmm1, %k2 {%k3} 
+# CHECK-NEXT: 2 8 0.50 * vpcmpequw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: 1 1 1.00 vpermw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpermw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1 1 1.00 vpermw %zmm16, %zmm17, %zmm19 {%k1} @@ -168,6 +297,22 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vpsubw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1 1 0.50 vpsubw %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vptestmb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestmw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: 1 1 1.00 vpunpckhbw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpunpckhbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1 1 1.00 vpunpckhbw %zmm16, %zmm17, %zmm19 {%k1} @@ -205,10 +350,28 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 21.00 - 63.00 21.00 21.00 +# CHECK-NEXT: - - 
9.33 46.33 - 94.33 33.00 33.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kaddd %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kaddq %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kandd %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kandq %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kandnd %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kandnq %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kord %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - korq %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kxnord %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kxnorq %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kxord %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kxorq %k0, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kshiftld $2, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kshiftlq $2, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kshiftrd $2, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kshiftrq $2, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kunpckdq %k0, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kunpckwd %k0, %k1, %k2 # CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsb %zmm16, %zmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpabsb (%rax), %zmm19 # CHECK-NEXT: - - - 0.50 - 0.50 - - vpabsb %zmm16, %zmm19 {%k1} @@ -233,6 +396,38 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddw %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - 
- vpcmpeqb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - vpermw %zmm16, 
%zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - vpermw %zmm16, %zmm17, %zmm19 {%k1} @@ -269,6 +464,22 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubw %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - vpunpckhbw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpunpckhbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 - - vpunpckhbw %zmm16, %zmm17, %zmm19 {%k1} diff --git a/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s b/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s index 
71c54b8f941..2f227a7a1c1 100644 --- a/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s +++ b/test/tools/llvm-mca/X86/Generic/resources-avx512bwvl.s @@ -57,6 +57,86 @@ vpaddw (%rax), %ymm17, %ymm19 {k1} vpaddw %ymm16, %ymm17, %ymm19 {z}{k1} vpaddw (%rax), %ymm17, %ymm19 {z}{k1} +vpcmpb $0, %xmm0, %xmm1, %k2 +vpcmpb $0, (%rax), %xmm1, %k2 +vpcmpb $0, %xmm0, %xmm1, %k2 {k3} +vpcmpb $0, (%rax), %xmm1, %k2 {k3} + +vpcmpb $0, %ymm0, %ymm1, %k2 +vpcmpb $0, (%rax), %ymm1, %k2 +vpcmpb $0, %ymm0, %ymm1, %k2 {k3} +vpcmpb $0, (%rax), %ymm1, %k2 {k3} + +vpcmpeqb %xmm0, %xmm1, %k2 +vpcmpeqb (%rax), %xmm1, %k2 +vpcmpeqb %xmm0, %xmm1, %k2 {k3} +vpcmpeqb (%rax), %xmm1, %k2 {k3} + +vpcmpeqb %ymm0, %ymm1, %k2 +vpcmpeqb (%rax), %ymm1, %k2 +vpcmpeqb %ymm0, %ymm1, %k2 {k3} +vpcmpeqb (%rax), %ymm1, %k2 {k3} + +vpcmpeqw %xmm0, %xmm1, %k2 +vpcmpeqw (%rax), %xmm1, %k2 +vpcmpeqw %xmm0, %xmm1, %k2 {k3} +vpcmpeqw (%rax), %xmm1, %k2 {k3} + +vpcmpeqw %ymm0, %ymm1, %k2 +vpcmpeqw (%rax), %ymm1, %k2 +vpcmpeqw %ymm0, %ymm1, %k2 {k3} +vpcmpeqw (%rax), %ymm1, %k2 {k3} + +vpcmpgtb %xmm0, %xmm1, %k2 +vpcmpgtb (%rax), %xmm1, %k2 +vpcmpgtb %xmm0, %xmm1, %k2 {k3} +vpcmpgtb (%rax), %xmm1, %k2 {k3} + +vpcmpgtb %ymm0, %ymm1, %k2 +vpcmpgtb (%rax), %ymm1, %k2 +vpcmpgtb %ymm0, %ymm1, %k2 {k3} +vpcmpgtb (%rax), %ymm1, %k2 {k3} + +vpcmpgtw %xmm0, %xmm1, %k2 +vpcmpgtw (%rax), %xmm1, %k2 +vpcmpgtw %xmm0, %xmm1, %k2 {k3} +vpcmpgtw (%rax), %xmm1, %k2 {k3} + +vpcmpgtw %ymm0, %ymm1, %k2 +vpcmpgtw (%rax), %ymm1, %k2 +vpcmpgtw %ymm0, %ymm1, %k2 {k3} +vpcmpgtw (%rax), %ymm1, %k2 {k3} + +vpcmpub $0, %xmm0, %xmm1, %k2 +vpcmpub $0, (%rax), %xmm1, %k2 +vpcmpub $0, %xmm0, %xmm1, %k2 {k3} +vpcmpub $0, (%rax), %xmm1, %k2 {k3} + +vpcmpub $0, %ymm0, %ymm1, %k2 +vpcmpub $0, (%rax), %ymm1, %k2 +vpcmpub $0, %ymm0, %ymm1, %k2 {k3} +vpcmpub $0, (%rax), %ymm1, %k2 {k3} + +vpcmpuw $0, %xmm0, %xmm1, %k2 +vpcmpuw $0, (%rax), %xmm1, %k2 +vpcmpuw $0, %xmm0, %xmm1, %k2 {k3} +vpcmpuw $0, (%rax), %xmm1, %k2 {k3} + +vpcmpuw $0, %ymm0, 
%ymm1, %k2 +vpcmpuw $0, (%rax), %ymm1, %k2 +vpcmpuw $0, %ymm0, %ymm1, %k2 {k3} +vpcmpuw $0, (%rax), %ymm1, %k2 {k3} + +vpcmpw $0, %xmm0, %xmm1, %k2 +vpcmpw $0, (%rax), %xmm1, %k2 +vpcmpw $0, %xmm0, %xmm1, %k2 {k3} +vpcmpw $0, (%rax), %xmm1, %k2 {k3} + +vpcmpw $0, %ymm0, %ymm1, %k2 +vpcmpw $0, (%rax), %ymm1, %k2 +vpcmpw $0, %ymm0, %ymm1, %k2 {k3} +vpcmpw $0, (%rax), %ymm1, %k2 {k3} + vpermw %xmm16, %xmm17, %xmm19 vpermw (%rax), %xmm17, %xmm19 vpermw %xmm16, %xmm17, %xmm19 {k1} @@ -127,6 +207,46 @@ vpsubw (%rax), %ymm17, %ymm19 {k1} vpsubw %ymm16, %ymm17, %ymm19 {z}{k1} vpsubw (%rax), %ymm17, %ymm19 {z}{k1} +vptestmb %xmm0, %xmm1, %k2 +vptestmb (%rax), %xmm1, %k2 +vptestmb %xmm0, %xmm1, %k2 {k3} +vptestmb (%rax), %xmm1, %k2 {k3} + +vptestmb %ymm0, %ymm1, %k2 +vptestmb (%rax), %ymm1, %k2 +vptestmb %ymm0, %ymm1, %k2 {k3} +vptestmb (%rax), %ymm1, %k2 {k3} + +vptestmw %xmm0, %xmm1, %k2 +vptestmw (%rax), %xmm1, %k2 +vptestmw %xmm0, %xmm1, %k2 {k3} +vptestmw (%rax), %xmm1, %k2 {k3} + +vptestmw %ymm0, %ymm1, %k2 +vptestmw (%rax), %ymm1, %k2 +vptestmw %ymm0, %ymm1, %k2 {k3} +vptestmw (%rax), %ymm1, %k2 {k3} + +vptestnmb %xmm0, %xmm1, %k2 +vptestnmb (%rax), %xmm1, %k2 +vptestnmb %xmm0, %xmm1, %k2 {k3} +vptestnmb (%rax), %xmm1, %k2 {k3} + +vptestnmb %ymm0, %ymm1, %k2 +vptestnmb (%rax), %ymm1, %k2 +vptestnmb %ymm0, %ymm1, %k2 {k3} +vptestnmb (%rax), %ymm1, %k2 {k3} + +vptestnmw %xmm0, %xmm1, %k2 +vptestnmw (%rax), %xmm1, %k2 +vptestnmw %xmm0, %xmm1, %k2 {k3} +vptestnmw (%rax), %xmm1, %k2 {k3} + +vptestnmw %ymm0, %ymm1, %k2 +vptestnmw (%rax), %ymm1, %k2 +vptestnmw %ymm0, %ymm1, %k2 {k3} +vptestnmw (%rax), %ymm1, %k2 {k3} + vpunpckhbw %xmm16, %xmm17, %xmm19 vpunpckhbw (%rax), %xmm17, %xmm19 vpunpckhbw %xmm16, %xmm17, %xmm19 {k1} @@ -240,6 +360,70 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vpaddw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 1 0.50 vpaddw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddw (%rax), %ymm17, %ymm19 {%k1} {z} +# 
CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpgtb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpgtb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpgtw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpgtw (%rax), %xmm1, %k2 {%k3} +# 
CHECK-NEXT: 1 1 0.50 vpcmpgtw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpequb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpequb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpequw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpequw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: 1 1 1.00 vpermw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 8 1.00 * vpermw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1 1 1.00 vpermw %xmm16, %xmm17, %xmm19 {%k1} @@ -300,6 +484,38 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vpsubw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 1 0.50 vpsubw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubw (%rax), %ymm17, %ymm19 {%k1} {z} +# 
CHECK-NEXT: 1 1 0.33 vptestmb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestmb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestmb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestmw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestmw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestmw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestnmb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestnmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestnmw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestnmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: 1 1 0.50 vpunpckhbw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpunpckhbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1 1 0.50 vpunpckhbw %xmm16, %xmm17, %xmm19 {%k1} @@ -361,7 +577,7 @@ vpunpcklwd 
(%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - - 57.00 - 99.00 39.00 39.00 +# CHECK-NEXT: - - 10.67 99.67 - 141.67 63.00 63.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -413,6 +629,70 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 
0.50 vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequb 
(%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - vpermw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - 1.00 - - vpermw %xmm16, %xmm17, %xmm19 {%k1} @@ -473,6 +753,38 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - 
vptestmb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 
vptestnmw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpckhbw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpckhbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpckhbw %xmm16, %xmm17, %xmm19 {%k1} diff --git a/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s b/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s index 1947562b5cf..3630534fc04 100644 --- a/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s +++ b/test/tools/llvm-mca/X86/Generic/resources-avx512dq.s @@ -1,6 +1,16 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s +kaddb %k0, %k1, %k2 +kaddw %k0, %k1, %k2 +kandb %k0, %k1, %k2 +kandnb %k0, %k1, %k2 +korb %k0, %k1, %k2 +kxnorb %k0, %k1, %k2 +kxorb %k0, %k1, %k2 +kshiftlb $2, %k1, %k2 +kshiftrb $2, %k1, %k2 + vandnpd %zmm16, %zmm17, %zmm19 vandnpd (%rax), %zmm17, %zmm19 vandnpd (%rax){1to8}, %zmm17, %zmm19 @@ -81,6 +91,30 @@ vorps %zmm16, %zmm17, %zmm19 {z}{k1} vorps (%rax), %zmm17, %zmm19 {z}{k1} vorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vfpclasspd $0xab, %zmm16, %k1 +vfpclasspdz $0xab, (%rax), %k1 +vfpclasspdz $0xab, (%rax){1to8}, %k1 +vfpclasspd $0xab, %zmm16, %k1 {k2} +vfpclasspdz $0xab, (%rax), %k1 {k2} +vfpclasspdz $0xab, (%rax){1to8}, %k1 {k2} + +vfpclassps $0xab, %zmm16, %k1 +vfpclasspsz $0xab, (%rax), %k1 +vfpclasspsz $0xab, (%rax){1to16}, %k1 +vfpclassps $0xab, %zmm16, %k1 {k2} +vfpclasspsz $0xab, (%rax), %k1 {k2} +vfpclasspsz $0xab, (%rax){1to16}, %k1 {k2} + +vfpclasssd $0xab, %xmm16, %k1 +vfpclasssd $0xab, (%rax), %k1 +vfpclasssd $0xab, %xmm16, %k1 {k2} +vfpclasssd $0xab, (%rax), %k1 {k2} + +vfpclassss $0xab, %xmm16, %k1 +vfpclassss $0xab, (%rax), %k1 +vfpclassss $0xab, %xmm16, %k1 {k2} +vfpclassss $0xab, (%rax), %k1 {k2} + vpmullq %zmm16, %zmm17, %zmm19 vpmullq (%rax), %zmm17, %zmm19 vpmullq %zmm16, %zmm17, %zmm19 {k1} @@ -117,6 +151,15 @@ vxorps 
(%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 0.33 kaddb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kaddw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kandb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kandnb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 korb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kxnorb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 0.33 kxorb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kshiftlb $2, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kshiftrb $2, %k1, %k2 # CHECK-NEXT: 1 1 1.00 vandnpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vandnpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vandnpd (%rax){1to8}, %zmm17, %zmm19 @@ -189,6 +232,26 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 1 1.00 vorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %zmm16, %k1 +# CHECK-NEXT: 2 10 1.00 * vfpclasspdz $171, (%rax), %k1 +# CHECK-NEXT: 2 10 1.00 * vfpclasspd $171, (%rax){1to8}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %zmm16, %k1 {%k2} +# CHECK-NEXT: 2 10 1.00 * vfpclasspdz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 10 1.00 * vfpclasspd $171, (%rax){1to8}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %zmm16, %k1 +# CHECK-NEXT: 2 10 1.00 * vfpclasspsz $171, (%rax), %k1 +# CHECK-NEXT: 2 10 1.00 * vfpclassps $171, (%rax){1to16}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %zmm16, %k1 {%k2} +# CHECK-NEXT: 2 10 1.00 * vfpclasspsz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 10 1.00 * vfpclassps $171, (%rax){1to16}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclasssd $171, %xmm16, %k1 +# CHECK-NEXT: 2 9 1.00 * vfpclasssd $171, (%rax), %k1 +# CHECK-NEXT: 1 3 1.00 vfpclasssd $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: 2 9 1.00 * vfpclasssd $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclassss $171, %xmm16, %k1 +# CHECK-NEXT: 2 9 1.00 * 
vfpclassss $171, (%rax), %k1 +# CHECK-NEXT: 1 3 1.00 vfpclassss $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: 2 9 1.00 * vfpclassss $171, (%rax), %k1 {%k2} # CHECK-NEXT: 1 5 1.00 vpmullq %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 12 1.00 * vpmullq (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1 5 1.00 vpmullq %zmm16, %zmm17, %zmm19 {%k1} @@ -226,10 +289,19 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 6.00 18.00 - 81.00 31.50 31.50 +# CHECK-NEXT: - - 8.33 40.33 - 85.33 37.50 37.50 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kaddb %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kaddw %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kandb %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kandnb %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - korb %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kxnorb %k0, %k1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - kxorb %k0, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kshiftlb $2, %k1, %k2 +# CHECK-NEXT: - - - - - 1.00 - - kshiftrb $2, %k1, %k2 # CHECK-NEXT: - - - - - 1.00 - - vandnpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandnpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vandnpd (%rax){1to8}, %zmm17, %zmm19 @@ -302,6 +374,26 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 - - vorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vfpclasspd $171, %zmm16, %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspdz $171, (%rax), %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspd $171, (%rax){1to8}, %k1 +# CHECK-NEXT: - - - 1.00 - - - - vfpclasspd $171, %zmm16, 
%k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspdz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspd $171, (%rax){1to8}, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - - - vfpclassps $171, %zmm16, %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspsz $171, (%rax), %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclassps $171, (%rax){1to16}, %k1 +# CHECK-NEXT: - - - 1.00 - - - - vfpclassps $171, %zmm16, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspsz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclassps $171, (%rax){1to16}, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - - - vfpclasssd $171, %xmm16, %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasssd $171, (%rax), %k1 +# CHECK-NEXT: - - - 1.00 - - - - vfpclasssd $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasssd $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - - - vfpclassss $171, %xmm16, %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclassss $171, (%rax), %k1 +# CHECK-NEXT: - - - 1.00 - - - - vfpclassss $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclassss $171, (%rax), %k1 {%k2} # CHECK-NEXT: - - 1.00 - - - - - vpmullq %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmullq (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 1.00 - - - - - vpmullq %zmm16, %zmm17, %zmm19 {%k1} diff --git a/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s b/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s index 90c25753e37..ac28d91d83f 100644 --- a/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s +++ b/test/tools/llvm-mca/X86/Generic/resources-avx512dqvl.s @@ -121,6 +121,34 @@ vorps %ymm16, %ymm17, %ymm19 {z}{k1} vorps (%rax), %ymm17, %ymm19 {z}{k1} vorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vfpclasspd $0xab, %xmm16, %k1 +vfpclasspdx $0xab, (%rax), %k1 +vfpclasspdx $0xab, (%rax){1to2}, %k1 +vfpclasspd $0xab, %xmm16, %k1 {k2} +vfpclasspdx $0xab, (%rax), %k1 {k2} +vfpclasspdx $0xab, (%rax){1to2}, %k1 {k2} 
+ +vfpclasspd $0xab, %ymm16, %k1 +vfpclasspdy $0xab, (%rax), %k1 +vfpclasspdy $0xab, (%rax){1to4}, %k1 +vfpclasspd $0xab, %ymm16, %k1 {k2} +vfpclasspdy $0xab, (%rax), %k1 {k2} +vfpclasspdy $0xab, (%rax){1to4}, %k1 {k2} + +vfpclassps $0xab, %xmm16, %k1 +vfpclasspsx $0xab, (%rax), %k1 +vfpclasspsx $0xab, (%rax){1to4}, %k1 +vfpclassps $0xab, %xmm16, %k1 {k2} +vfpclasspsx $0xab, (%rax), %k1 {k2} +vfpclasspsx $0xab, (%rax){1to4}, %k1 {k2} + +vfpclassps $0xab, %ymm16, %k1 +vfpclasspsy $0xab, (%rax), %k1 +vfpclasspsy $0xab, (%rax){1to8}, %k1 +vfpclassps $0xab, %ymm16, %k1 {k2} +vfpclasspsy $0xab, (%rax), %k1 {k2} +vfpclasspsy $0xab, (%rax){1to8}, %k1 {k2} + vpmullq %xmm16, %xmm17, %xmm19 vpmullq (%rax), %xmm17, %xmm19 vpmullq %xmm16, %xmm17, %xmm19 {k1} @@ -292,6 +320,30 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 1 1.00 vorps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vorps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 1.00 * vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %xmm16, %k1 +# CHECK-NEXT: 2 9 1.00 * vfpclasspdx $171, (%rax), %k1 +# CHECK-NEXT: 2 9 1.00 * vfpclasspd $171, (%rax){1to2}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: 2 9 1.00 * vfpclasspdx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 9 1.00 * vfpclasspd $171, (%rax){1to2}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %ymm16, %k1 +# CHECK-NEXT: 2 10 1.00 * vfpclasspdy $171, (%rax), %k1 +# CHECK-NEXT: 2 10 1.00 * vfpclasspd $171, (%rax){1to4}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %ymm16, %k1 {%k2} +# CHECK-NEXT: 2 10 1.00 * vfpclasspdy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 10 1.00 * vfpclasspd $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %xmm16, %k1 +# CHECK-NEXT: 2 9 1.00 * vfpclasspsx $171, (%rax), %k1 +# CHECK-NEXT: 2 9 1.00 * vfpclassps $171, (%rax){1to4}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: 2 9 1.00 * 
vfpclasspsx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 9 1.00 * vfpclassps $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %ymm16, %k1 +# CHECK-NEXT: 2 10 1.00 * vfpclasspsy $171, (%rax), %k1 +# CHECK-NEXT: 2 10 1.00 * vfpclassps $171, (%rax){1to8}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %ymm16, %k1 {%k2} +# CHECK-NEXT: 2 10 1.00 * vfpclasspsy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 10 1.00 * vfpclassps $171, (%rax){1to8}, %k1 {%k2} # CHECK-NEXT: 1 5 1.00 vpmullq %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 11 1.00 * vpmullq (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1 5 1.00 vpmullq %xmm16, %xmm17, %xmm19 {%k1} @@ -353,7 +405,7 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 12.00 18.00 - 144.00 51.00 51.00 +# CHECK-NEXT: - - 12.00 42.00 - 144.00 59.00 59.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -465,6 +517,30 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - - - 1.00 - - vorps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vorps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vfpclasspd $171, %xmm16, %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspdx $171, (%rax), %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspd $171, (%rax){1to2}, %k1 +# CHECK-NEXT: - - - 1.00 - - - - vfpclasspd $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspdx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspd $171, (%rax){1to2}, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - - - vfpclasspd $171, %ymm16, %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspdy $171, (%rax), %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspd $171, (%rax){1to4}, %k1 +# CHECK-NEXT: - - - 1.00 - - - - vfpclasspd $171, 
%ymm16, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspdy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspd $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - - - vfpclassps $171, %xmm16, %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspsx $171, (%rax), %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclassps $171, (%rax){1to4}, %k1 +# CHECK-NEXT: - - - 1.00 - - - - vfpclassps $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspsx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclassps $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - - - vfpclassps $171, %ymm16, %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspsy $171, (%rax), %k1 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclassps $171, (%rax){1to8}, %k1 +# CHECK-NEXT: - - - 1.00 - - - - vfpclassps $171, %ymm16, %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclasspsy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfpclassps $171, (%rax){1to8}, %k1 {%k2} # CHECK-NEXT: - - 1.00 - - - - - vpmullq %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmullq (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - 1.00 - - - - - vpmullq %xmm16, %xmm17, %xmm19 {%k1} diff --git a/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s b/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s index 6d43b8c54b4..2ba7bb5c674 100644 --- a/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s +++ b/test/tools/llvm-mca/X86/Generic/resources-avx512vl.s @@ -41,6 +41,34 @@ vaddps %ymm16, %ymm17, %ymm19 {z}{k1} vaddps (%rax), %ymm17, %ymm19 {z}{k1} vaddps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vcmppd $0, %xmm0, %xmm1, %k2 +vcmppd $0, (%rax), %xmm1, %k2 +vcmppd $0, (%rax){1to2}, %xmm1, %k2 +vcmppd $0, %xmm0, %xmm1, %k2 {k3} +vcmppd $0, (%rax), %xmm1, %k2 {k3} +vcmppd $0, (%rax){1to2}, %xmm1, %k2 {k3} + +vcmppd $0, %ymm0, %ymm1, %k2 +vcmppd $0, (%rax), %ymm1, %k2 +vcmppd $0, (%rax){1to4}, %ymm1, %k2 +vcmppd $0, %ymm0, %ymm1, 
%k2 {k3} +vcmppd $0, (%rax), %ymm1, %k2 {k3} +vcmppd $0, (%rax){1to4}, %ymm1, %k2 {k3} + +vcmpps $0, %xmm0, %xmm1, %k2 +vcmpps $0, (%rax), %xmm1, %k2 +vcmpps $0, (%rax){1to4}, %xmm1, %k2 +vcmpps $0, %xmm0, %xmm1, %k2 {k3} +vcmpps $0, (%rax), %xmm1, %k2 {k3} +vcmpps $0, (%rax){1to4}, %xmm1, %k2 {k3} + +vcmpps $0, %ymm0, %ymm1, %k2 +vcmpps $0, (%rax), %ymm1, %k2 +vcmpps $0, (%rax){1to8}, %ymm1, %k2 +vcmpps $0, %ymm0, %ymm1, %k2 {k3} +vcmpps $0, (%rax), %ymm1, %k2 {k3} +vcmpps $0, (%rax){1to8}, %ymm1, %k2 {k3} + vcvtdq2pd %xmm16, %xmm19 vcvtdq2pd (%rax), %xmm19 vcvtdq2pd (%rax){1to2}, %xmm19 @@ -291,6 +319,118 @@ vpaddq %ymm16, %ymm17, %ymm19 {z}{k1} vpaddq (%rax), %ymm17, %ymm19 {z}{k1} vpaddq (%rax){1to4}, %ymm17, %ymm19 {z}{k1} +vpcmpd $0, %xmm0, %xmm1, %k2 +vpcmpd $0, (%rax), %xmm1, %k2 +vpcmpd $0, (%rax){1to4}, %xmm1, %k2 +vpcmpd $0, %xmm0, %xmm1, %k2 {k3} +vpcmpd $0, (%rax), %xmm1, %k2 {k3} +vpcmpd $0, (%rax){1to4}, %xmm1, %k2 {k3} + +vpcmpd $0, %ymm0, %ymm1, %k2 +vpcmpd $0, (%rax), %ymm1, %k2 +vpcmpd $0, (%rax){1to8}, %ymm1, %k2 +vpcmpd $0, %ymm0, %ymm1, %k2 {k3} +vpcmpd $0, (%rax), %ymm1, %k2 {k3} +vpcmpd $0, (%rax){1to8}, %ymm1, %k2 {k3} + +vpcmpeqd %xmm0, %xmm1, %k2 +vpcmpeqd (%rax), %xmm1, %k2 +vpcmpeqd (%rax){1to4}, %xmm1, %k2 +vpcmpeqd %xmm0, %xmm1, %k2 {k3} +vpcmpeqd (%rax), %xmm1, %k2 {k3} +vpcmpeqd (%rax){1to4}, %xmm1, %k2 {k3} + +vpcmpeqd %ymm0, %ymm1, %k2 +vpcmpeqd (%rax), %ymm1, %k2 +vpcmpeqd (%rax){1to8}, %ymm1, %k2 +vpcmpeqd %ymm0, %ymm1, %k2 {k3} +vpcmpeqd (%rax), %ymm1, %k2 {k3} +vpcmpeqd (%rax){1to8}, %ymm1, %k2 {k3} + +vpcmpeqq %xmm0, %xmm1, %k2 +vpcmpeqq (%rax), %xmm1, %k2 +vpcmpeqq (%rax){1to2}, %xmm1, %k2 +vpcmpeqq %xmm0, %xmm1, %k2 {k3} +vpcmpeqq (%rax), %xmm1, %k2 {k3} +vpcmpeqq (%rax){1to2}, %xmm1, %k2 {k3} + +vpcmpeqq %ymm0, %ymm1, %k2 +vpcmpeqq (%rax), %ymm1, %k2 +vpcmpeqq (%rax){1to4}, %ymm1, %k2 +vpcmpeqq %ymm0, %ymm1, %k2 {k3} +vpcmpeqq (%rax), %ymm1, %k2 {k3} +vpcmpeqq (%rax){1to4}, %ymm1, %k2 {k3} + +vpcmpgtd %xmm0, %xmm1, %k2 
+vpcmpgtd (%rax), %xmm1, %k2 +vpcmpgtd (%rax){1to4}, %xmm1, %k2 +vpcmpgtd %xmm0, %xmm1, %k2 {k3} +vpcmpgtd (%rax), %xmm1, %k2 {k3} +vpcmpgtd (%rax){1to4}, %xmm1, %k2 {k3} + +vpcmpgtd %ymm0, %ymm1, %k2 +vpcmpgtd (%rax), %ymm1, %k2 +vpcmpgtd (%rax){1to8}, %ymm1, %k2 +vpcmpgtd %ymm0, %ymm1, %k2 {k3} +vpcmpgtd (%rax), %ymm1, %k2 {k3} +vpcmpgtd (%rax){1to8}, %ymm1, %k2 {k3} + +vpcmpgtq %xmm0, %xmm1, %k2 +vpcmpgtq (%rax), %xmm1, %k2 +vpcmpgtq (%rax){1to2}, %xmm1, %k2 +vpcmpgtq %xmm0, %xmm1, %k2 {k3} +vpcmpgtq (%rax), %xmm1, %k2 {k3} +vpcmpgtq (%rax){1to2}, %xmm1, %k2 {k3} + +vpcmpgtq %ymm0, %ymm1, %k2 +vpcmpgtq (%rax), %ymm1, %k2 +vpcmpgtq (%rax){1to4}, %ymm1, %k2 +vpcmpgtq %ymm0, %ymm1, %k2 {k3} +vpcmpgtq (%rax), %ymm1, %k2 {k3} +vpcmpgtq (%rax){1to4}, %ymm1, %k2 {k3} + +vpcmpq $0, %xmm0, %xmm1, %k2 +vpcmpq $0, (%rax), %xmm1, %k2 +vpcmpq $0, (%rax){1to2}, %xmm1, %k2 +vpcmpq $0, %xmm0, %xmm1, %k2 {k3} +vpcmpq $0, (%rax), %xmm1, %k2 {k3} +vpcmpq $0, (%rax){1to2}, %xmm1, %k2 {k3} + +vpcmpq $0, %ymm0, %ymm1, %k2 +vpcmpq $0, (%rax), %ymm1, %k2 +vpcmpq $0, (%rax){1to4}, %ymm1, %k2 +vpcmpq $0, %ymm0, %ymm1, %k2 {k3} +vpcmpq $0, (%rax), %ymm1, %k2 {k3} +vpcmpq $0, (%rax){1to4}, %ymm1, %k2 {k3} + +vpcmpud $0, %xmm0, %xmm1, %k2 +vpcmpud $0, (%rax), %xmm1, %k2 +vpcmpud $0, (%rax){1to4}, %xmm1, %k2 +vpcmpud $0, %xmm0, %xmm1, %k2 {k3} +vpcmpud $0, (%rax), %xmm1, %k2 {k3} +vpcmpud $0, (%rax){1to4}, %xmm1, %k2 {k3} + +vpcmpud $0, %ymm0, %ymm1, %k2 +vpcmpud $0, (%rax), %ymm1, %k2 +vpcmpud $0, (%rax){1to8}, %ymm1, %k2 +vpcmpud $0, %ymm0, %ymm1, %k2 {k3} +vpcmpud $0, (%rax), %ymm1, %k2 {k3} +vpcmpud $0, (%rax){1to8}, %ymm1, %k2 {k3} + +vpcmpuq $0, %xmm0, %xmm1, %k2 +vpcmpuq $0, (%rax), %xmm1, %k2 +vpcmpuq $0, (%rax){1to2}, %xmm1, %k2 +vpcmpuq $0, %xmm0, %xmm1, %k2 {k3} +vpcmpuq $0, (%rax), %xmm1, %k2 {k3} +vpcmpuq $0, (%rax){1to2}, %xmm1, %k2 {k3} + +vpcmpuq $0, %ymm0, %ymm1, %k2 +vpcmpuq $0, (%rax), %ymm1, %k2 +vpcmpuq $0, (%rax){1to4}, %ymm1, %k2 +vpcmpuq $0, %ymm0, %ymm1, %k2 {k3} 
+vpcmpuq $0, (%rax), %ymm1, %k2 {k3} +vpcmpuq $0, (%rax){1to4}, %ymm1, %k2 {k3} + vpermd %ymm16, %ymm17, %ymm19 vpermd (%rax), %ymm17, %ymm19 vpermd (%rax){1to8}, %ymm17, %ymm19 @@ -521,6 +661,62 @@ vpsubq %ymm16, %ymm17, %ymm19 {z}{k1} vpsubq (%rax), %ymm17, %ymm19 {z}{k1} vpsubq (%rax){1to4}, %ymm17, %ymm19 {z}{k1} +vptestmd %xmm0, %xmm1, %k2 +vptestmd (%rax), %xmm1, %k2 +vptestmd (%rax){1to4}, %xmm1, %k2 +vptestmd %xmm0, %xmm1, %k2 {k3} +vptestmd (%rax), %xmm1, %k2 {k3} +vptestmd (%rax){1to4}, %xmm1, %k2 {k3} + +vptestmd %ymm0, %ymm1, %k2 +vptestmd (%rax), %ymm1, %k2 +vptestmd (%rax){1to8}, %ymm1, %k2 +vptestmd %ymm0, %ymm1, %k2 {k3} +vptestmd (%rax), %ymm1, %k2 {k3} +vptestmd (%rax){1to8}, %ymm1, %k2 {k3} + +vptestmq %xmm0, %xmm1, %k2 +vptestmq (%rax), %xmm1, %k2 +vptestmq (%rax){1to2}, %xmm1, %k2 +vptestmq %xmm0, %xmm1, %k2 {k3} +vptestmq (%rax), %xmm1, %k2 {k3} +vptestmq (%rax){1to2}, %xmm1, %k2 {k3} + +vptestmq %ymm0, %ymm1, %k2 +vptestmq (%rax), %ymm1, %k2 +vptestmq (%rax){1to4}, %ymm1, %k2 +vptestmq %ymm0, %ymm1, %k2 {k3} +vptestmq (%rax), %ymm1, %k2 {k3} +vptestmq (%rax){1to4}, %ymm1, %k2 {k3} + +vptestnmd %xmm0, %xmm1, %k2 +vptestnmd (%rax), %xmm1, %k2 +vptestnmd (%rax){1to4}, %xmm1, %k2 +vptestnmd %xmm0, %xmm1, %k2 {k3} +vptestnmd (%rax), %xmm1, %k2 {k3} +vptestnmd (%rax){1to4}, %xmm1, %k2 {k3} + +vptestnmd %ymm0, %ymm1, %k2 +vptestnmd (%rax), %ymm1, %k2 +vptestnmd (%rax){1to8}, %ymm1, %k2 +vptestnmd %ymm0, %ymm1, %k2 {k3} +vptestnmd (%rax), %ymm1, %k2 {k3} +vptestnmd (%rax){1to8}, %ymm1, %k2 {k3} + +vptestnmq %xmm0, %xmm1, %k2 +vptestnmq (%rax), %xmm1, %k2 +vptestnmq (%rax){1to2}, %xmm1, %k2 +vptestnmq %xmm0, %xmm1, %k2 {k3} +vptestnmq (%rax), %xmm1, %k2 {k3} +vptestnmq (%rax){1to2}, %xmm1, %k2 {k3} + +vptestnmq %ymm0, %ymm1, %k2 +vptestnmq (%rax), %ymm1, %k2 +vptestnmq (%rax){1to4}, %ymm1, %k2 +vptestnmq %ymm0, %ymm1, %k2 {k3} +vptestnmq (%rax), %ymm1, %k2 {k3} +vptestnmq (%rax){1to4}, %ymm1, %k2 {k3} + vpunpckhdq %xmm16, %xmm17, %xmm19 vpunpckhdq 
(%rax), %xmm17, %xmm19 vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 @@ -806,6 +1002,30 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 3 1.00 vaddps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vaddps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 10 1.00 * vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqps %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqps (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqps (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqps %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqps (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqps (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqps %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqps %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: 2 4 1.00 vcvtdq2pd %xmm16, %xmm19 # CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax), %xmm19 # CHECK-NEXT: 3 10 1.00 * vcvtdq2pd (%rax){1to2}, %xmm19 @@ -1030,6 +1250,102 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 1 0.50 vpaddq %ymm16, 
%ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax), 
%ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpgtd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpgtd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpgtd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpgtd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpgtq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpgtq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpgtq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpgtq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpgtq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpgtq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpgtq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpgtq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# 
CHECK-NEXT: 1 1 0.50 vpcmpeqq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpeqq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpequd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpequd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpequd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpequd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpequq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vpcmpequq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpequq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vpcmpequq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.50 vpcmpequq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vpcmpequq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.50 vpcmpequq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vpcmpequq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: 1 1 1.00 vpermd %ymm16, %ymm17, %ymm19 # CHECK-NEXT: 2 8 1.00 * vpermd (%rax), %ymm17, %ymm19 # CHECK-NEXT: 2 8 1.00 * vpermd (%rax){1to8}, %ymm17, %ymm19 @@ -1236,6 +1552,54 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} 
# CHECK-NEXT: 1 1 0.50 vpsubq %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 1 0.33 vptestmd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestmd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestmd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestmq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestmq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestmq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestmq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestmq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestnmd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestnmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestnmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestnmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmd %ymm0, %ymm1, %k2 +# 
CHECK-NEXT: 2 8 0.50 * vptestnmd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestnmq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 7 0.50 * vptestnmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestnmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 7 0.50 * vptestnmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 1 0.33 vptestnmq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 8 0.50 * vptestnmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 1 0.33 vptestnmq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 8 0.50 * vptestnmq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: 1 1 0.50 vpunpckhdq %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpunpckhdq (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 7 0.50 * vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 @@ -1465,7 +1829,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - 1935.00 180.00 229.50 - 346.50 230.00 230.00 +# CHECK-NEXT: - 1935.00 196.00 317.50 - 410.50 286.00 286.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: @@ -1505,6 +1869,30 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd 
(%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqpd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqpd (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqps %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqps %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqps %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - 1.00 - - - - vcmpeqps %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcmpeqps (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - 1.00 - 1.00 - - vcvtdq2pd %xmm16, %xmm19 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtdq2pd (%rax), %xmm19 # CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 vcvtdq2pd (%rax){1to2}, %xmm19 @@ -1729,6 +2117,102 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpaddq %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddq (%rax), %ymm17, 
%ymm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), 
%xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax), %xmm1, %k2 
{%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpgtq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpgtq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpeqq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax), %ymm1, %k2 
+# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmpequq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmpequq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - 1.00 - - vpermd %ymm16, %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermd (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermd (%rax){1to8}, %ymm17, %ymm19 @@ -1935,6 +2419,54 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpsubq %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmd 
%xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestmq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestmq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 
vptestnmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - vptestnmq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 vptestnmq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - 0.50 - 0.50 - - vpunpckhdq %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpckhdq (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 diff --git a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s index e99030b7d43..c18ed71b15a 100644 --- a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s +++ b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512.s @@ -1,6 +1,15 @@ # NOTE: Assertions 
have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -instruction-tables < %s | FileCheck %s +kandw %k0, %k1, %k2 +kandnw %k0, %k1, %k2 +korw %k0, %k1, %k2 +kxnorw %k0, %k1, %k2 +kxorw %k0, %k1, %k2 +kshiftlw $2, %k1, %k2 +kshiftrw $2, %k1, %k2 +kunpckbw %k0, %k1, %k2 + vaddpd %zmm16, %zmm17, %zmm19 vaddpd (%rax), %zmm17, %zmm19 vaddpd (%rax){1to8}, %zmm17, %zmm19 @@ -21,6 +30,33 @@ vaddps %zmm16, %zmm17, %zmm19 {z}{k1} vaddps (%rax), %zmm17, %zmm19 {z}{k1} vaddps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vcmppd $0, %zmm0, %zmm1, %k2 +vcmppd $0, (%rax), %zmm1, %k2 +vcmppd $0, (%rax){1to8}, %zmm1, %k2 +vcmppd $0, %zmm0, %zmm1, %k2 {k3} +vcmppd $0, (%rax), %zmm1, %k2 {k3} +vcmppd $0, (%rax){1to8}, %zmm1, %k2 {k3} + +vcmpps $0, %zmm0, %zmm1, %k2 +vcmpps $0, (%rax), %zmm1, %k2 +vcmpps $0, (%rax){1to16}, %zmm1, %k2 +vcmpps $0, %zmm0, %zmm1, %k2 {k3} +vcmpps $0, (%rax), %zmm1, %k2 {k3} +vcmpps $0, (%rax){1to16}, %zmm1, %k2 {k3} + +vcmpsd $0, %xmm0, %xmm1, %k2 +vcmpsd $0, (%rax), %xmm1, %k2 +vcmpsd $0, %xmm0, %xmm1, %k2 {k3} +vcmpsd $0, (%rax), %xmm1, %k2 {k3} + +vcmpss $0, %xmm0, %xmm1, %k2 +vcmpss $0, (%rax), %xmm1, %k2 +vcmpss $0, %xmm0, %xmm1, %k2 {k3} +vcmpss $0, (%rax), %xmm1, %k2 {k3} + +vcomiss %xmm16, %xmm17 +vcomiss (%rax), %xmm17 + vcvtdq2pd %ymm16, %zmm19 vcvtdq2pd (%rax), %zmm19 vcvtdq2pd (%rax){1to8}, %zmm19 @@ -186,6 +222,62 @@ vpaddq %zmm16, %zmm17, %zmm19 {z}{k1} vpaddq (%rax), %zmm17, %zmm19 {z}{k1} vpaddq (%rax){1to8}, %zmm17, %zmm19 {z}{k1} +vpcmpd $0, %zmm0, %zmm1, %k2 +vpcmpd $0, (%rax), %zmm1, %k2 +vpcmpd $0, (%rax){1to16}, %zmm1, %k2 +vpcmpd $0, %zmm0, %zmm1, %k2 {k3} +vpcmpd $0, (%rax), %zmm1, %k2 {k3} +vpcmpd $0, (%rax){1to16}, %zmm1, %k2 {k3} + +vpcmpeqd %zmm0, %zmm1, %k2 +vpcmpeqd (%rax), %zmm1, %k2 +vpcmpeqd (%rax){1to16}, %zmm1, %k2 +vpcmpeqd %zmm0, %zmm1, %k2 {k3} +vpcmpeqd (%rax), %zmm1, %k2 {k3} +vpcmpeqd (%rax){1to16}, %zmm1, %k2 {k3} + +vpcmpeqq %zmm0, %zmm1, %k2 
+vpcmpeqq (%rax), %zmm1, %k2 +vpcmpeqq (%rax){1to8}, %zmm1, %k2 +vpcmpeqq %zmm0, %zmm1, %k2 {k3} +vpcmpeqq (%rax), %zmm1, %k2 {k3} +vpcmpeqq (%rax){1to8}, %zmm1, %k2 {k3} + +vpcmpgtd %zmm0, %zmm1, %k2 +vpcmpgtd (%rax), %zmm1, %k2 +vpcmpgtd (%rax){1to16}, %zmm1, %k2 +vpcmpgtd %zmm0, %zmm1, %k2 {k3} +vpcmpgtd (%rax), %zmm1, %k2 {k3} +vpcmpgtd (%rax){1to16}, %zmm1, %k2 {k3} + +vpcmpgtq %zmm0, %zmm1, %k2 +vpcmpgtq (%rax), %zmm1, %k2 +vpcmpgtq (%rax){1to8}, %zmm1, %k2 +vpcmpgtq %zmm0, %zmm1, %k2 {k3} +vpcmpgtq (%rax), %zmm1, %k2 {k3} +vpcmpgtq (%rax){1to8}, %zmm1, %k2 {k3} + +vpcmpq $0, %zmm0, %zmm1, %k2 +vpcmpq $0, (%rax), %zmm1, %k2 +vpcmpq $0, (%rax){1to8}, %zmm1, %k2 +vpcmpq $0, %zmm0, %zmm1, %k2 {k3} +vpcmpq $0, (%rax), %zmm1, %k2 {k3} +vpcmpq $0, (%rax){1to8}, %zmm1, %k2 {k3} + +vpcmpud $0, %zmm0, %zmm1, %k2 +vpcmpud $0, (%rax), %zmm1, %k2 +vpcmpud $0, (%rax){1to16}, %zmm1, %k2 +vpcmpud $0, %zmm0, %zmm1, %k2 {k3} +vpcmpud $0, (%rax), %zmm1, %k2 {k3} +vpcmpud $0, (%rax){1to16}, %zmm1, %k2 {k3} + +vpcmpuq $0, %zmm0, %zmm1, %k2 +vpcmpuq $0, (%rax), %zmm1, %k2 +vpcmpuq $0, (%rax){1to8}, %zmm1, %k2 +vpcmpuq $0, %zmm0, %zmm1, %k2 {k3} +vpcmpuq $0, (%rax), %zmm1, %k2 {k3} +vpcmpuq $0, (%rax){1to8}, %zmm1, %k2 {k3} + vpgatherdq (%rax,%ymm1,2), %zmm2 {k1} vpgatherdd (%rax,%zmm1,2), %zmm2 {k1} vpgatherqq (%rax,%zmm1,2), %zmm2 {k1} @@ -465,6 +557,34 @@ vpsubq %zmm16, %zmm17, %zmm19 {z}{k1} vpsubq (%rax), %zmm17, %zmm19 {z}{k1} vpsubq (%rax){1to8}, %zmm17, %zmm19 {z}{k1} +vptestmd %zmm0, %zmm1, %k2 +vptestmd (%rax), %zmm1, %k2 +vptestmd (%rax){1to16}, %zmm1, %k2 +vptestmd %zmm0, %zmm1, %k2 {k3} +vptestmd (%rax), %zmm1, %k2 {k3} +vptestmd (%rax){1to16}, %zmm1, %k2 {k3} + +vptestmq %zmm0, %zmm1, %k2 +vptestmq (%rax), %zmm1, %k2 +vptestmq (%rax){1to8}, %zmm1, %k2 +vptestmq %zmm0, %zmm1, %k2 {k3} +vptestmq (%rax), %zmm1, %k2 {k3} +vptestmq (%rax){1to8}, %zmm1, %k2 {k3} + +vptestnmd %zmm0, %zmm1, %k2 +vptestnmd (%rax), %zmm1, %k2 +vptestnmd (%rax){1to16}, %zmm1, %k2 +vptestnmd 
%zmm0, %zmm1, %k2 {k3} +vptestnmd (%rax), %zmm1, %k2 {k3} +vptestnmd (%rax){1to16}, %zmm1, %k2 {k3} + +vptestnmq %zmm0, %zmm1, %k2 +vptestnmq (%rax), %zmm1, %k2 +vptestnmq (%rax){1to8}, %zmm1, %k2 +vptestnmq %zmm0, %zmm1, %k2 {k3} +vptestnmq (%rax), %zmm1, %k2 {k3} +vptestnmq (%rax){1to8}, %zmm1, %k2 {k3} + vsubpd %zmm16, %zmm17, %zmm19 vsubpd (%rax), %zmm17, %zmm19 vsubpd (%rax){1to8}, %zmm17, %zmm19 @@ -485,6 +605,9 @@ vsubps %zmm16, %zmm17, %zmm19 {z}{k1} vsubps (%rax), %zmm17, %zmm19 {z}{k1} vsubps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vucomiss %xmm16, %xmm17 +vucomiss (%rax), %xmm17 + vunpckhpd %zmm16, %zmm17, %zmm19 vunpckhpd (%rax), %zmm17, %zmm19 vunpckhpd (%rax){1to8}, %zmm17, %zmm19 @@ -534,6 +657,14 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 1 1.00 kandw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kandnw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 korw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kxnorw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kxorw %k0, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kshiftlw $2, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kshiftrw $2, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kunpckbw %k0, %k1, %k2 # CHECK-NEXT: 1 4 0.50 vaddpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vaddpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vaddpd (%rax){1to8}, %zmm17, %zmm19 @@ -552,6 +683,28 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vaddps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vaddps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax){1to8}, 
%zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqps %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqps %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqsd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqsd (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqsd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqsd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqss %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqss (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqss %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqss (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 2 1.00 vcomiss %xmm16, %xmm17 +# CHECK-NEXT: 2 7 1.00 * vcomiss (%rax), %xmm17 # CHECK-NEXT: 2 7 1.00 vcvtdq2pd %ymm16, %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtdq2pd (%rax), %zmm19 # CHECK-NEXT: 2 11 0.50 * vcvtdq2pd (%rax){1to8}, %zmm19 @@ -700,6 +853,54 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 1 0.33 vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 
* vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 
3 1.00 vpcmpequq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: 5 21 4.00 * vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 25 8.00 * vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: 5 21 4.00 * vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} @@ -950,6 +1151,30 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 1 0.33 vpsubq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vptestmd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestmq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmd %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmd (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmq %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmq (%rax), %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmq (%rax), %zmm1, %k2 {%k3} +# 
CHECK-NEXT: 2 10 1.00 * vptestnmq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: 1 4 0.50 vsubpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vsubpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 11 0.50 * vsubpd (%rax){1to8}, %zmm17, %zmm19 @@ -968,6 +1193,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vsubps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vsubps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 2 1.00 vucomiss %xmm16, %xmm17 +# CHECK-NEXT: 2 7 1.00 * vucomiss (%rax), %xmm17 # CHECK-NEXT: 1 1 1.00 vunpckhpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vunpckhpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vunpckhpd (%rax){1to8}, %zmm17, %zmm19 @@ -1019,10 +1246,18 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 612.00 200.67 30.67 193.00 193.00 - 334.67 2.00 - +# CHECK-NEXT: - 612.00 209.67 30.67 224.00 224.00 - 429.67 2.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# CHECK-NEXT: - - 1.00 - - - - - - - kandw %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kandnw %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - korw %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kxnorw %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kxorw %k0, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kshiftlw $2, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kshiftrw $2, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kunpckbw %k0, %k1, %k2 # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vaddpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vaddpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vaddpd (%rax){1to8}, %zmm17, %zmm19 @@ -1041,6 +1276,28 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 
0.50 - - vaddps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vaddps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vaddps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqpd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqpd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqps %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqps %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqsd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqsd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqsd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqsd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqss %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqss (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqss %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqss (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - 1.00 - - - - - - - vcomiss %xmm16, %xmm17 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vcomiss (%rax), %xmm17 # CHECK-NEXT: - - 0.50 - - - - 1.50 - - vcvtdq2pd %ymm16, %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2pd (%rax), %zmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 
0.50 - 0.33 - - vcvtdq2pd (%rax){1to8}, %zmm19 @@ -1189,6 +1446,54 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpaddq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtd 
%zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax){1to8}, %zmm1, %k2 {%k3} # 
CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherdq (%rax,%ymm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 8.00 8.00 - 0.58 0.25 - vpgatherdd (%rax,%zmm1,2), %zmm2 {%k1} # CHECK-NEXT: - - 1.58 0.58 4.00 4.00 - 0.58 0.25 - vpgatherqq (%rax,%zmm1,2), %zmm2 {%k1} @@ -1439,6 +1744,30 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpsubq %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpsubq (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpsubq (%rax){1to8}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax){1to8}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmd %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax){1to16}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmd %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax){1to16}, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - 
- vptestnmq %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax){1to8}, %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmq %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax){1to8}, %zmm1, %k2 {%k3} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vsubpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vsubpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vsubpd (%rax){1to8}, %zmm17, %zmm19 @@ -1457,6 +1786,8 @@ vunpcklps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vsubps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vsubps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vsubps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - 1.00 - - - - - - - vucomiss %xmm16, %xmm17 +# CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vucomiss (%rax), %xmm17 # CHECK-NEXT: - - - - - - - 1.00 - - vunpckhpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vunpckhpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vunpckhpd (%rax){1to8}, %zmm17, %zmm19 diff --git a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s index a73e63c846f..048f0767a27 100644 --- a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s +++ b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bw.s @@ -1,6 +1,25 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -instruction-tables < %s | FileCheck %s +kaddd %k0, %k1, %k2 +kaddq %k0, %k1, %k2 +kandd %k0, %k1, %k2 +kandq %k0, %k1, %k2 +kandnd %k0, %k1, %k2 +kandnq %k0, %k1, %k2 +kord %k0, %k1, %k2 +korq %k0, 
%k1, %k2 +kxnord %k0, %k1, %k2 +kxnorq %k0, %k1, %k2 +kxord %k0, %k1, %k2 +kxorq %k0, %k1, %k2 +kshiftld $2, %k1, %k2 +kshiftlq $2, %k1, %k2 +kshiftrd $2, %k1, %k2 +kshiftrq $2, %k1, %k2 +kunpckdq %k0, %k1, %k2 +kunpckwd %k0, %k1, %k2 + vpabsb %zmm16, %zmm19 vpabsb (%rax), %zmm19 vpabsb %zmm16, %zmm19 {k1} @@ -29,6 +48,46 @@ vpaddw (%rax), %zmm17, %zmm19 {k1} vpaddw %zmm16, %zmm17, %zmm19 {z}{k1} vpaddw (%rax), %zmm17, %zmm19 {z}{k1} +vpcmpb $0, %zmm0, %zmm1, %k2 +vpcmpb $0, (%rax), %zmm1, %k2 +vpcmpb $0, %zmm0, %zmm1, %k2 {k3} +vpcmpb $0, (%rax), %zmm1, %k2 {k3} + +vpcmpeqb %zmm0, %zmm1, %k2 +vpcmpeqb (%rax), %zmm1, %k2 +vpcmpeqb %zmm0, %zmm1, %k2 {k3} +vpcmpeqb (%rax), %zmm1, %k2 {k3} + +vpcmpeqw %zmm0, %zmm1, %k2 +vpcmpeqw (%rax), %zmm1, %k2 +vpcmpeqw %zmm0, %zmm1, %k2 {k3} +vpcmpeqw (%rax), %zmm1, %k2 {k3} + +vpcmpgtb %zmm0, %zmm1, %k2 +vpcmpgtb (%rax), %zmm1, %k2 +vpcmpgtb %zmm0, %zmm1, %k2 {k3} +vpcmpgtb (%rax), %zmm1, %k2 {k3} + +vpcmpgtw %zmm0, %zmm1, %k2 +vpcmpgtw (%rax), %zmm1, %k2 +vpcmpgtw %zmm0, %zmm1, %k2 {k3} +vpcmpgtw (%rax), %zmm1, %k2 {k3} + +vpcmpub $0, %zmm0, %zmm1, %k2 +vpcmpub $0, (%rax), %zmm1, %k2 +vpcmpub $0, %zmm0, %zmm1, %k2 {k3} +vpcmpub $0, (%rax), %zmm1, %k2 {k3} + +vpcmpuw $0, %zmm0, %zmm1, %k2 +vpcmpuw $0, (%rax), %zmm1, %k2 +vpcmpuw $0, %zmm0, %zmm1, %k2 {k3} +vpcmpuw $0, (%rax), %zmm1, %k2 {k3} + +vpcmpw $0, %zmm0, %zmm1, %k2 +vpcmpw $0, (%rax), %zmm1, %k2 +vpcmpw $0, %zmm0, %zmm1, %k2 {k3} +vpcmpw $0, (%rax), %zmm1, %k2 {k3} + vpermw %zmm16, %zmm17, %zmm19 vpermw (%rax), %zmm17, %zmm19 vpermw %zmm16, %zmm17, %zmm19 {k1} @@ -71,6 +130,26 @@ vpsubw (%rax), %zmm17, %zmm19 {k1} vpsubw %zmm16, %zmm17, %zmm19 {z}{k1} vpsubw (%rax), %zmm17, %zmm19 {z}{k1} +vptestmb %zmm0, %zmm1, %k2 +vptestmb (%rax), %zmm1, %k2 +vptestmb %zmm0, %zmm1, %k2 {k3} +vptestmb (%rax), %zmm1, %k2 {k3} + +vptestmw %zmm0, %zmm1, %k2 +vptestmw (%rax), %zmm1, %k2 +vptestmw %zmm0, %zmm1, %k2 {k3} +vptestmw (%rax), %zmm1, %k2 {k3} + +vptestnmb %zmm0, %zmm1, %k2 
+vptestnmb (%rax), %zmm1, %k2 +vptestnmb %zmm0, %zmm1, %k2 {k3} +vptestnmb (%rax), %zmm1, %k2 {k3} + +vptestnmw %zmm0, %zmm1, %k2 +vptestnmw (%rax), %zmm1, %k2 +vptestnmw %zmm0, %zmm1, %k2 {k3} +vptestnmw (%rax), %zmm1, %k2 {k3} + vpunpckhbw %zmm16, %zmm17, %zmm19 vpunpckhbw (%rax), %zmm17, %zmm19 vpunpckhbw %zmm16, %zmm17, %zmm19 {k1} @@ -108,6 +187,24 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 kaddd %k0, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kaddq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kandd %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kandq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kandnd %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kandnq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kord %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 korq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kxnord %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kxnorq %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kxord %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kxorq %k0, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kshiftld $2, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kshiftlq $2, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kshiftrd $2, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kshiftrq $2, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kunpckdq %k0, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kunpckwd %k0, %k1, %k2 # CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpabsb (%rax), %zmm19 # CHECK-NEXT: 1 1 1.00 vpabsb %zmm16, %zmm19 {%k1} @@ -132,6 +229,38 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vpaddw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1 1 0.33 vpaddw %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqb 
(%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: 2 6 2.00 vpermw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 3 13 2.00 * vpermw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 6 2.00 vpermw %zmm16, %zmm17, %zmm19 {%k1} @@ -168,6 +297,22 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vpsubw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: 1 1 0.33 vpsubw %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vptestmb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmb 
(%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestmw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmb %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmb (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmw %zmm0, %zmm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmw (%rax), %zmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: 1 1 1.00 vpunpckhbw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 8 1.00 * vpunpckhbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: 1 1 1.00 vpunpckhbw %zmm16, %zmm17, %zmm19 {%k1} @@ -207,10 +352,28 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 20.00 8.00 21.00 21.00 - 62.00 - - +# CHECK-NEXT: - - 30.00 8.00 33.00 33.00 - 118.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# CHECK-NEXT: - - - - - - - 1.00 - - kaddd %k0, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kaddq %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kandd %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kandq %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kandnd %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kandnq %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kord %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - korq %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kxnord %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kxnorq %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - 
kxord %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kxorq %k0, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kshiftld $2, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kshiftlq $2, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kshiftrd $2, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kshiftrq $2, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kunpckdq %k0, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kunpckwd %k0, %k1, %k2 # CHECK-NEXT: - - 1.00 - - - - - - - vpabsb %zmm16, %zmm19 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vpabsb (%rax), %zmm19 # CHECK-NEXT: - - 1.00 - - - - - - - vpabsb %zmm16, %zmm19 {%k1} @@ -235,6 +398,38 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpaddw %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtb %zmm0, 
%zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - - - 2.00 - - vpermw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpermw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - - - 2.00 - - vpermw %zmm16, %zmm17, %zmm19 {%k1} @@ -271,6 +466,22 @@ vpunpcklwd (%rax), %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpsubw (%rax), %zmm17, %zmm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpsubw %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpsubw (%rax), %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmb %zmm0, %zmm1, %k2 {%k3} +# 
CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmw (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmb %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmb (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmb %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmb (%rax), %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmw %zmm0, %zmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmw (%rax), %zmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmw %zmm0, %zmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmw (%rax), %zmm1, %k2 {%k3} # CHECK-NEXT: - - - - - - - 1.00 - - vpunpckhbw %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpckhbw (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vpunpckhbw %zmm16, %zmm17, %zmm19 {%k1} diff --git a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s index 2939f1cbe49..be35aae60ba 100644 --- a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s +++ b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512bwvl.s @@ -57,6 +57,86 @@ vpaddw (%rax), %ymm17, %ymm19 {k1} vpaddw %ymm16, %ymm17, %ymm19 {z}{k1} vpaddw (%rax), %ymm17, %ymm19 {z}{k1} +vpcmpb $0, %xmm0, %xmm1, %k2 +vpcmpb $0, (%rax), %xmm1, %k2 +vpcmpb $0, %xmm0, %xmm1, %k2 {k3} +vpcmpb $0, (%rax), %xmm1, %k2 {k3} + +vpcmpb $0, %ymm0, %ymm1, %k2 +vpcmpb $0, (%rax), %ymm1, %k2 +vpcmpb $0, %ymm0, %ymm1, %k2 {k3} +vpcmpb $0, (%rax), %ymm1, %k2 {k3} + +vpcmpeqb %xmm0, %xmm1, %k2 +vpcmpeqb (%rax), %xmm1, %k2 +vpcmpeqb %xmm0, %xmm1, %k2 {k3} +vpcmpeqb (%rax), %xmm1, %k2 {k3} + 
+vpcmpeqb %ymm0, %ymm1, %k2 +vpcmpeqb (%rax), %ymm1, %k2 +vpcmpeqb %ymm0, %ymm1, %k2 {k3} +vpcmpeqb (%rax), %ymm1, %k2 {k3} + +vpcmpeqw %xmm0, %xmm1, %k2 +vpcmpeqw (%rax), %xmm1, %k2 +vpcmpeqw %xmm0, %xmm1, %k2 {k3} +vpcmpeqw (%rax), %xmm1, %k2 {k3} + +vpcmpeqw %ymm0, %ymm1, %k2 +vpcmpeqw (%rax), %ymm1, %k2 +vpcmpeqw %ymm0, %ymm1, %k2 {k3} +vpcmpeqw (%rax), %ymm1, %k2 {k3} + +vpcmpgtb %xmm0, %xmm1, %k2 +vpcmpgtb (%rax), %xmm1, %k2 +vpcmpgtb %xmm0, %xmm1, %k2 {k3} +vpcmpgtb (%rax), %xmm1, %k2 {k3} + +vpcmpgtb %ymm0, %ymm1, %k2 +vpcmpgtb (%rax), %ymm1, %k2 +vpcmpgtb %ymm0, %ymm1, %k2 {k3} +vpcmpgtb (%rax), %ymm1, %k2 {k3} + +vpcmpgtw %xmm0, %xmm1, %k2 +vpcmpgtw (%rax), %xmm1, %k2 +vpcmpgtw %xmm0, %xmm1, %k2 {k3} +vpcmpgtw (%rax), %xmm1, %k2 {k3} + +vpcmpgtw %ymm0, %ymm1, %k2 +vpcmpgtw (%rax), %ymm1, %k2 +vpcmpgtw %ymm0, %ymm1, %k2 {k3} +vpcmpgtw (%rax), %ymm1, %k2 {k3} + +vpcmpub $0, %xmm0, %xmm1, %k2 +vpcmpub $0, (%rax), %xmm1, %k2 +vpcmpub $0, %xmm0, %xmm1, %k2 {k3} +vpcmpub $0, (%rax), %xmm1, %k2 {k3} + +vpcmpub $0, %ymm0, %ymm1, %k2 +vpcmpub $0, (%rax), %ymm1, %k2 +vpcmpub $0, %ymm0, %ymm1, %k2 {k3} +vpcmpub $0, (%rax), %ymm1, %k2 {k3} + +vpcmpuw $0, %xmm0, %xmm1, %k2 +vpcmpuw $0, (%rax), %xmm1, %k2 +vpcmpuw $0, %xmm0, %xmm1, %k2 {k3} +vpcmpuw $0, (%rax), %xmm1, %k2 {k3} + +vpcmpuw $0, %ymm0, %ymm1, %k2 +vpcmpuw $0, (%rax), %ymm1, %k2 +vpcmpuw $0, %ymm0, %ymm1, %k2 {k3} +vpcmpuw $0, (%rax), %ymm1, %k2 {k3} + +vpcmpw $0, %xmm0, %xmm1, %k2 +vpcmpw $0, (%rax), %xmm1, %k2 +vpcmpw $0, %xmm0, %xmm1, %k2 {k3} +vpcmpw $0, (%rax), %xmm1, %k2 {k3} + +vpcmpw $0, %ymm0, %ymm1, %k2 +vpcmpw $0, (%rax), %ymm1, %k2 +vpcmpw $0, %ymm0, %ymm1, %k2 {k3} +vpcmpw $0, (%rax), %ymm1, %k2 {k3} + vpermw %xmm16, %xmm17, %xmm19 vpermw (%rax), %xmm17, %xmm19 vpermw %xmm16, %xmm17, %xmm19 {k1} @@ -127,6 +207,46 @@ vpsubw (%rax), %ymm17, %ymm19 {k1} vpsubw %ymm16, %ymm17, %ymm19 {z}{k1} vpsubw (%rax), %ymm17, %ymm19 {z}{k1} +vptestmb %xmm0, %xmm1, %k2 +vptestmb (%rax), %xmm1, %k2 +vptestmb 
%xmm0, %xmm1, %k2 {k3} +vptestmb (%rax), %xmm1, %k2 {k3} + +vptestmb %ymm0, %ymm1, %k2 +vptestmb (%rax), %ymm1, %k2 +vptestmb %ymm0, %ymm1, %k2 {k3} +vptestmb (%rax), %ymm1, %k2 {k3} + +vptestmw %xmm0, %xmm1, %k2 +vptestmw (%rax), %xmm1, %k2 +vptestmw %xmm0, %xmm1, %k2 {k3} +vptestmw (%rax), %xmm1, %k2 {k3} + +vptestmw %ymm0, %ymm1, %k2 +vptestmw (%rax), %ymm1, %k2 +vptestmw %ymm0, %ymm1, %k2 {k3} +vptestmw (%rax), %ymm1, %k2 {k3} + +vptestnmb %xmm0, %xmm1, %k2 +vptestnmb (%rax), %xmm1, %k2 +vptestnmb %xmm0, %xmm1, %k2 {k3} +vptestnmb (%rax), %xmm1, %k2 {k3} + +vptestnmb %ymm0, %ymm1, %k2 +vptestnmb (%rax), %ymm1, %k2 +vptestnmb %ymm0, %ymm1, %k2 {k3} +vptestnmb (%rax), %ymm1, %k2 {k3} + +vptestnmw %xmm0, %xmm1, %k2 +vptestnmw (%rax), %xmm1, %k2 +vptestnmw %xmm0, %xmm1, %k2 {k3} +vptestnmw (%rax), %xmm1, %k2 {k3} + +vptestnmw %ymm0, %ymm1, %k2 +vptestnmw (%rax), %ymm1, %k2 +vptestnmw %ymm0, %ymm1, %k2 {k3} +vptestnmw (%rax), %ymm1, %k2 {k3} + vpunpckhbw %xmm16, %xmm17, %xmm19 vpunpckhbw (%rax), %xmm17, %xmm19 vpunpckhbw %xmm16, %xmm17, %xmm19 {k1} @@ -240,6 +360,70 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vpaddw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 1 0.33 vpaddw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 
1 3 1.00 vpcmpeqb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpgtb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpgtb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpgtw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpgtw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpequb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpequb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequb (%rax), %ymm1, %k2 
{%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpequw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpequw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: 2 6 2.00 vpermw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 3 12 2.00 * vpermw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 6 2.00 vpermw %xmm16, %xmm17, %xmm19 {%k1} @@ -300,6 +484,38 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 2 8 0.50 * vpsubw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: 1 1 0.33 vpsubw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vptestmb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestmb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestmb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestmw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestmw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestmw (%rax), %xmm1, %k2 {%k3} +# 
CHECK-NEXT: 1 3 1.00 vptestmw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmb %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestnmb (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestnmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmb %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmb (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmw %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestnmw (%rax), %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestnmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmw %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmw (%rax), %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: 1 1 1.00 vpunpckhbw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 7 1.00 * vpunpckhbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: 1 1 1.00 vpunpckhbw %xmm16, %xmm17, %xmm19 {%k1} @@ -363,7 +579,7 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 28.00 28.00 39.00 39.00 - 112.00 - - +# CHECK-NEXT: - - 28.00 28.00 63.00 63.00 - 208.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -415,6 +631,70 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpaddw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddw (%rax), %ymm17, %ymm19 
{%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtb (%rax), 
%xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - 
vpcmpequw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - - - 2.00 - - vpermw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 2.00 - - vpermw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - 2.00 - - vpermw %xmm16, %xmm17, %xmm19 {%k1} @@ -475,6 +755,38 @@ vpunpcklwd (%rax), %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpsubw (%rax), %ymm17, %ymm19 {%k1} # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpsubw %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpsubw (%rax), %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmw (%rax), %xmm1, %k2 {%k3} 
+# CHECK-NEXT: - - - - - - - 1.00 - - vptestmw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmw (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmb %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmb (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmb %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmb (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmb %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmb (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmb %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmb (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmw %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmw (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmw %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmw (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmw %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmw (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmw %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmw (%rax), %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - - - 1.00 - - vpunpckhbw %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpckhbw (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - - - - 1.00 - - vpunpckhbw %xmm16, %xmm17, %xmm19 {%k1} diff --git a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s index 1612ede05fb..8bbc0777e0c 100644 --- a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s +++ b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dq.s @@ -1,6 +1,16 @@ # NOTE: 
Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -instruction-tables < %s | FileCheck %s +kaddb %k0, %k1, %k2 +kaddw %k0, %k1, %k2 +kandb %k0, %k1, %k2 +kandnb %k0, %k1, %k2 +korb %k0, %k1, %k2 +kxnorb %k0, %k1, %k2 +kxorb %k0, %k1, %k2 +kshiftlb $2, %k1, %k2 +kshiftrb $2, %k1, %k2 + vandnpd %zmm16, %zmm17, %zmm19 vandnpd (%rax), %zmm17, %zmm19 vandnpd (%rax){1to8}, %zmm17, %zmm19 @@ -81,6 +91,30 @@ vorps %zmm16, %zmm17, %zmm19 {z}{k1} vorps (%rax), %zmm17, %zmm19 {z}{k1} vorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} +vfpclasspd $0xab, %zmm16, %k1 +vfpclasspdz $0xab, (%rax), %k1 +vfpclasspdz $0xab, (%rax){1to8}, %k1 +vfpclasspd $0xab, %zmm16, %k1 {k2} +vfpclasspdz $0xab, (%rax), %k1 {k2} +vfpclasspdz $0xab, (%rax){1to8}, %k1 {k2} + +vfpclassps $0xab, %zmm16, %k1 +vfpclasspsz $0xab, (%rax), %k1 +vfpclasspsz $0xab, (%rax){1to16}, %k1 +vfpclassps $0xab, %zmm16, %k1 {k2} +vfpclasspsz $0xab, (%rax), %k1 {k2} +vfpclasspsz $0xab, (%rax){1to16}, %k1 {k2} + +vfpclasssd $0xab, %xmm16, %k1 +vfpclasssd $0xab, (%rax), %k1 +vfpclasssd $0xab, %xmm16, %k1 {k2} +vfpclasssd $0xab, (%rax), %k1 {k2} + +vfpclassss $0xab, %xmm16, %k1 +vfpclassss $0xab, (%rax), %k1 +vfpclassss $0xab, %xmm16, %k1 {k2} +vfpclassss $0xab, (%rax), %k1 {k2} + vpmullq %zmm16, %zmm17, %zmm19 vpmullq (%rax), %zmm17, %zmm19 vpmullq %zmm16, %zmm17, %zmm19 {k1} @@ -117,6 +151,15 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: +# CHECK-NEXT: 1 3 1.00 kaddb %k0, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kaddw %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kandb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kandnb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 korb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kxnorb %k0, %k1, %k2 +# CHECK-NEXT: 1 1 1.00 kxorb %k0, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kshiftlb $2, %k1, %k2 +# CHECK-NEXT: 1 3 1.00 kshiftrb $2, %k1, %k2 # CHECK-NEXT: 1 1 0.50 
vandnpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 2 8 0.50 * vandnpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: 2 8 0.50 * vandnpd (%rax){1to8}, %zmm17, %zmm19 @@ -189,6 +232,26 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: 1 1 0.50 vorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %zmm16, %k1 +# CHECK-NEXT: 2 11 0.50 * vfpclasspdz $171, (%rax), %k1 +# CHECK-NEXT: 2 11 0.50 * vfpclasspd $171, (%rax){1to8}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %zmm16, %k1 {%k2} +# CHECK-NEXT: 2 11 0.50 * vfpclasspdz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 11 0.50 * vfpclasspd $171, (%rax){1to8}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %zmm16, %k1 +# CHECK-NEXT: 2 11 0.50 * vfpclasspsz $171, (%rax), %k1 +# CHECK-NEXT: 2 11 0.50 * vfpclassps $171, (%rax){1to16}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %zmm16, %k1 {%k2} +# CHECK-NEXT: 2 11 0.50 * vfpclasspsz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 11 0.50 * vfpclassps $171, (%rax){1to16}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclasssd $171, %xmm16, %k1 +# CHECK-NEXT: 2 8 1.00 * vfpclasssd $171, (%rax), %k1 +# CHECK-NEXT: 1 3 1.00 vfpclasssd $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: 2 8 1.00 * vfpclasssd $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclassss $171, %xmm16, %k1 +# CHECK-NEXT: 2 9 1.00 * vfpclassss $171, (%rax), %k1 +# CHECK-NEXT: 1 3 1.00 vfpclassss $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: 2 9 1.00 * vfpclassss $171, (%rax), %k1 {%k2} # CHECK-NEXT: 3 12 1.50 vpmullq %zmm16, %zmm17, %zmm19 # CHECK-NEXT: 4 19 1.00 * vpmullq (%rax), %zmm17, %zmm19 # CHECK-NEXT: 3 12 1.50 vpmullq %zmm16, %zmm17, %zmm19 {%k1} @@ -228,10 +291,19 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 50.50 7.00 31.50 31.50 - 57.50 - - +# CHECK-NEXT: 
- - 59.50 7.00 37.50 37.50 - 77.50 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: +# CHECK-NEXT: - - - - - - - 1.00 - - kaddb %k0, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kaddw %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kandb %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kandnb %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - korb %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kxnorb %k0, %k1, %k2 +# CHECK-NEXT: - - 1.00 - - - - - - - kxorb %k0, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kshiftlb $2, %k1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - kshiftrb $2, %k1, %k2 # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vandnpd %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vandnpd (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vandnpd (%rax){1to8}, %zmm17, %zmm19 @@ -304,6 +376,26 @@ vxorps (%rax){1to16}, %zmm17, %zmm19 {z}{k1} # CHECK-NEXT: - - 0.50 - - - - 0.50 - - vorps %zmm16, %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vorps (%rax), %zmm17, %zmm19 {%k1} {z} # CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vorps (%rax){1to16}, %zmm17, %zmm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclasspd $171, %zmm16, %k1 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfpclasspdz $171, (%rax), %k1 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfpclasspd $171, (%rax){1to8}, %k1 +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclasspd $171, %zmm16, %k1 {%k2} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfpclasspdz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfpclasspd $171, (%rax){1to8}, %k1 {%k2} +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclassps $171, %zmm16, %k1 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfpclasspsz $171, (%rax), %k1 +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfpclassps $171, (%rax){1to16}, %k1 +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclassps $171, %zmm16, %k1 {%k2} 
+# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfpclasspsz $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.50 - 0.50 0.50 - 0.50 - - vfpclassps $171, (%rax){1to16}, %k1 {%k2} +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclasssd $171, %xmm16, %k1 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vfpclasssd $171, (%rax), %k1 +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclasssd $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vfpclasssd $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclassss $171, %xmm16, %k1 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vfpclassss $171, (%rax), %k1 +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclassss $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vfpclassss $171, (%rax), %k1 {%k2} # CHECK-NEXT: - - 1.50 - - - - 1.50 - - vpmullq %zmm16, %zmm17, %zmm19 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vpmullq (%rax), %zmm17, %zmm19 # CHECK-NEXT: - - 1.50 - - - - 1.50 - - vpmullq %zmm16, %zmm17, %zmm19 {%k1} diff --git a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s index f6b3cd63833..8968d576b6f 100644 --- a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s +++ b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512dqvl.s @@ -121,6 +121,34 @@ vorps %ymm16, %ymm17, %ymm19 {z}{k1} vorps (%rax), %ymm17, %ymm19 {z}{k1} vorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vfpclasspd $0xab, %xmm16, %k1 +vfpclasspdx $0xab, (%rax), %k1 +vfpclasspdx $0xab, (%rax){1to2}, %k1 +vfpclasspd $0xab, %xmm16, %k1 {k2} +vfpclasspdx $0xab, (%rax), %k1 {k2} +vfpclasspdx $0xab, (%rax){1to2}, %k1 {k2} + +vfpclasspd $0xab, %ymm16, %k1 +vfpclasspdy $0xab, (%rax), %k1 +vfpclasspdy $0xab, (%rax){1to4}, %k1 +vfpclasspd $0xab, %ymm16, %k1 {k2} +vfpclasspdy $0xab, (%rax), %k1 {k2} +vfpclasspdy $0xab, (%rax){1to4}, %k1 {k2} + +vfpclassps $0xab, %xmm16, %k1 +vfpclasspsx $0xab, (%rax), %k1 +vfpclasspsx $0xab, (%rax){1to4}, %k1 +vfpclassps $0xab, %xmm16, 
%k1 {k2} +vfpclasspsx $0xab, (%rax), %k1 {k2} +vfpclasspsx $0xab, (%rax){1to4}, %k1 {k2} + +vfpclassps $0xab, %ymm16, %k1 +vfpclasspsy $0xab, (%rax), %k1 +vfpclasspsy $0xab, (%rax){1to8}, %k1 +vfpclassps $0xab, %ymm16, %k1 {k2} +vfpclasspsy $0xab, (%rax), %k1 {k2} +vfpclasspsy $0xab, (%rax){1to8}, %k1 {k2} + vpmullq %xmm16, %xmm17, %xmm19 vpmullq (%rax), %xmm17, %xmm19 vpmullq %xmm16, %xmm17, %xmm19 {k1} @@ -292,6 +320,30 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 1 0.33 vorps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vorps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %xmm16, %k1 +# CHECK-NEXT: 2 10 0.50 * vfpclasspdx $171, (%rax), %k1 +# CHECK-NEXT: 2 10 0.50 * vfpclasspd $171, (%rax){1to2}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: 2 10 0.50 * vfpclasspdx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 10 0.50 * vfpclasspd $171, (%rax){1to2}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %ymm16, %k1 +# CHECK-NEXT: 2 11 0.50 * vfpclasspdy $171, (%rax), %k1 +# CHECK-NEXT: 2 11 0.50 * vfpclasspd $171, (%rax){1to4}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclasspd $171, %ymm16, %k1 {%k2} +# CHECK-NEXT: 2 11 0.50 * vfpclasspdy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 11 0.50 * vfpclasspd $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %xmm16, %k1 +# CHECK-NEXT: 2 10 0.50 * vfpclasspsx $171, (%rax), %k1 +# CHECK-NEXT: 2 10 0.50 * vfpclassps $171, (%rax){1to4}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: 2 10 0.50 * vfpclasspsx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 10 0.50 * vfpclassps $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %ymm16, %k1 +# CHECK-NEXT: 2 11 0.50 * vfpclasspsy $171, (%rax), %k1 +# CHECK-NEXT: 2 11 0.50 * vfpclassps $171, (%rax){1to8}, %k1 +# CHECK-NEXT: 1 3 1.00 vfpclassps $171, %ymm16, %k1 {%k2} +# 
CHECK-NEXT: 2 11 0.50 * vfpclasspsy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: 2 11 0.50 * vfpclassps $171, (%rax){1to8}, %k1 {%k2} # CHECK-NEXT: 3 12 1.50 vpmullq %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 4 18 1.00 * vpmullq (%rax), %xmm17, %xmm19 # CHECK-NEXT: 3 12 1.50 vpmullq %xmm16, %xmm17, %xmm19 {%k1} @@ -355,7 +407,7 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 64.00 64.00 51.00 51.00 - 52.00 - - +# CHECK-NEXT: - - 72.00 72.00 59.00 59.00 - 60.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -467,6 +519,30 @@ vxorps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vorps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vorps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vorps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclasspd $171, %xmm16, %k1 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspdx $171, (%rax), %k1 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspd $171, (%rax){1to2}, %k1 +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclasspd $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspdx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspd $171, (%rax){1to2}, %k1 {%k2} +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclasspd $171, %ymm16, %k1 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspdy $171, (%rax), %k1 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspd $171, (%rax){1to4}, %k1 +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclasspd $171, %ymm16, %k1 {%k2} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspdy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspd $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclassps 
$171, %xmm16, %k1 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspsx $171, (%rax), %k1 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclassps $171, (%rax){1to4}, %k1 +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclassps $171, %xmm16, %k1 {%k2} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspsx $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclassps $171, (%rax){1to4}, %k1 {%k2} +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclassps $171, %ymm16, %k1 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspsy $171, (%rax), %k1 +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclassps $171, (%rax){1to8}, %k1 +# CHECK-NEXT: - - - - - - - 1.00 - - vfpclassps $171, %ymm16, %k1 {%k2} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclasspsy $171, (%rax), %k1 {%k2} +# CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vfpclassps $171, (%rax){1to8}, %k1 {%k2} # CHECK-NEXT: - - 1.50 1.50 - - - - - - vpmullq %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vpmullq (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - 1.50 1.50 - - - - - - vpmullq %xmm16, %xmm17, %xmm19 {%k1} diff --git a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s index 4b8db4f0f4d..b31524dbacb 100644 --- a/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s +++ b/test/tools/llvm-mca/X86/SkylakeServer/resources-avx512vl.s @@ -41,6 +41,34 @@ vaddps %ymm16, %ymm17, %ymm19 {z}{k1} vaddps (%rax), %ymm17, %ymm19 {z}{k1} vaddps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} +vcmppd $0, %xmm0, %xmm1, %k2 +vcmppd $0, (%rax), %xmm1, %k2 +vcmppd $0, (%rax){1to2}, %xmm1, %k2 +vcmppd $0, %xmm0, %xmm1, %k2 {k3} +vcmppd $0, (%rax), %xmm1, %k2 {k3} +vcmppd $0, (%rax){1to2}, %xmm1, %k2 {k3} + +vcmppd $0, %ymm0, %ymm1, %k2 +vcmppd $0, (%rax), %ymm1, %k2 +vcmppd $0, (%rax){1to4}, %ymm1, %k2 +vcmppd $0, %ymm0, %ymm1, %k2 {k3} +vcmppd $0, (%rax), %ymm1, %k2 {k3} +vcmppd $0, (%rax){1to4}, %ymm1, %k2 {k3} + +vcmpps $0, 
%xmm0, %xmm1, %k2 +vcmpps $0, (%rax), %xmm1, %k2 +vcmpps $0, (%rax){1to4}, %xmm1, %k2 +vcmpps $0, %xmm0, %xmm1, %k2 {k3} +vcmpps $0, (%rax), %xmm1, %k2 {k3} +vcmpps $0, (%rax){1to4}, %xmm1, %k2 {k3} + +vcmpps $0, %ymm0, %ymm1, %k2 +vcmpps $0, (%rax), %ymm1, %k2 +vcmpps $0, (%rax){1to8}, %ymm1, %k2 +vcmpps $0, %ymm0, %ymm1, %k2 {k3} +vcmpps $0, (%rax), %ymm1, %k2 {k3} +vcmpps $0, (%rax){1to8}, %ymm1, %k2 {k3} + vcvtdq2pd %xmm16, %xmm19 vcvtdq2pd (%rax), %xmm19 vcvtdq2pd (%rax){1to2}, %xmm19 @@ -291,6 +319,118 @@ vpaddq %ymm16, %ymm17, %ymm19 {z}{k1} vpaddq (%rax), %ymm17, %ymm19 {z}{k1} vpaddq (%rax){1to4}, %ymm17, %ymm19 {z}{k1} +vpcmpd $0, %xmm0, %xmm1, %k2 +vpcmpd $0, (%rax), %xmm1, %k2 +vpcmpd $0, (%rax){1to4}, %xmm1, %k2 +vpcmpd $0, %xmm0, %xmm1, %k2 {k3} +vpcmpd $0, (%rax), %xmm1, %k2 {k3} +vpcmpd $0, (%rax){1to4}, %xmm1, %k2 {k3} + +vpcmpd $0, %ymm0, %ymm1, %k2 +vpcmpd $0, (%rax), %ymm1, %k2 +vpcmpd $0, (%rax){1to8}, %ymm1, %k2 +vpcmpd $0, %ymm0, %ymm1, %k2 {k3} +vpcmpd $0, (%rax), %ymm1, %k2 {k3} +vpcmpd $0, (%rax){1to8}, %ymm1, %k2 {k3} + +vpcmpeqd %xmm0, %xmm1, %k2 +vpcmpeqd (%rax), %xmm1, %k2 +vpcmpeqd (%rax){1to4}, %xmm1, %k2 +vpcmpeqd %xmm0, %xmm1, %k2 {k3} +vpcmpeqd (%rax), %xmm1, %k2 {k3} +vpcmpeqd (%rax){1to4}, %xmm1, %k2 {k3} + +vpcmpeqd %ymm0, %ymm1, %k2 +vpcmpeqd (%rax), %ymm1, %k2 +vpcmpeqd (%rax){1to8}, %ymm1, %k2 +vpcmpeqd %ymm0, %ymm1, %k2 {k3} +vpcmpeqd (%rax), %ymm1, %k2 {k3} +vpcmpeqd (%rax){1to8}, %ymm1, %k2 {k3} + +vpcmpeqq %xmm0, %xmm1, %k2 +vpcmpeqq (%rax), %xmm1, %k2 +vpcmpeqq (%rax){1to2}, %xmm1, %k2 +vpcmpeqq %xmm0, %xmm1, %k2 {k3} +vpcmpeqq (%rax), %xmm1, %k2 {k3} +vpcmpeqq (%rax){1to2}, %xmm1, %k2 {k3} + +vpcmpeqq %ymm0, %ymm1, %k2 +vpcmpeqq (%rax), %ymm1, %k2 +vpcmpeqq (%rax){1to4}, %ymm1, %k2 +vpcmpeqq %ymm0, %ymm1, %k2 {k3} +vpcmpeqq (%rax), %ymm1, %k2 {k3} +vpcmpeqq (%rax){1to4}, %ymm1, %k2 {k3} + +vpcmpgtd %xmm0, %xmm1, %k2 +vpcmpgtd (%rax), %xmm1, %k2 +vpcmpgtd (%rax){1to4}, %xmm1, %k2 +vpcmpgtd %xmm0, %xmm1, %k2 {k3} 
+vpcmpgtd (%rax), %xmm1, %k2 {k3} +vpcmpgtd (%rax){1to4}, %xmm1, %k2 {k3} + +vpcmpgtd %ymm0, %ymm1, %k2 +vpcmpgtd (%rax), %ymm1, %k2 +vpcmpgtd (%rax){1to8}, %ymm1, %k2 +vpcmpgtd %ymm0, %ymm1, %k2 {k3} +vpcmpgtd (%rax), %ymm1, %k2 {k3} +vpcmpgtd (%rax){1to8}, %ymm1, %k2 {k3} + +vpcmpgtq %xmm0, %xmm1, %k2 +vpcmpgtq (%rax), %xmm1, %k2 +vpcmpgtq (%rax){1to2}, %xmm1, %k2 +vpcmpgtq %xmm0, %xmm1, %k2 {k3} +vpcmpgtq (%rax), %xmm1, %k2 {k3} +vpcmpgtq (%rax){1to2}, %xmm1, %k2 {k3} + +vpcmpgtq %ymm0, %ymm1, %k2 +vpcmpgtq (%rax), %ymm1, %k2 +vpcmpgtq (%rax){1to4}, %ymm1, %k2 +vpcmpgtq %ymm0, %ymm1, %k2 {k3} +vpcmpgtq (%rax), %ymm1, %k2 {k3} +vpcmpgtq (%rax){1to4}, %ymm1, %k2 {k3} + +vpcmpq $0, %xmm0, %xmm1, %k2 +vpcmpq $0, (%rax), %xmm1, %k2 +vpcmpq $0, (%rax){1to2}, %xmm1, %k2 +vpcmpq $0, %xmm0, %xmm1, %k2 {k3} +vpcmpq $0, (%rax), %xmm1, %k2 {k3} +vpcmpq $0, (%rax){1to2}, %xmm1, %k2 {k3} + +vpcmpq $0, %ymm0, %ymm1, %k2 +vpcmpq $0, (%rax), %ymm1, %k2 +vpcmpq $0, (%rax){1to4}, %ymm1, %k2 +vpcmpq $0, %ymm0, %ymm1, %k2 {k3} +vpcmpq $0, (%rax), %ymm1, %k2 {k3} +vpcmpq $0, (%rax){1to4}, %ymm1, %k2 {k3} + +vpcmpud $0, %xmm0, %xmm1, %k2 +vpcmpud $0, (%rax), %xmm1, %k2 +vpcmpud $0, (%rax){1to4}, %xmm1, %k2 +vpcmpud $0, %xmm0, %xmm1, %k2 {k3} +vpcmpud $0, (%rax), %xmm1, %k2 {k3} +vpcmpud $0, (%rax){1to4}, %xmm1, %k2 {k3} + +vpcmpud $0, %ymm0, %ymm1, %k2 +vpcmpud $0, (%rax), %ymm1, %k2 +vpcmpud $0, (%rax){1to8}, %ymm1, %k2 +vpcmpud $0, %ymm0, %ymm1, %k2 {k3} +vpcmpud $0, (%rax), %ymm1, %k2 {k3} +vpcmpud $0, (%rax){1to8}, %ymm1, %k2 {k3} + +vpcmpuq $0, %xmm0, %xmm1, %k2 +vpcmpuq $0, (%rax), %xmm1, %k2 +vpcmpuq $0, (%rax){1to2}, %xmm1, %k2 +vpcmpuq $0, %xmm0, %xmm1, %k2 {k3} +vpcmpuq $0, (%rax), %xmm1, %k2 {k3} +vpcmpuq $0, (%rax){1to2}, %xmm1, %k2 {k3} + +vpcmpuq $0, %ymm0, %ymm1, %k2 +vpcmpuq $0, (%rax), %ymm1, %k2 +vpcmpuq $0, (%rax){1to4}, %ymm1, %k2 +vpcmpuq $0, %ymm0, %ymm1, %k2 {k3} +vpcmpuq $0, (%rax), %ymm1, %k2 {k3} +vpcmpuq $0, (%rax){1to4}, %ymm1, %k2 {k3} + vpermd %ymm16, 
%ymm17, %ymm19 vpermd (%rax), %ymm17, %ymm19 vpermd (%rax){1to8}, %ymm17, %ymm19 @@ -521,6 +661,62 @@ vpsubq %ymm16, %ymm17, %ymm19 {z}{k1} vpsubq (%rax), %ymm17, %ymm19 {z}{k1} vpsubq (%rax){1to4}, %ymm17, %ymm19 {z}{k1} +vptestmd %xmm0, %xmm1, %k2 +vptestmd (%rax), %xmm1, %k2 +vptestmd (%rax){1to4}, %xmm1, %k2 +vptestmd %xmm0, %xmm1, %k2 {k3} +vptestmd (%rax), %xmm1, %k2 {k3} +vptestmd (%rax){1to4}, %xmm1, %k2 {k3} + +vptestmd %ymm0, %ymm1, %k2 +vptestmd (%rax), %ymm1, %k2 +vptestmd (%rax){1to8}, %ymm1, %k2 +vptestmd %ymm0, %ymm1, %k2 {k3} +vptestmd (%rax), %ymm1, %k2 {k3} +vptestmd (%rax){1to8}, %ymm1, %k2 {k3} + +vptestmq %xmm0, %xmm1, %k2 +vptestmq (%rax), %xmm1, %k2 +vptestmq (%rax){1to2}, %xmm1, %k2 +vptestmq %xmm0, %xmm1, %k2 {k3} +vptestmq (%rax), %xmm1, %k2 {k3} +vptestmq (%rax){1to2}, %xmm1, %k2 {k3} + +vptestmq %ymm0, %ymm1, %k2 +vptestmq (%rax), %ymm1, %k2 +vptestmq (%rax){1to4}, %ymm1, %k2 +vptestmq %ymm0, %ymm1, %k2 {k3} +vptestmq (%rax), %ymm1, %k2 {k3} +vptestmq (%rax){1to4}, %ymm1, %k2 {k3} + +vptestnmd %xmm0, %xmm1, %k2 +vptestnmd (%rax), %xmm1, %k2 +vptestnmd (%rax){1to4}, %xmm1, %k2 +vptestnmd %xmm0, %xmm1, %k2 {k3} +vptestnmd (%rax), %xmm1, %k2 {k3} +vptestnmd (%rax){1to4}, %xmm1, %k2 {k3} + +vptestnmd %ymm0, %ymm1, %k2 +vptestnmd (%rax), %ymm1, %k2 +vptestnmd (%rax){1to8}, %ymm1, %k2 +vptestnmd %ymm0, %ymm1, %k2 {k3} +vptestnmd (%rax), %ymm1, %k2 {k3} +vptestnmd (%rax){1to8}, %ymm1, %k2 {k3} + +vptestnmq %xmm0, %xmm1, %k2 +vptestnmq (%rax), %xmm1, %k2 +vptestnmq (%rax){1to2}, %xmm1, %k2 +vptestnmq %xmm0, %xmm1, %k2 {k3} +vptestnmq (%rax), %xmm1, %k2 {k3} +vptestnmq (%rax){1to2}, %xmm1, %k2 {k3} + +vptestnmq %ymm0, %ymm1, %k2 +vptestnmq (%rax), %ymm1, %k2 +vptestnmq (%rax){1to4}, %ymm1, %k2 +vptestnmq %ymm0, %ymm1, %k2 {k3} +vptestnmq (%rax), %ymm1, %k2 {k3} +vptestnmq (%rax){1to4}, %ymm1, %k2 {k3} + vpunpckhdq %xmm16, %xmm17, %xmm19 vpunpckhdq (%rax), %xmm17, %xmm19 vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 @@ -806,6 +1002,30 @@ vunpcklps 
(%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 4 0.50 vaddps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vaddps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 11 0.50 * vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqpd (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqpd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqpd (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqps %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqps (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vcmpeqps (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqps %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqps (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vcmpeqps (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vcmpeqps %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vcmpeqps %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vcmpeqps (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: 2 5 1.00 vcvtdq2pd %xmm16, %xmm19 # CHECK-NEXT: 2 10 0.50 * vcvtdq2pd (%rax), %xmm19 # CHECK-NEXT: 2 10 0.50 * vcvtdq2pd (%rax){1to2}, %xmm19 @@ -1030,6 +1250,102 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 1 0.33 vpaddq %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpaddq (%rax), %ymm17, %ymm19 {%k1} {z} # 
CHECK-NEXT: 2 8 0.50 * vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 
1 3 1.00 vpcmpeqq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpgtd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpgtd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpgtd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpgtd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpgtq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpgtq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpgtq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpgtq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpgtq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpgtq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpgtq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpgtq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * 
vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpeqq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpequd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpequd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpequd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpequd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpequq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vpcmpequq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpequq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vpcmpequq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vpcmpequq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vpcmpequq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vpcmpequq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vpcmpequq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: 1 3 1.00 vpermd %ymm16, %ymm17, %ymm19 # CHECK-NEXT: 2 10 1.00 * vpermd (%rax), %ymm17, %ymm19 # CHECK-NEXT: 2 10 1.00 * vpermd (%rax){1to8}, %ymm17, %ymm19 @@ -1236,6 +1552,54 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: 1 1 0.33 vpsubq %ymm16, %ymm17, %ymm19 {%k1} 
{z} # CHECK-NEXT: 2 8 0.50 * vpsubq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: 2 8 0.50 * vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: 1 3 1.00 vptestmd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestmd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestmd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmd (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestmq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestmq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestmq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestmq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestmq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmd %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestnmd (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestnmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestnmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestnmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmd %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmd (%rax), %ymm1, 
%k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmq %xmm0, %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestnmq (%rax), %xmm1, %k2 +# CHECK-NEXT: 2 9 1.00 * vptestnmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestnmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: 2 9 1.00 * vptestnmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: 1 3 1.00 vptestnmq %ymm0, %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmq (%rax), %ymm1, %k2 +# CHECK-NEXT: 2 10 1.00 * vptestnmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: 1 3 1.00 vptestnmq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: 2 10 1.00 * vptestnmq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: 1 1 1.00 vpunpckhdq %xmm16, %xmm17, %xmm19 # CHECK-NEXT: 2 7 1.00 * vpunpckhdq (%rax), %xmm17, %xmm19 # CHECK-NEXT: 2 7 1.00 * vpunpckhdq (%rax){1to4}, %xmm17, %xmm19 @@ -1467,7 +1831,7 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 423.00 242.33 154.33 252.00 252.00 - 337.33 4.00 - +# CHECK-NEXT: - 423.00 242.33 154.33 308.00 308.00 - 505.33 4.00 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1507,6 +1871,30 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.50 0.50 - - - - - - vaddps %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.50 0.50 0.50 0.50 - - - - vaddps (%rax){1to8}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqpd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - 
vcmpeqpd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqpd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqpd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqpd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqpd (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqps %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqps %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqps %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vcmpeqps %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vcmpeqps (%rax){1to8}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - 0.33 0.33 - - - 1.33 - - vcvtdq2pd %xmm16, %xmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2pd (%rax), %xmm19 # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vcvtdq2pd (%rax){1to2}, %xmm19 @@ -1731,6 +2119,102 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.33 
0.33 - - - 0.33 - - vpaddq %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpaddq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - 
vpcmpeqd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 
0.50 - 1.00 - - vpcmpgtq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpgtq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpgtq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpeqq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpeqq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax), %xmm1, %k2 {%k3} +# 
CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vpcmpequq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpcmpequq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - - - 1.00 - - vpermd %ymm16, %ymm17, %ymm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermd (%rax), %ymm17, %ymm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpermd (%rax){1to8}, %ymm17, %ymm19 @@ -1937,6 +2421,54 @@ vunpcklps (%rax){1to8}, %ymm17, %ymm19 {z}{k1} # CHECK-NEXT: - - 0.33 0.33 - - - 0.33 - - vpsubq %ymm16, %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpsubq (%rax), %ymm17, %ymm19 {%k1} {z} # CHECK-NEXT: - - 0.33 0.33 0.50 0.50 - 0.33 - - vpsubq (%rax){1to4}, %ymm17, %ymm19 {%k1} {z} 
+# CHECK-NEXT: - - - - - - - 1.00 - - vptestmd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax){1to4}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestmq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestmq (%rax){1to4}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmd %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax){1to4}, 
%xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmd %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax){1to4}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmd %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax){1to8}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmd %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmd (%rax){1to8}, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmq %xmm0, %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax), %xmm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax){1to2}, %xmm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmq %xmm0, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax), %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax){1to2}, %xmm1, %k2 {%k3} +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmq %ymm0, %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax), %ymm1, %k2 +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax){1to4}, %ymm1, %k2 +# CHECK-NEXT: - - - - - - - 1.00 - - vptestnmq %ymm0, %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax), %ymm1, %k2 {%k3} +# CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vptestnmq (%rax){1to4}, %ymm1, %k2 {%k3} # CHECK-NEXT: - - - - - - - 1.00 - - vpunpckhdq %xmm16, %xmm17, %xmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpckhdq (%rax), %xmm17, %xmm19 # CHECK-NEXT: - - - - 0.50 0.50 - 1.00 - - vpunpckhdq (%rax){1to4}, %xmm17, %xmm19