1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00

[NFC][X86] Baseline tests for AMD BdVer2 (Piledriver) Scheduler model

Adding the baseline tests in a preparatory NFC commit,
so that the actual commit shows the *diff*.

Yes, I'm aware that a few of these codegen-based sched tests
are testing the wrong instructions; I will fix that afterwards.

For https://reviews.llvm.org/D52779

llvm-svn: 345462
This commit is contained in:
Roman Lebedev 2018-10-27 20:36:11 +00:00
parent 4182c8fe4c
commit 1a55ca0270
116 changed files with 26834 additions and 950 deletions

View File

@ -14,6 +14,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+aes,-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=+aes,-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@ -92,6 +94,18 @@ define <2 x i64> @test_aesdec(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_aesdec:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: aesdec (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_aesdec:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaesdec %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: vaesdec (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_aesdec:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesdec %xmm1, %xmm0 # sched: [3:1.00]
@ -195,6 +209,18 @@ define <2 x i64> @test_aesdeclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SKX-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_aesdeclast:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: aesdeclast (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_aesdeclast:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaesdeclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: vaesdeclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_aesdeclast:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesdeclast %xmm1, %xmm0 # sched: [3:1.00]
@ -298,6 +324,18 @@ define <2 x i64> @test_aesenc(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_aesenc:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: aesenc (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_aesenc:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaesenc %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: vaesenc (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_aesenc:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesenc %xmm1, %xmm0 # sched: [3:1.00]
@ -401,6 +439,18 @@ define <2 x i64> @test_aesenclast(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2)
; SKX-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_aesenclast:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: aesenclast (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_aesenclast:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaesenclast %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: vaesenclast (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_aesenclast:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesenclast %xmm1, %xmm0 # sched: [3:1.00]
@ -517,6 +567,20 @@ define <2 x i64> @test_aesimc(<2 x i64> %a0, <2 x i64> *%a1) {
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_aesimc:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [12:2.00]
; BDVER2-SSE-NEXT: aesimc (%rdi), %xmm0 # sched: [18:2.00]
; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_aesimc:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaesimc %xmm0, %xmm0 # sched: [12:2.00]
; BDVER2-NEXT: vaesimc (%rdi), %xmm1 # sched: [18:2.00]
; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_aesimc:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aesimc %xmm0, %xmm1 # sched: [2:1.00]
@ -637,6 +701,20 @@ define <2 x i64> @test_aeskeygenassist(<2 x i64> %a0, <2 x i64> *%a1) {
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_aeskeygenassist:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [8:3.67]
; BDVER2-SSE-NEXT: aeskeygenassist $7, (%rdi), %xmm0 # sched: [8:3.33]
; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_aeskeygenassist:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaeskeygenassist $7, %xmm0, %xmm0 # sched: [8:3.67]
; BDVER2-NEXT: vaeskeygenassist $7, (%rdi), %xmm1 # sched: [8:3.33]
; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_aeskeygenassist:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: aeskeygenassist $7, %xmm0, %xmm1 # sched: [2:1.00]

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,7 @@
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=VZ --check-prefix=AVX512
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mattr=+avx,+fast-partial-ymm-or-zmm-write | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=FAST-ymm-zmm
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BDVER2
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-unknown-unknown -mcpu=btver2 | FileCheck %s --check-prefix=ALL --check-prefix=NO-VZ --check-prefix=BTVER2
declare i32 @foo()
@ -56,6 +57,20 @@ define <8 x float> @test01(<4 x float> %a, <4 x float> %b, <8 x float> %c) nounw
; FAST-ymm-zmm-NEXT: addq $56, %rsp
; FAST-ymm-zmm-NEXT: retq
;
; BDVER2-LABEL: test01:
; BDVER2: # %bb.0:
; BDVER2-NEXT: subq $56, %rsp
; BDVER2-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
; BDVER2-NEXT: vmovaps {{.*}}(%rip), %xmm0
; BDVER2-NEXT: vzeroupper
; BDVER2-NEXT: callq do_sse
; BDVER2-NEXT: vmovaps %xmm0, {{.*}}(%rip)
; BDVER2-NEXT: callq do_sse
; BDVER2-NEXT: vmovaps %xmm0, {{.*}}(%rip)
; BDVER2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; BDVER2-NEXT: addq $56, %rsp
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: test01:
; BTVER2: # %bb.0:
; BTVER2-NEXT: subq $56, %rsp
@ -86,11 +101,24 @@ define <4 x float> @test02(<8 x float> %a, <8 x float> %b) nounwind {
; VZ-NEXT: vzeroupper
; VZ-NEXT: jmp do_sse # TAILCALL
;
; NO-VZ-LABEL: test02:
; NO-VZ: # %bb.0:
; NO-VZ-NEXT: vaddps %ymm1, %ymm0, %ymm0
; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; NO-VZ-NEXT: jmp do_sse # TAILCALL
; FAST-ymm-zmm-LABEL: test02:
; FAST-ymm-zmm: # %bb.0:
; FAST-ymm-zmm-NEXT: vaddps %ymm1, %ymm0, %ymm0
; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; FAST-ymm-zmm-NEXT: jmp do_sse # TAILCALL
;
; BDVER2-LABEL: test02:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; BDVER2-NEXT: vzeroupper
; BDVER2-NEXT: jmp do_sse # TAILCALL
;
; BTVER2-LABEL: test02:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vaddps %ymm1, %ymm0, %ymm0
; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; BTVER2-NEXT: jmp do_sse # TAILCALL
%add.i = fadd <8 x float> %a, %b
%add.low = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %add.i, i8 0)
%call3 = tail call <4 x float> @do_sse(<4 x float> %add.low) nounwind
@ -162,6 +190,37 @@ define <4 x float> @test03(<4 x float> %a, <4 x float> %b) nounwind {
; FAST-ymm-zmm-NEXT: popq %rbx
; FAST-ymm-zmm-NEXT: retq
;
; BDVER2-LABEL: test03:
; BDVER2: # %bb.0: # %entry
; BDVER2-NEXT: pushq %rbx
; BDVER2-NEXT: subq $16, %rsp
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0
; BDVER2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; BDVER2-NEXT: .p2align 4, 0x90
; BDVER2-NEXT: .LBB3_1: # %while.cond
; BDVER2-NEXT: # =>This Inner Loop Header: Depth=1
; BDVER2-NEXT: callq foo
; BDVER2-NEXT: testl %eax, %eax
; BDVER2-NEXT: jne .LBB3_1
; BDVER2-NEXT: # %bb.2: # %for.body.preheader
; BDVER2-NEXT: movl $4, %ebx
; BDVER2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; BDVER2-NEXT: .p2align 4, 0x90
; BDVER2-NEXT: .LBB3_3: # %for.body
; BDVER2-NEXT: # =>This Inner Loop Header: Depth=1
; BDVER2-NEXT: callq do_sse
; BDVER2-NEXT: callq do_sse
; BDVER2-NEXT: vmovaps {{.*}}(%rip), %ymm0
; BDVER2-NEXT: vextractf128 $1, %ymm0, %xmm0
; BDVER2-NEXT: vzeroupper
; BDVER2-NEXT: callq do_sse
; BDVER2-NEXT: addl $-1, %ebx
; BDVER2-NEXT: jne .LBB3_3
; BDVER2-NEXT: # %bb.4: # %for.end
; BDVER2-NEXT: addq $16, %rsp
; BDVER2-NEXT: popq %rbx
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: test03:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: pushq %rbx
@ -230,15 +289,36 @@ define <4 x float> @test04(<4 x float> %a, <4 x float> %b) nounwind {
; VZ-NEXT: vzeroupper
; VZ-NEXT: retq
;
; NO-VZ-LABEL: test04:
; NO-VZ: # %bb.0:
; NO-VZ-NEXT: pushq %rax
; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; NO-VZ-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; NO-VZ-NEXT: callq do_avx
; NO-VZ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; NO-VZ-NEXT: popq %rax
; NO-VZ-NEXT: retq
; FAST-ymm-zmm-LABEL: test04:
; FAST-ymm-zmm: # %bb.0:
; FAST-ymm-zmm-NEXT: pushq %rax
; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; FAST-ymm-zmm-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; FAST-ymm-zmm-NEXT: callq do_avx
; FAST-ymm-zmm-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; FAST-ymm-zmm-NEXT: popq %rax
; FAST-ymm-zmm-NEXT: retq
;
; BDVER2-LABEL: test04:
; BDVER2: # %bb.0:
; BDVER2-NEXT: pushq %rax
; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; BDVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; BDVER2-NEXT: callq do_avx
; BDVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; BDVER2-NEXT: popq %rax
; BDVER2-NEXT: vzeroupper
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: test04:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pushq %rax
; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; BTVER2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; BTVER2-NEXT: callq do_avx
; BTVER2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; BTVER2-NEXT: popq %rax
; BTVER2-NEXT: retq
%shuf = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%call = call <8 x float> @do_avx(<8 x float> %shuf) nounwind
%shuf2 = shufflevector <8 x float> %call, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -36,6 +37,13 @@ define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) {
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_andn_i32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_andn_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: andnl (%rdx), %edi, %eax # sched: [4:1.00]
@ -86,6 +94,13 @@ define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) {
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_andn_i64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_andn_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: andnq (%rdx), %rdi, %rax # sched: [4:1.00]
@ -136,6 +151,13 @@ define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) {
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_bextr_i32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [7:1.00]
; BDVER2-NEXT: bextrl %edi, %esi, %eax # sched: [2:1.00]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_bextr_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: bextrl %edi, (%rdx), %ecx # sched: [4:1.00]
@ -186,6 +208,13 @@ define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) {
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_bextr_i64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [7:1.00]
; BDVER2-NEXT: bextrq %rdi, %rsi, %rax # sched: [2:1.00]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_bextr_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: bextrq %rdi, (%rdx), %rcx # sched: [4:1.00]
@ -236,6 +265,13 @@ define i32 @test_blsi_i32(i32 %a0, i32 *%a1) {
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_blsi_i32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsil (%rsi), %ecx # sched: [6:0.50]
; BDVER2-NEXT: blsil %edi, %eax # sched: [1:0.33]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_blsi_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsil (%rsi), %ecx # sched: [5:1.00]
@ -287,6 +323,13 @@ define i64 @test_blsi_i64(i64 %a0, i64 *%a1) {
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_blsi_i64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsiq (%rsi), %rcx # sched: [6:0.50]
; BDVER2-NEXT: blsiq %rdi, %rax # sched: [1:0.33]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_blsi_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsiq (%rsi), %rcx # sched: [5:1.00]
@ -338,6 +381,13 @@ define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) {
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_blsmsk_i32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsmskl (%rsi), %ecx # sched: [6:0.50]
; BDVER2-NEXT: blsmskl %edi, %eax # sched: [1:0.33]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_blsmsk_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsmskl (%rsi), %ecx # sched: [5:1.00]
@ -389,6 +439,13 @@ define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) {
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_blsmsk_i64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsmskq (%rsi), %rcx # sched: [6:0.50]
; BDVER2-NEXT: blsmskq %rdi, %rax # sched: [1:0.33]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_blsmsk_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsmskq (%rsi), %rcx # sched: [5:1.00]
@ -440,6 +497,13 @@ define i32 @test_blsr_i32(i32 %a0, i32 *%a1) {
; SKYLAKE-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_blsr_i32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsrl (%rsi), %ecx # sched: [6:0.50]
; BDVER2-NEXT: blsrl %edi, %eax # sched: [1:0.33]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_blsr_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsrl (%rsi), %ecx # sched: [5:1.00]
@ -491,6 +555,13 @@ define i64 @test_blsr_i64(i64 %a0, i64 *%a1) {
; SKYLAKE-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_blsr_i64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsrq (%rsi), %rcx # sched: [6:0.50]
; BDVER2-NEXT: blsrq %rdi, %rax # sched: [1:0.33]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_blsr_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: blsrq (%rsi), %rcx # sched: [5:1.00]
@ -546,6 +617,14 @@ define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) {
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_cttz_i16:
; BDVER2: # %bb.0:
; BDVER2-NEXT: tzcntw (%rsi), %cx # sched: [8:1.00]
; BDVER2-NEXT: tzcntw %di, %ax # sched: [3:1.00]
; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cttz_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntw (%rsi), %cx # sched: [5:1.00]
@ -598,6 +677,13 @@ define i32 @test_cttz_i32(i32 %a0, i32 *%a1) {
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_cttz_i32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: tzcntl (%rsi), %ecx # sched: [8:1.00]
; BDVER2-NEXT: tzcntl %edi, %eax # sched: [3:1.00]
; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cttz_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntl (%rsi), %ecx # sched: [5:1.00]
@ -648,6 +734,13 @@ define i64 @test_cttz_i64(i64 %a0, i64 *%a1) {
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_cttz_i64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: tzcntq (%rsi), %rcx # sched: [8:1.00]
; BDVER2-NEXT: tzcntq %rdi, %rax # sched: [3:1.00]
; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cttz_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: tzcntq (%rsi), %rcx # sched: [5:1.00]

View File

@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -540,6 +541,72 @@ define void @test_cmov_16(i16 %a0, i16 %a1, i16 *%a2) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_cmov_16:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: cmovow %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovnow %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovbw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovaew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovnew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovnew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovbew %si, %di # sched: [3:1.00]
; BDVER2-NEXT: cmovbew %si, %di # sched: [3:1.00]
; BDVER2-NEXT: cmovaw %si, %di # sched: [3:1.00]
; BDVER2-NEXT: cmovaw %si, %di # sched: [3:1.00]
; BDVER2-NEXT: cmovsw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovnsw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovpw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovpw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovnpw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovnpw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovlw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovlw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovgew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovgew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovlew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovlew %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovgw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovgw %si, %di # sched: [2:0.67]
; BDVER2-NEXT: cmovow (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovnow (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovbw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovaew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovnew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
; BDVER2-NEXT: cmovbew (%rdx), %di # sched: [8:1.00]
; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
; BDVER2-NEXT: cmovaw (%rdx), %di # sched: [8:1.00]
; BDVER2-NEXT: cmovsw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovnsw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovpw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovnpw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovlw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovgew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovlew (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: cmovgw (%rdx), %di # sched: [7:0.67]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cmov_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1204,6 +1271,72 @@ define void @test_cmov_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_cmov_32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: cmovol %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovnol %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovbl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovael %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovel %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovel %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovnel %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
; BDVER2-NEXT: cmovbel %esi, %edi # sched: [3:1.00]
; BDVER2-NEXT: cmoval %esi, %edi # sched: [3:1.00]
; BDVER2-NEXT: cmoval %esi, %edi # sched: [3:1.00]
; BDVER2-NEXT: cmovsl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovnsl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovpl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovnpl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovll %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovll %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovgel %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovlel %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovgl %esi, %edi # sched: [2:0.67]
; BDVER2-NEXT: cmovol (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovnol (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovbl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovael (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovel (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovnel (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
; BDVER2-NEXT: cmovbel (%rdx), %edi # sched: [8:1.00]
; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
; BDVER2-NEXT: cmoval (%rdx), %edi # sched: [8:1.00]
; BDVER2-NEXT: cmovsl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovnsl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovpl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovnpl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovll (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovgel (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovlel (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: cmovgl (%rdx), %edi # sched: [7:0.67]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cmov_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1868,6 +2001,72 @@ define void @test_cmov_64(i64 %a0, i64 %a1, i64 *%a2) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_cmov_64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: cmovoq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovnoq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovbq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovaeq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmoveq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovneq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
; BDVER2-NEXT: cmovbeq %rsi, %rdi # sched: [3:1.00]
; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
; BDVER2-NEXT: cmovaq %rsi, %rdi # sched: [3:1.00]
; BDVER2-NEXT: cmovsq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovnsq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovpq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovnpq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovlq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovgeq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovleq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovgq %rsi, %rdi # sched: [2:0.67]
; BDVER2-NEXT: cmovoq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovnoq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovbq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovaeq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmoveq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovneq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
; BDVER2-NEXT: cmovbeq (%rdx), %rdi # sched: [8:1.00]
; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
; BDVER2-NEXT: cmovaq (%rdx), %rdi # sched: [8:1.00]
; BDVER2-NEXT: cmovsq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovnsq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovpq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovnpq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovlq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovgeq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovleq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: cmovgq (%rdx), %rdi # sched: [7:0.67]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_cmov_64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -43,6 +44,13 @@ define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
; SKYLAKE-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_vcvtph2ps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
; BDVER2-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_vcvtph2ps_128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
@ -100,6 +108,13 @@ define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
; SKYLAKE-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_vcvtph2ps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
; BDVER2-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
; BDVER2-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_vcvtph2ps_256:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00]
@ -152,6 +167,12 @@ define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16>
; SKYLAKE-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_vcvtps2ph_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_vcvtps2ph_128:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
@ -207,6 +228,13 @@ define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16>
; SKYLAKE-NEXT: vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_vcvtps2ph_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_vcvtps2ph_256:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00]

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+fma | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
@ -24,6 +25,18 @@ define void @test_vfmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132pd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231pd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -113,6 +126,19 @@ define void @test_vfmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double>
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132pd {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231pd {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -206,6 +232,18 @@ define void @test_vfmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231ps {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -295,6 +333,19 @@ define void @test_vfmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * mem) + ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231ps {{.*#+}} ymm0 = (ymm1 * mem) + ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -388,6 +439,18 @@ define void @test_vfmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231sd {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -476,6 +539,18 @@ define void @test_vfmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddss_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfmadd231ss {{.*#+}} xmm0 = (xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -568,6 +643,18 @@ define void @test_vfmaddsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsubpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -657,6 +744,19 @@ define void @test_vfmaddsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsubpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub132pd {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub231pd {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -750,6 +850,18 @@ define void @test_vfmaddsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsubps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) +/- xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) +/- xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} xmm0 = (xmm0 * mem) +/- xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- mem sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} xmm0 = (xmm1 * mem) +/- xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -839,6 +951,19 @@ define void @test_vfmaddsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float>
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmaddsubps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) +/- ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) +/- ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmaddsub132ps {{.*#+}} ymm0 = (ymm0 * mem) +/- ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- mem sched: [10:0.50]
; BDVER2-NEXT: vfmaddsub231ps {{.*#+}} ymm0 = (ymm1 * mem) +/- ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmaddsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -936,6 +1061,18 @@ define void @test_vfmsubaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubaddpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1025,6 +1162,19 @@ define void @test_vfmsubaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x doubl
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubaddpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd132pd {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd231pd {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1118,6 +1268,18 @@ define void @test_vfmsubaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubaddps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * xmm2) -/+ xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * xmm2) -/+ xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} xmm0 = (xmm0 * mem) -/+ xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) -/+ mem sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} xmm0 = (xmm1 * mem) -/+ xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1207,6 +1369,19 @@ define void @test_vfmsubaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float>
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubaddps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * ymm2) -/+ ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * ymm2) -/+ ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsubadd132ps {{.*#+}} ymm0 = (ymm0 * mem) -/+ ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) -/+ mem sched: [10:0.50]
; BDVER2-NEXT: vfmsubadd231ps {{.*#+}} ymm0 = (ymm1 * mem) -/+ ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1304,6 +1479,18 @@ define void @test_vfmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231pd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1393,6 +1580,19 @@ define void @test_vfmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double>
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132pd {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231pd {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1486,6 +1686,18 @@ define void @test_vfmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132ps {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231ps {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1575,6 +1787,19 @@ define void @test_vfmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132ps {{.*#+}} ymm0 = (ymm0 * mem) - ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231ps {{.*#+}} ymm0 = (ymm1 * mem) - ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1668,6 +1893,18 @@ define void @test_vfmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubsd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231sd {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1756,6 +1993,18 @@ define void @test_vfmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfmsubss_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfmsub231ss {{.*#+}} xmm0 = (xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfmsubss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1848,6 +2097,18 @@ define void @test_vfnmaddpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132pd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231pd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmaddpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -1937,6 +2198,19 @@ define void @test_vfnmaddpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double>
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132pd {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231pd {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmaddpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2030,6 +2304,18 @@ define void @test_vfnmaddps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132ps {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231ps {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmaddps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2119,6 +2405,19 @@ define void @test_vfnmaddps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) + ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) + ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132ps {{.*#+}} ymm0 = -(ymm0 * mem) + ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231ps {{.*#+}} ymm0 = -(ymm1 * mem) + ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmaddps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2212,6 +2511,18 @@ define void @test_vfnmaddsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddsd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231sd {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmaddsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2300,6 +2611,18 @@ define void @test_vfnmaddss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmaddss_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) + xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) + xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + mem sched: [10:0.50]
; BDVER2-NEXT: vfnmadd231ss {{.*#+}} xmm0 = -(xmm1 * mem) + xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmaddss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2392,6 +2715,18 @@ define void @test_vfnmsubpd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubpd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132pd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231pd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmsubpd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2481,6 +2816,19 @@ define void @test_vfnmsubpd_256(<4 x double> %a0, <4 x double> %a1, <4 x double>
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubpd_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132pd {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231pd {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmsubpd_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2574,6 +2922,18 @@ define void @test_vfnmsubps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubps_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132ps {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231ps {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmsubps_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2663,6 +3023,19 @@ define void @test_vfnmsubps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a
; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubps_256:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * ymm2) - ymm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * ymm2) - ymm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132ps {{.*#+}} ymm0 = -(ymm0 * mem) - ymm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231ps {{.*#+}} ymm0 = -(ymm1 * mem) - ymm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vzeroupper # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmsubps_256:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2756,6 +3129,18 @@ define void @test_vfnmsubsd_128(<2 x double> %a0, <2 x double> %a1, <2 x double>
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubsd_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231sd {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmsubsd_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP
@ -2844,6 +3229,18 @@ define void @test_vfnmsubss_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a
; GENERIC-NEXT: #NO_APP
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_vfnmsubss_128:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * xmm2) - xmm1 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * xmm2) - xmm0 sched: [5:0.50]
; BDVER2-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm1 sched: [10:0.50]
; BDVER2-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - mem sched: [10:0.50]
; BDVER2-NEXT: vfnmsub231ss {{.*#+}} xmm0 = -(xmm1 * mem) - xmm0 sched: [10:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vfnmsubss_128:
; HASWELL: # %bb.0:
; HASWELL-NEXT: #APP

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-slow-3ops-lea | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -60,6 +61,12 @@ define i32 @test_lea_offset(i32) {
; SKYLAKE-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_offset:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal -24(%rdi), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@ -124,6 +131,12 @@ define i32 @test_lea_offset_big(i32) {
; SKYLAKE-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_offset_big:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal 1024(%rdi), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@ -196,6 +209,13 @@ define i32 @test_lea_add(i32, i32) {
; SKYLAKE-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@ -274,6 +294,13 @@ define i32 @test_lea_add_offset(i32, i32) {
; SKYLAKE-NEXT: addl $16, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_offset:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@ -358,6 +385,13 @@ define i32 @test_lea_add_offset_big(i32, i32) {
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_offset_big:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@ -425,6 +459,12 @@ define i32 @test_lea_mul(i32) {
; SKYLAKE-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_mul:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@ -494,6 +534,12 @@ define i32 @test_lea_mul_offset(i32) {
; SKYLAKE-NEXT: addl $-32, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_mul_offset:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@ -569,6 +615,12 @@ define i32 @test_lea_mul_offset_big(i32) {
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_mul_offset_big:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $edi killed $edi def $rdi
@ -641,6 +693,13 @@ define i32 @test_lea_add_scale(i32, i32) {
; SKYLAKE-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_scale:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal (%rdi,%rsi,2), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@ -720,6 +779,13 @@ define i32 @test_lea_add_scale_offset(i32, i32) {
; SKYLAKE-NEXT: addl $96, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_scale_offset:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi
@ -805,6 +871,13 @@ define i32 @test_lea_add_scale_offset_big(i32, i32) {
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_scale_offset_big:
; BDVER2: # %bb.0:
; BDVER2-NEXT: # kill: def $esi killed $esi def $rsi
; BDVER2-NEXT: # kill: def $edi killed $edi def $rdi
; BDVER2-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: # kill: def $esi killed $esi def $rsi

View File

@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-slow-3ops-lea | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -53,6 +54,11 @@ define i64 @test_lea_offset(i64) {
; SKYLAKE-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_offset:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -24(%rdi), %rax # sched: [1:0.50]
@ -108,6 +114,11 @@ define i64 @test_lea_offset_big(i64) {
; SKYLAKE-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_offset_big:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 1024(%rdi), %rax # sched: [1:0.50]
@ -164,6 +175,11 @@ define i64 @test_lea_add(i64, i64) {
; SKYLAKE-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
@ -224,6 +240,11 @@ define i64 @test_lea_add_offset(i64, i64) {
; SKYLAKE-NEXT: addq $16, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_offset:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [2:1.00]
@ -290,6 +311,11 @@ define i64 @test_lea_add_offset_big(i64, i64) {
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_offset_big:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [2:1.00]
@ -346,6 +372,11 @@ define i64 @test_lea_mul(i64) {
; SKYLAKE-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_mul:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [2:1.00]
@ -406,6 +437,11 @@ define i64 @test_lea_mul_offset(i64) {
; SKYLAKE-NEXT: addq $-32, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_mul_offset:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [2:1.00]
@ -472,6 +508,11 @@ define i64 @test_lea_mul_offset_big(i64) {
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_mul_offset_big:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_mul_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [2:1.00]
@ -528,6 +569,11 @@ define i64 @test_lea_add_scale(i64, i64) {
; SKYLAKE-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_scale:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq (%rdi,%rsi,2), %rax # sched: [2:1.00]
@ -589,6 +635,11 @@ define i64 @test_lea_add_scale_offset(i64, i64) {
; SKYLAKE-NEXT: addq $96, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_scale_offset:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [2:1.00]
@ -656,6 +707,11 @@ define i64 @test_lea_add_scale_offset_big(i64, i64) {
; SKYLAKE-NEXT: # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_lea_add_scale_offset_big:
; BDVER2: # %bb.0:
; BDVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_lea_add_scale_offset_big:
; BTVER2: # %bb.0:
; BTVER2-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [2:1.00]

View File

@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER1
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER1
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=x86-64 -mattr=+lwp | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER12 --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
@ -11,10 +11,20 @@ define void @test_llwpcb(i8 *%a0) nounwind {
; GENERIC-NEXT: llwpcb %rdi # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_llwpcb:
; BDVER: # %bb.0:
; BDVER-NEXT: llwpcb %rdi
; BDVER-NEXT: retq
; BDVER12-LABEL: test_llwpcb:
; BDVER12: # %bb.0:
; BDVER12-NEXT: llwpcb %rdi # sched: [100:0.33]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_llwpcb:
; BDVER3: # %bb.0:
; BDVER3-NEXT: llwpcb %rdi
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_llwpcb:
; BDVER4: # %bb.0:
; BDVER4-NEXT: llwpcb %rdi
; BDVER4-NEXT: retq
tail call void @llvm.x86.llwpcb(i8 *%a0)
ret void
}
@ -25,10 +35,20 @@ define i8* @test_slwpcb(i8 *%a0) nounwind {
; GENERIC-NEXT: slwpcb %rax # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_slwpcb:
; BDVER: # %bb.0:
; BDVER-NEXT: slwpcb %rax
; BDVER-NEXT: retq
; BDVER12-LABEL: test_slwpcb:
; BDVER12: # %bb.0:
; BDVER12-NEXT: slwpcb %rax # sched: [100:0.33]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_slwpcb:
; BDVER3: # %bb.0:
; BDVER3-NEXT: slwpcb %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_slwpcb:
; BDVER4: # %bb.0:
; BDVER4-NEXT: slwpcb %rax
; BDVER4-NEXT: retq
%1 = tail call i8* @llvm.x86.slwpcb()
ret i8 *%1
}
@ -42,12 +62,27 @@ define i8 @test_lwpins32_rri(i32 %a0, i32 %a1) nounwind {
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_lwpins32_rri:
; BDVER: # %bb.0:
; BDVER-NEXT: addl %esi, %esi
; BDVER-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
; BDVER-NEXT: setb %al
; BDVER-NEXT: retq
; BDVER12-LABEL: test_lwpins32_rri:
; BDVER12: # %bb.0:
; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.33]
; BDVER12-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
; BDVER12-NEXT: # sched: [100:0.33]
; BDVER12-NEXT: setb %al # sched: [1:0.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_lwpins32_rri:
; BDVER3: # %bb.0:
; BDVER3-NEXT: addl %esi, %esi
; BDVER3-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
; BDVER3-NEXT: setb %al
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_lwpins32_rri:
; BDVER4: # %bb.0:
; BDVER4-NEXT: addl %esi, %esi
; BDVER4-NEXT: lwpins $-1985229329, %esi, %edi # imm = 0x89ABCDEF
; BDVER4-NEXT: setb %al
; BDVER4-NEXT: retq
%1 = add i32 %a1, %a1
%2 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %1, i32 2309737967)
ret i8 %2
@ -61,11 +96,24 @@ define i8 @test_lwpins32_rmi(i32 %a0, i32 *%p1) nounwind {
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_lwpins32_rmi:
; BDVER: # %bb.0:
; BDVER-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
; BDVER-NEXT: setb %al
; BDVER-NEXT: retq
; BDVER12-LABEL: test_lwpins32_rmi:
; BDVER12: # %bb.0:
; BDVER12-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
; BDVER12-NEXT: # sched: [100:0.33]
; BDVER12-NEXT: setb %al # sched: [1:0.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_lwpins32_rmi:
; BDVER3: # %bb.0:
; BDVER3-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
; BDVER3-NEXT: setb %al
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_lwpins32_rmi:
; BDVER4: # %bb.0:
; BDVER4-NEXT: lwpins $1985229328, (%rsi), %edi # imm = 0x76543210
; BDVER4-NEXT: setb %al
; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
%1 = tail call i8 @llvm.x86.lwpins32(i32 %a0, i32 %a1, i32 1985229328)
ret i8 %1
@ -79,11 +127,24 @@ define i8 @test_lwpins64_rri(i64 %a0, i32 %a1) nounwind {
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_lwpins64_rri:
; BDVER: # %bb.0:
; BDVER-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
; BDVER-NEXT: setb %al
; BDVER-NEXT: retq
; BDVER12-LABEL: test_lwpins64_rri:
; BDVER12: # %bb.0:
; BDVER12-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
; BDVER12-NEXT: # sched: [100:0.33]
; BDVER12-NEXT: setb %al # sched: [1:0.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_lwpins64_rri:
; BDVER3: # %bb.0:
; BDVER3-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
; BDVER3-NEXT: setb %al
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_lwpins64_rri:
; BDVER4: # %bb.0:
; BDVER4-NEXT: lwpins $-1985229329, %esi, %rdi # imm = 0x89ABCDEF
; BDVER4-NEXT: setb %al
; BDVER4-NEXT: retq
%1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 2309737967)
ret i8 %1
}
@ -96,11 +157,24 @@ define i8 @test_lwpins64_rmi(i64 %a0, i32 *%p1) nounwind {
; GENERIC-NEXT: setb %al # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_lwpins64_rmi:
; BDVER: # %bb.0:
; BDVER-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
; BDVER-NEXT: setb %al
; BDVER-NEXT: retq
; BDVER12-LABEL: test_lwpins64_rmi:
; BDVER12: # %bb.0:
; BDVER12-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
; BDVER12-NEXT: # sched: [100:0.33]
; BDVER12-NEXT: setb %al # sched: [1:0.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_lwpins64_rmi:
; BDVER3: # %bb.0:
; BDVER3-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
; BDVER3-NEXT: setb %al
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_lwpins64_rmi:
; BDVER4: # %bb.0:
; BDVER4-NEXT: lwpins $1985229328, (%rsi), %rdi # imm = 0x76543210
; BDVER4-NEXT: setb %al
; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
%1 = tail call i8 @llvm.x86.lwpins64(i64 %a0, i32 %a1, i32 1985229328)
ret i8 %1
@ -114,11 +188,24 @@ define void @test_lwpval32_rri(i32 %a0, i32 %a1) nounwind {
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_lwpval32_rri:
; BDVER: # %bb.0:
; BDVER-NEXT: addl %esi, %esi
; BDVER-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
; BDVER-NEXT: retq
; BDVER12-LABEL: test_lwpval32_rri:
; BDVER12: # %bb.0:
; BDVER12-NEXT: addl %esi, %esi # sched: [1:0.33]
; BDVER12-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
; BDVER12-NEXT: # sched: [100:0.33]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_lwpval32_rri:
; BDVER3: # %bb.0:
; BDVER3-NEXT: addl %esi, %esi
; BDVER3-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_lwpval32_rri:
; BDVER4: # %bb.0:
; BDVER4-NEXT: addl %esi, %esi
; BDVER4-NEXT: lwpval $-19088744, %esi, %edi # imm = 0xFEDCBA98
; BDVER4-NEXT: retq
%1 = add i32 %a1, %a1
tail call void @llvm.x86.lwpval32(i32 %a0, i32 %1, i32 4275878552)
ret void
@ -131,10 +218,21 @@ define void @test_lwpval32_rmi(i32 %a0, i32 *%p1) nounwind {
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_lwpval32_rmi:
; BDVER: # %bb.0:
; BDVER-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
; BDVER-NEXT: retq
; BDVER12-LABEL: test_lwpval32_rmi:
; BDVER12: # %bb.0:
; BDVER12-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
; BDVER12-NEXT: # sched: [100:0.33]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_lwpval32_rmi:
; BDVER3: # %bb.0:
; BDVER3-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_lwpval32_rmi:
; BDVER4: # %bb.0:
; BDVER4-NEXT: lwpval $305419896, (%rsi), %edi # imm = 0x12345678
; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
tail call void @llvm.x86.lwpval32(i32 %a0, i32 %a1, i32 305419896)
ret void
@ -147,10 +245,21 @@ define void @test_lwpval64_rri(i64 %a0, i32 %a1) nounwind {
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_lwpval64_rri:
; BDVER: # %bb.0:
; BDVER-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
; BDVER-NEXT: retq
; BDVER12-LABEL: test_lwpval64_rri:
; BDVER12: # %bb.0:
; BDVER12-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
; BDVER12-NEXT: # sched: [100:0.33]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_lwpval64_rri:
; BDVER3: # %bb.0:
; BDVER3-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_lwpval64_rri:
; BDVER4: # %bb.0:
; BDVER4-NEXT: lwpval $-19088744, %esi, %rdi # imm = 0xFEDCBA98
; BDVER4-NEXT: retq
tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 4275878552)
ret void
}
@ -162,10 +271,21 @@ define void @test_lwpval64_rmi(i64 %a0, i32 *%p1) nounwind {
; GENERIC-NEXT: # sched: [100:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_lwpval64_rmi:
; BDVER: # %bb.0:
; BDVER-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
; BDVER-NEXT: retq
; BDVER12-LABEL: test_lwpval64_rmi:
; BDVER12: # %bb.0:
; BDVER12-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
; BDVER12-NEXT: # sched: [100:0.33]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_lwpval64_rmi:
; BDVER3: # %bb.0:
; BDVER3-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_lwpval64_rmi:
; BDVER4: # %bb.0:
; BDVER4-NEXT: lwpval $305419896, (%rsi), %rdi # imm = 0x12345678
; BDVER4-NEXT: retq
%a1 = load i32, i32 *%p1
tail call void @llvm.x86.lwpval64(i64 %a0, i32 %a1, i32 305419896)
ret void

View File

@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -40,6 +41,14 @@ define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) {
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_ctlz_i16:
; BDVER2: # %bb.0:
; BDVER2-NEXT: lzcntw (%rsi), %cx # sched: [8:1.00]
; BDVER2-NEXT: lzcntw %di, %ax # sched: [3:1.00]
; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctlz_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntw (%rsi), %cx # sched: [4:1.00]
@ -92,6 +101,13 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) {
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_ctlz_i32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: lzcntl (%rsi), %ecx # sched: [8:1.00]
; BDVER2-NEXT: lzcntl %edi, %eax # sched: [3:1.00]
; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctlz_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntl (%rsi), %ecx # sched: [4:1.00]
@ -142,6 +158,13 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) {
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_ctlz_i64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: lzcntq (%rsi), %rcx # sched: [8:1.00]
; BDVER2-NEXT: lzcntq %rdi, %rax # sched: [3:1.00]
; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctlz_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: lzcntq (%rsi), %rcx # sched: [4:1.00]

File diff suppressed because it is too large Load Diff

View File

@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -60,6 +61,14 @@ define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) {
; SKYLAKE-NEXT: # kill: def $ax killed $ax killed $eax
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_ctpop_i16:
; BDVER2: # %bb.0:
; BDVER2-NEXT: popcntw (%rsi), %cx # sched: [9:1.00]
; BDVER2-NEXT: popcntw %di, %ax # sched: [3:1.00]
; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: # kill: def $ax killed $ax killed $eax
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctpop_i16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntw (%rsi), %cx # sched: [4:1.00]
@ -126,6 +135,13 @@ define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) {
; SKYLAKE-NEXT: orl %ecx, %eax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_ctpop_i32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: popcntl (%rsi), %ecx # sched: [9:1.00]
; BDVER2-NEXT: popcntl %edi, %eax # sched: [3:1.00]
; BDVER2-NEXT: orl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctpop_i32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntl (%rsi), %ecx # sched: [4:1.00]
@ -190,6 +206,13 @@ define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) {
; SKYLAKE-NEXT: orq %rcx, %rax # sched: [1:0.25]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; BDVER2-LABEL: test_ctpop_i64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00]
; BDVER2-NEXT: popcntq %rdi, %rax # sched: [3:1.00]
; BDVER2-NEXT: orq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_ctpop_i64:
; BTVER2: # %bb.0:
; BTVER2-NEXT: popcntq (%rsi), %rcx # sched: [4:1.00]

View File

@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+fma4 -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
@ -37,6 +38,12 @@ define float @f32_no_estimate(float %x) #0 {
; FMA-RECIP-NEXT: vdivss %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_no_estimate:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@ -105,6 +112,13 @@ define float @f32_one_step(float %x) #1 {
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_one_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@ -202,6 +216,16 @@ define float @f32_two_step(float %x) #2 {
; FMA-RECIP-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_two_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
@ -300,6 +324,12 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
; FMA-RECIP-NEXT: vdivps %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_no_estimate:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [6:0.50]
; BDVER2-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:14.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v4f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1] sched: [5:1.00]
@ -368,6 +398,13 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_one_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v4f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
@ -467,6 +504,16 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * xmm3) + xmm3
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_two_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v4f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
@ -568,6 +615,12 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
; FMA-RECIP-NEXT: vdivps %ymm0, %ymm1, %ymm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_no_estimate:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:28.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v8f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -643,6 +696,13 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_one_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v8f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -755,6 +815,16 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm0 = (ymm0 * ymm3) + ymm3
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_two_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v8f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -864,6 +934,13 @@ define <16 x float> @v16f32_no_estimate(<16 x float> %x) #0 {
; FMA-RECIP-NEXT: vdivps %ymm1, %ymm2, %ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_no_estimate:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vdivps %ymm0, %ymm2, %ymm0 # sched: [29:28.00]
; BDVER2-NEXT: vdivps %ymm1, %ymm2, %ymm1 # sched: [29:28.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v16f32_no_estimate:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -966,6 +1043,17 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm2) + ymm2
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_one_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vrcpps %ymm1, %ymm4 # sched: [7:2.00]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm4, %ymm1, %ymm4, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v16f32_one_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -1136,6 +1224,21 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
; FMA-RECIP-NEXT: vfmadd132ps {{.*#+}} ymm1 = (ymm1 * ymm4) + ymm4
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_two_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v16f32_two_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]

View File

@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=FMA-RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=+fma4 -mattr=+avx -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=SANDY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=HASWELL
@ -30,6 +31,12 @@ define float @f32_no_step_2(float %x) #3 {
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_no_step_2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: f32_no_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [2:1.00]
@ -101,6 +108,14 @@ define float @f32_one_step_2(float %x) #1 {
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_one_step_2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: f32_one_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@ -196,6 +211,15 @@ define float @f32_one_step_2_divs(float %x) #1 {
; FMA-RECIP-NEXT: vmulss %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_one_step_2_divs:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vfnmaddss {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
; BDVER2-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00]
@ -309,6 +333,17 @@ define float @f32_two_step_2(float %x) #2 {
; FMA-RECIP-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: f32_two_step_2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddss %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddss %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: f32_two_step_2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [5:1.00]
@ -425,6 +460,14 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 {
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_one_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v4f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
@ -522,6 +565,15 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 {
; FMA-RECIP-NEXT: vmulps %xmm0, %xmm1, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_one_step_2_divs:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %xmm1, %xmm0, %xmm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00]
; BDVER2-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v4f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [5:1.00]
@ -637,6 +689,17 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 {
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v4f32_two_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1] sched: [6:0.50]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm3, %xmm1, %xmm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %xmm1, %xmm0, %xmm1, %xmm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v4f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} xmm3 = [1,1,1,1] sched: [5:1.00]
@ -761,6 +824,14 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 {
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_one_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v8f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -867,6 +938,15 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 {
; FMA-RECIP-NEXT: vmulps %ymm0, %ymm1, %ymm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_one_step_2_divs:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; BDVER2-NEXT: vfnmaddps {{.*}}(%rip), %ymm1, %ymm0, %ymm0 # sched: [10:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:1.00]
; BDVER2-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v8f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -996,6 +1076,17 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 {
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_two_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00]
; BDVER2-NEXT: vmovaps {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm3 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm3, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm1, %ymm0, %ymm1, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v8f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -1097,6 +1188,11 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 {
; FMA-RECIP-NEXT: vrcpps %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_no_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v8f32_no_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
@ -1151,6 +1247,12 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 {
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v8f32_no_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v8f32_no_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
@ -1257,6 +1359,19 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_one_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vrcpps %ymm0, %ymm4 # sched: [7:2.00]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm4, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm4, %ymm0, %ymm4, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v16f32_one_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -1415,6 +1530,21 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
; FMA-RECIP-NEXT: vmulps %ymm1, %ymm2, %ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_one_step_2_divs:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm2 # sched: [12:1.00]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm3 # sched: [12:1.00]
; BDVER2-NEXT: vmulps %ymm0, %ymm3, %ymm0 # sched: [5:1.00]
; BDVER2-NEXT: vmulps %ymm1, %ymm2, %ymm1 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v16f32_one_step_2_divs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -1613,6 +1743,23 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_two_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm1, %ymm2 # sched: [7:2.00]
; BDVER2-NEXT: vmovaps {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1] sched: [7:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm4 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm1, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm1, %ymm2, %ymm1 # sched: [5:0.50]
; BDVER2-NEXT: vrcpps %ymm0, %ymm2 # sched: [7:2.00]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm4 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm4, %ymm2, %ymm2 # sched: [5:0.50]
; BDVER2-NEXT: vfnmaddps %ymm3, %ymm2, %ymm0, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vfmaddps %ymm2, %ymm0, %ymm2, %ymm0 # sched: [5:0.50]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v16f32_two_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vmovaps {{.*#+}} ymm4 = [1,1,1,1,1,1,1,1] sched: [5:1.00]
@ -1755,6 +1902,12 @@ define <16 x float> @v16f32_no_step(<16 x float> %x) #3 {
; FMA-RECIP-NEXT: vrcpps %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_no_step:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v16f32_no_step:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [2:2.00]
@ -1821,6 +1974,14 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 {
; FMA-RECIP-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-RECIP-NEXT: retq
;
; BDVER2-LABEL: v16f32_no_step2:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [7:2.00]
; BDVER2-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:1.00]
; BDVER2-NEXT: vmulps {{.*}}(%rip), %ymm1, %ymm1 # sched: [12:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: v16f32_no_step2:
; BTVER2: # %bb.0:
; BTVER2-NEXT: vrcpps %ymm1, %ymm1 # sched: [2:2.00]

View File

@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+slow-shld | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+slow-shld | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER12 --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver1 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER1
; uint64_t lshift10(uint64_t a, uint64_t b)
@ -16,17 +17,17 @@ define i64 @lshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize {
; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift10_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
; BDVER12-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift10_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: shldq $10, %rsi, %rax # sched: [3:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift10_optsize:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rdi, %rax
; BDVER1-NEXT: shldq $10, %rsi, %rax
; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, 10
%shr = lshr i64 %b, 54
@ -41,19 +42,19 @@ define i64 @lshift10(i64 %a, i64 %b) nounwind readnone {
; GENERIC-NEXT: shldq $10, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift10:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
; BDVER12-NEXT: shrq $54, %rsi # sched: [1:0.50]
; BDVER12-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift10:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shlq $10, %rdi # sched: [1:0.50]
; BTVER2-NEXT: shrq $54, %rsi # sched: [1:0.50]
; BTVER2-NEXT: leaq (%rsi,%rdi), %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift10:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: shlq $10, %rdi
; BDVER1-NEXT: shrq $54, %rsi
; BDVER1-NEXT: leaq (%rsi,%rdi), %rax
; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, 10
%shr = lshr i64 %b, 54
@ -74,17 +75,17 @@ define i64 @rshift10_optsize(i64 %a, i64 %b) nounwind readnone optsize {
; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: rshift10_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
; BDVER12-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: rshift10_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: shrdq $62, %rsi, %rax # sched: [3:3.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: rshift10_optsize:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rdi, %rax
; BDVER1-NEXT: shrdq $62, %rsi, %rax
; BDVER1-NEXT: retq
entry:
%shl = lshr i64 %a, 62
%shr = shl i64 %b, 2
@ -100,17 +101,17 @@ define i64 @rshift10(i64 %a, i64 %b) nounwind readnone {
; GENERIC-NEXT: shrdq $62, %rsi, %rax # sched: [2:0.67]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: rshift10:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: shrq $62, %rdi # sched: [1:0.50]
; BDVER12-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: rshift10:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shrq $62, %rdi # sched: [1:0.50]
; BTVER2-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [2:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: rshift10:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: shrq $62, %rdi
; BDVER1-NEXT: leaq (%rdi,%rsi,4), %rax
; BDVER1-NEXT: retq
entry:
%shl = lshr i64 %a, 62
%shr = shl i64 %b, 2
@ -132,6 +133,14 @@ define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize
; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift_cl_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
@ -139,14 +148,6 @@ define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift_cl_optsize:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rdx, %rcx
; BDVER1-NEXT: movq %rdi, %rax
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shldq %cl, %rsi, %rax
; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, %c
%sub = sub nsw i64 64, %c
@ -164,6 +165,17 @@ define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
; GENERIC-NEXT: shldq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift_cl:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.33]
; BDVER12-NEXT: shlq %cl, %rdi # sched: [3:1.50]
; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shrq %cl, %rax # sched: [3:1.50]
; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
@ -174,17 +186,6 @@ define i64 @lshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
; BTVER2-NEXT: shrq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift_cl:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rdx, %rcx
; BDVER1-NEXT: movq %rsi, %rax
; BDVER1-NEXT: shlq %cl, %rdi
; BDVER1-NEXT: negl %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shrq %cl, %rax
; BDVER1-NEXT: orq %rdi, %rax
; BDVER1-NEXT: retq
entry:
%shl = shl i64 %a, %c
%sub = sub nsw i64 64, %c
@ -208,6 +209,14 @@ define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize
; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: rshift_cl_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.33]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: rshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
@ -215,14 +224,6 @@ define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: rshift_cl_optsize:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rdx, %rcx
; BDVER1-NEXT: movq %rdi, %rax
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shrdq %cl, %rsi, %rax
; BDVER1-NEXT: retq
entry:
%shr = lshr i64 %a, %c
%sub = sub nsw i64 64, %c
@ -240,6 +241,17 @@ define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
; GENERIC-NEXT: shrdq %cl, %rsi, %rax # sched: [4:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: rshift_cl:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.33]
; BDVER12-NEXT: movq %rsi, %rax # sched: [1:0.33]
; BDVER12-NEXT: shrq %cl, %rdi # sched: [3:1.50]
; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shlq %cl, %rax # sched: [3:1.50]
; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: rshift_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
@ -250,17 +262,6 @@ define i64 @rshift_cl(i64 %a, i64 %b, i64 %c) nounwind readnone {
; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50]
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: rshift_cl:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rdx, %rcx
; BDVER1-NEXT: movq %rsi, %rax
; BDVER1-NEXT: shrq %cl, %rdi
; BDVER1-NEXT: negl %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shlq %cl, %rax
; BDVER1-NEXT: orq %rdi, %rax
; BDVER1-NEXT: retq
entry:
%shr = lshr i64 %a, %c
%sub = sub nsw i64 64, %c
@ -284,19 +285,19 @@ define void @lshift_mem_cl_optsize(i64 %a, i64 %c) nounwind readnone optsize {
; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift_mem_cl_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.33]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_mem_cl_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [9:11.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift_mem_cl_optsize:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rsi, %rcx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shldq %cl, %rdi, {{.*}}(%rip)
; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, %c
@ -315,6 +316,18 @@ define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
; GENERIC-NEXT: shldq %cl, %rdi, {{.*}}(%rip) # sched: [10:1.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift_mem_cl:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq %rsi, %rcx # sched: [1:0.33]
; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
; BDVER12-NEXT: shlq %cl, %rax # sched: [3:1.50]
; BDVER12-NEXT: negl %ecx # sched: [1:0.33]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shrq %cl, %rdi # sched: [3:1.50]
; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.33]
; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_mem_cl:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
@ -326,18 +339,6 @@ define void @lshift_mem_cl(i64 %a, i64 %c) nounwind readnone {
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift_mem_cl:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq %rsi, %rcx
; BDVER1-NEXT: movq {{.*}}(%rip), %rax
; BDVER1-NEXT: shlq %cl, %rax
; BDVER1-NEXT: negl %ecx
; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER1-NEXT: shrq %cl, %rdi
; BDVER1-NEXT: orq %rax, %rdi
; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)
; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, %c
@ -354,6 +355,15 @@ define void @lshift_mem(i64 %a) nounwind readnone {
; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift_mem:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
; BDVER12-NEXT: shlq $10, %rax # sched: [1:0.50]
; BDVER12-NEXT: shrq $54, %rdi # sched: [1:0.50]
; BDVER12-NEXT: orq %rax, %rdi # sched: [1:0.33]
; BDVER12-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_mem:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
@ -362,15 +372,6 @@ define void @lshift_mem(i64 %a) nounwind readnone {
; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift_mem:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq {{.*}}(%rip), %rax
; BDVER1-NEXT: shlq $10, %rax
; BDVER1-NEXT: shrq $54, %rdi
; BDVER1-NEXT: orq %rax, %rdi
; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)
; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, 10
@ -386,15 +387,15 @@ define void @lshift_mem_optsize(i64 %a) nounwind readnone optsize {
; GENERIC-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift_mem_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [8:1.00]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_mem_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: shldq $10, %rdi, {{.*}}(%rip) # sched: [9:11.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift_mem_optsize:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: shldq $10, %rdi, {{.*}}(%rip)
; BDVER1-NEXT: retq
entry:
%b = load i64, i64* @x
%shl = shl i64 %b, 10
@ -412,6 +413,15 @@ define void @lshift_mem_b(i64 %b) nounwind readnone {
; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift_mem_b:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
; BDVER12-NEXT: shlq $10, %rdi # sched: [1:0.50]
; BDVER12-NEXT: shrq $54, %rax # sched: [1:0.50]
; BDVER12-NEXT: orq %rdi, %rax # sched: [1:0.33]
; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_mem_b:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
@ -420,15 +430,6 @@ define void @lshift_mem_b(i64 %b) nounwind readnone {
; BTVER2-NEXT: orq %rdi, %rax # sched: [1:0.50]
; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift_mem_b:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq {{.*}}(%rip), %rax
; BDVER1-NEXT: shlq $10, %rdi
; BDVER1-NEXT: shrq $54, %rax
; BDVER1-NEXT: orq %rdi, %rax
; BDVER1-NEXT: movq %rax, {{.*}}(%rip)
; BDVER1-NEXT: retq
entry:
%a = load i64, i64* @x
%shl = shl i64 %b, 10
@ -446,19 +447,19 @@ define void @lshift_mem_b_optsize(i64 %b) nounwind readnone optsize {
; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER12-LABEL: lshift_mem_b_optsize:
; BDVER12: # %bb.0: # %entry
; BDVER12-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50]
; BDVER12-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67]
; BDVER12-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; BDVER12-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: lshift_mem_b_optsize:
; BTVER2: # %bb.0: # %entry
; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00]
; BTVER2-NEXT: shrdq $54, %rdi, %rax # sched: [3:3.00]
; BTVER2-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; BDVER1-LABEL: lshift_mem_b_optsize:
; BDVER1: # %bb.0: # %entry
; BDVER1-NEXT: movq {{.*}}(%rip), %rax
; BDVER1-NEXT: shrdq $54, %rdi, %rax
; BDVER1-NEXT: movq %rax, {{.*}}(%rip)
; BDVER1-NEXT: retq
entry:
%a = load i64, i64* @x
%shl = shl i64 %b, 10

View File

@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -76,6 +77,14 @@ define i8 @test_aaa(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_aaa:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: aaa # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_aaa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@ -168,6 +177,15 @@ define void @test_aad(i16 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_aad:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: aad # sched: [100:0.33]
; BDVER2-NEXT: aad $16 # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_aad:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@ -262,6 +280,15 @@ define void @test_aam(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_aam:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: aam # sched: [100:0.33]
; BDVER2-NEXT: aam $16 # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_aam:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@ -348,6 +375,14 @@ define i8 @test_aas(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_aas:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: aas # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_aas:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@ -440,6 +475,15 @@ define void @test_arpl(i16 %a0, i16 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_arpl:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: arpl %ax, (%ecx) # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_arpl:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@ -598,6 +642,23 @@ define void @test_bound(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3) optsize {
; SKX-NEXT: .cfi_def_cfa_offset 4
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_bound:
; BDVER2: # %bb.0:
; BDVER2-NEXT: pushl %esi # sched: [5:1.00]
; BDVER2-NEXT: .cfi_def_cfa_offset 8
; BDVER2-NEXT: .cfi_offset %esi, -8
; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: bound %ax, (%esi) # sched: [100:0.33]
; BDVER2-NEXT: bound %ecx, (%edx) # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: popl %esi # sched: [6:0.50]
; BDVER2-NEXT: .cfi_def_cfa_offset 4
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_bound:
; BTVER2: # %bb.0:
; BTVER2-NEXT: pushl %esi # sched: [1:1.00]
@ -702,6 +763,14 @@ define i8 @test_daa(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_daa:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: daa # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_daa:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@ -786,6 +855,14 @@ define i8 @test_das(i8 %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_das:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: das # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_das:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movb {{[0-9]+}}(%esp), %al # sched: [5:1.00]
@ -886,6 +963,16 @@ define void @test_dec16(i16 %a0, i16* %a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_dec16:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: decw %ax # sched: [1:0.33]
; BDVER2-NEXT: decw (%ecx) # sched: [7:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_dec16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@ -989,6 +1076,16 @@ define void @test_dec32(i32 %a0, i32* %a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_dec32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: decl %eax # sched: [1:0.33]
; BDVER2-NEXT: decl (%ecx) # sched: [7:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_dec32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -1093,6 +1190,16 @@ define void @test_inc16(i16 %a0, i16* %a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_inc16:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: incw %ax # sched: [1:0.33]
; BDVER2-NEXT: incw (%ecx) # sched: [7:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_inc16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@ -1196,6 +1303,16 @@ define void @test_inc32(i32 %a0, i32* %a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_inc32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: incl %eax # sched: [1:0.33]
; BDVER2-NEXT: incl (%ecx) # sched: [7:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_inc32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -1276,6 +1393,13 @@ define void @test_into() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_into:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: into # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_into:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1368,6 +1492,15 @@ define void @test_jcxz_jecxz() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_jcxz_jecxz:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: JXTGT:
; BDVER2-NEXT: jcxz JXTGT # sched: [2:1.00]
; BDVER2-NEXT: jecxz JXTGT # sched: [2:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_jcxz_jecxz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1448,6 +1581,13 @@ define void @test_leave() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_leave:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: leave # sched: [7:0.67]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_leave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1604,6 +1744,23 @@ define void @test_pop_push() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_pop_push:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: popl %ds # sched: [100:0.33]
; BDVER2-NEXT: popl %es # sched: [100:0.33]
; BDVER2-NEXT: popl %ss # sched: [100:0.33]
; BDVER2-NEXT: popl %fs # sched: [100:0.33]
; BDVER2-NEXT: popl %gs # sched: [100:0.33]
; BDVER2-NEXT: pushl %cs # sched: [100:0.33]
; BDVER2-NEXT: pushl %ds # sched: [100:0.33]
; BDVER2-NEXT: pushl %es # sched: [100:0.33]
; BDVER2-NEXT: pushl %ss # sched: [100:0.33]
; BDVER2-NEXT: pushl %fs # sched: [100:0.33]
; BDVER2-NEXT: pushl %gs # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_pop_push:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1760,6 +1917,21 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_pop_push_16:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: popw %ax # sched: [6:0.50]
; BDVER2-NEXT: popw (%ecx) # sched: [6:0.50]
; BDVER2-NEXT: pushw %ax # sched: [5:1.00]
; BDVER2-NEXT: pushw (%ecx) # sched: [5:1.00]
; BDVER2-NEXT: pushw $4095 # imm = 0xFFF
; BDVER2-NEXT: # sched: [1:1.00]
; BDVER2-NEXT: pushw $7 # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_pop_push_16:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [4:1.00]
@ -1912,6 +2084,21 @@ define i32 @test_pop_push_32(i32 %a0, i32 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_pop_push_32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: popl %eax # sched: [6:0.50]
; BDVER2-NEXT: popl (%ecx) # sched: [6:0.50]
; BDVER2-NEXT: pushl %eax # sched: [5:1.00]
; BDVER2-NEXT: pushl (%ecx) # sched: [5:1.00]
; BDVER2-NEXT: pushl $4095 # imm = 0xFFF
; BDVER2-NEXT: # sched: [1:1.00]
; BDVER2-NEXT: pushl $7 # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_pop_push_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -2026,6 +2213,16 @@ define void @test_popa_popf_pusha_pushf() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_popa_popf_pusha_pushf:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: popal # sched: [5:0.50]
; BDVER2-NEXT: popfl # sched: [5:0.50]
; BDVER2-NEXT: pushal # sched: [1:1.00]
; BDVER2-NEXT: pushfl # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_popa_popf_pusha_pushf:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -2144,6 +2341,18 @@ define void @test_ret() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_ret:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: retl # sched: [6:1.00]
; BDVER2-NEXT: retl $4095 # imm = 0xFFF
; BDVER2-NEXT: # sched: [6:1.00]
; BDVER2-NEXT: lretl # sched: [6:1.00]
; BDVER2-NEXT: lretl $4095 # imm = 0xFFF
; BDVER2-NEXT: # sched: [6:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_ret:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -2228,6 +2437,13 @@ define i8 @test_salc() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_salc:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: salc # sched: [1:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_salc:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -2345,6 +2561,18 @@ define void @test_xchg_32(i32 %a0, i32 %a1, i32 *%a2) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_xchg_32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: xchgl %eax, %eax # sched: [2:1.00]
; BDVER2-NEXT: xchgl %ecx, %eax # sched: [2:1.00]
; BDVER2-NEXT: xchgl %eax, (%edx) # sched: [6:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_xchg_32:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]

File diff suppressed because it is too large Load Diff

View File

@ -1,25 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s --check-prefix=CORE2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 | FileCheck %s --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1)
; copy16bytes: lowering of a fixed 16-byte memcpy whose pointers carry no
; alignment attributes.
;   CORE2   - two 8-byte loads followed by two 8-byte stores (movq).
;   NEHALEM - one unaligned 16-byte vector load/store pair (movups).
;   BDVER2  - same movups pair; this prefix is driven by the -mcpu=x86-64
;             RUN line, so it records baseline output.
;   BTVER2  - the VEX-encoded form of the same pair (vmovups).
; The per-CPU check groups below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand. Each RUN prefix
; must have exactly one check group for this function: a second group placed
; after the first would be matched after that prefix's `retq` and fail.
define void @copy16bytes(i8* nocapture %a, i8* nocapture readonly %b) {
; CORE2-LABEL: copy16bytes:
; CORE2: ## %bb.0:
; CORE2-NEXT: movq (%rsi), %rax
; CORE2-NEXT: movq 8(%rsi), %rcx
; CORE2-NEXT: movq %rcx, 8(%rdi)
; CORE2-NEXT: movq %rax, (%rdi)
; CORE2-NEXT: retq
;
; NEHALEM-LABEL: copy16bytes:
; NEHALEM: ## %bb.0:
; NEHALEM-NEXT: movups (%rsi), %xmm0
; NEHALEM-NEXT: movups %xmm0, (%rdi)
; NEHALEM-NEXT: retq
;
; BDVER2-LABEL: copy16bytes:
; BDVER2: ## %bb.0:
; BDVER2-NEXT: movups (%rsi), %xmm0
; BDVER2-NEXT: movups %xmm0, (%rdi)
; BDVER2-NEXT: retq
;
; BTVER2-LABEL: copy16bytes:
; BTVER2: ## %bb.0:
; BTVER2-NEXT: vmovups (%rsi), %xmm0
; BTVER2-NEXT: vmovups %xmm0, (%rdi)
; BTVER2-NEXT: retq
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 16, i1 false)
  ret void
}

View File

@ -14,6 +14,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@ -100,6 +102,18 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_addps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_addps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_addps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
@ -208,6 +222,18 @@ define float @test_addss(float %a0, float %a1, float *%a2) {
; SKX-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_addss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_addss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_addss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
@ -320,6 +346,18 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_andps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_andps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BDVER2-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_andps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andps %xmm1, %xmm0 # sched: [1:0.50]
@ -436,6 +474,18 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_andnotps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_andnotps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BDVER2-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_andnotps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: andnps %xmm1, %xmm0 # sched: [1:0.50]
@ -563,6 +613,20 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cmpps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00]
; BDVER2-SSE-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_cmpps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BDVER2-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_cmpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqps %xmm0, %xmm1 # sched: [2:1.00]
@ -679,6 +743,18 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) {
; SKX-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cmpss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: cmpeqss (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_cmpss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_cmpss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cmpeqss %xmm1, %xmm0 # sched: [2:1.00]
@ -896,6 +972,34 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_comiss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [2:1.00]
; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
; BDVER2-SSE-NEXT: comiss (%rdi), %xmm0 # sched: [8:1.00]
; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_comiss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcomiss %xmm1, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-NEXT: sete %cl # sched: [1:0.50]
; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
; BDVER2-NEXT: vcomiss (%rdi), %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-NEXT: sete %dl # sched: [1:0.50]
; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_comiss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00]
@ -1051,6 +1155,20 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) {
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtsi2ss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00]
; BDVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00]
; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_cvtsi2ss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [5:2.00]
; BDVER2-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2ss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [14:1.00]
@ -1177,6 +1295,20 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) {
; SKX-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtsi2ssq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00]
; BDVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00]
; BDVER2-SSE-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_cvtsi2ssq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00]
; BDVER2-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00]
; BDVER2-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_cvtsi2ssq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [14:1.00]
@ -1303,6 +1435,20 @@ define i32 @test_cvtss2si(float %a0, float *%a1) {
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtss2si:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00]
; BDVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00]
; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_cvtss2si:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00]
; BDVER2-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_cvtss2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2si (%rdi), %eax # sched: [12:1.00]
@ -1432,6 +1578,20 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) {
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvtss2siq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00]
; BDVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00]
; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_cvtss2siq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00]
; BDVER2-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_cvtss2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvtss2si (%rdi), %rax # sched: [12:1.00]
@ -1561,6 +1721,20 @@ define i32 @test_cvttss2si(float %a0, float *%a1) {
; SKX-NEXT: addl %ecx, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvttss2si:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00]
; BDVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00]
; BDVER2-SSE-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_cvttss2si:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00]
; BDVER2-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_cvttss2si:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttss2si (%rdi), %eax # sched: [12:1.00]
@ -1687,6 +1861,20 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) {
; SKX-NEXT: addq %rcx, %rax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_cvttss2siq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00]
; BDVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00]
; BDVER2-SSE-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_cvttss2siq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00]
; BDVER2-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_cvttss2siq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: cvttss2si (%rdi), %rax # sched: [12:1.00]
@ -1800,6 +1988,18 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [17:5.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_divps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [14:14.00]
; BDVER2-SSE-NEXT: divps (%rdi), %xmm0 # sched: [20:14.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_divps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
; BDVER2-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_divps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divps %xmm1, %xmm0 # sched: [19:19.00]
@ -1908,6 +2108,18 @@ define float @test_divss(float %a0, float %a1, float *%a2) {
; SKX-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_divss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [14:14.00]
; BDVER2-SSE-NEXT: divss (%rdi), %xmm0 # sched: [20:14.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_divss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:14.00]
; BDVER2-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:14.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_divss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: divss %xmm1, %xmm0 # sched: [19:19.00]
@ -2016,6 +2228,18 @@ define void @test_ldmxcsr(i32 %a0) {
; SKX-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_ldmxcsr:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; BDVER2-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_ldmxcsr:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
; BDVER2-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_ldmxcsr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00]
@ -2126,6 +2350,18 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_maxps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_maxps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_maxps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxps %xmm1, %xmm0 # sched: [2:1.00]
@ -2235,6 +2471,18 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_maxss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_maxss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_maxss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: maxss %xmm1, %xmm0 # sched: [2:1.00]
@ -2344,6 +2592,18 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_minps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_minps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_minps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minps %xmm1, %xmm0 # sched: [2:1.00]
@ -2453,6 +2713,18 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_minss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_minss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_minss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: minss %xmm1, %xmm0 # sched: [2:1.00]
@ -2575,6 +2847,20 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) {
; SKX-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movaps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50]
; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movaps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50]
; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movaps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [5:1.00]
@ -2682,6 +2968,16 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) {
; SKX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movhlps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movhlps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movhlps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:0.50]
@ -2813,6 +3109,22 @@ define <4 x float> @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2)
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movhps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: movhps %xmm0, (%rdi) # sched: [1:1.00]
; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movhps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
@ -2935,6 +3247,18 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) {
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movlhps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movlhps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00]
; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movlhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50]
@ -3069,6 +3393,22 @@ define <4 x float> @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2)
; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movlps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
; BDVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movlps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
; BDVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movlps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
@ -3178,6 +3518,16 @@ define i32 @test_movmskps(<4 x float> %a0) {
; SKX-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movmskps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [2:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movmskps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movmskps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movmskps %xmm0, %eax # sched: [3:1.00]
@ -3274,6 +3624,16 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movntps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movntps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movntps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [3:1.00]
@ -3389,6 +3749,20 @@ define void @test_movss_mem(float* %a0, float* %a1) {
; SKX-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movss_mem:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movss_mem:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movss_mem:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [5:1.00]
@ -3494,6 +3868,16 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) {
; SKX-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movss_reg:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movss_reg:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movss_reg:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50]
@ -3609,6 +3993,20 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) {
; SKX-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movups:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50]
; BDVER2-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movups:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
; BDVER2-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movups:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movups (%rdi), %xmm0 # sched: [5:1.00]
@ -3721,6 +4119,18 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_mulps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_mulps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_mulps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulps %xmm1, %xmm0 # sched: [2:1.00]
@ -3829,6 +4239,18 @@ define float @test_mulss(float %a0, float %a1, float *%a2) {
; SKX-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_mulss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_mulss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_mulss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mulss %xmm1, %xmm0 # sched: [2:1.00]
@ -3941,6 +4363,18 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
; SKX-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_orps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_orps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BDVER2-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_orps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: orps %xmm1, %xmm0 # sched: [1:0.50]
@ -4105,6 +4539,26 @@ define void @test_prefetch(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_prefetch:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: #APP
; BDVER2-SSE-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
; BDVER2-SSE-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
; BDVER2-SSE-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
; BDVER2-SSE-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
; BDVER2-SSE-NEXT: #NO_APP
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_prefetch:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: prefetchnta (%rdi) # sched: [5:0.50]
; BDVER2-NEXT: prefetcht0 (%rdi) # sched: [5:0.50]
; BDVER2-NEXT: prefetcht1 (%rdi) # sched: [5:0.50]
; BDVER2-NEXT: prefetcht2 (%rdi) # sched: [5:0.50]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_prefetch:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: #APP
@ -4242,6 +4696,20 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_rcpps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-SSE-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_rcpps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_rcpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: rcpps %xmm0, %xmm1 # sched: [2:1.00]
@ -4384,6 +4852,22 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_rcpss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-SSE-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_rcpss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_rcpss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@ -4519,6 +5003,20 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_rsqrtps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00]
; BDVER2-SSE-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_rsqrtps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [11:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_rsqrtps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: rsqrtps %xmm0, %xmm1 # sched: [2:1.00]
@ -4661,6 +5159,22 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_rsqrtss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-SSE-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_rsqrtss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50]
; BDVER2-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_rsqrtss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [5:1.00]
@ -4774,6 +5288,16 @@ define void @test_sfence() {
; SKX-NEXT: sfence # sched: [2:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_sfence:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: sfence # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_sfence:
; BDVER2: # %bb.0:
; BDVER2-NEXT: sfence # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_sfence:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: sfence # sched: [1:1.00]
@ -4890,6 +5414,20 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_shufps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
; BDVER2-SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_shufps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00]
; BDVER2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,3],mem[0,0] sched: [7:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_shufps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:0.50]
@ -5017,6 +5555,20 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_sqrtps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:14.00]
; BDVER2-SSE-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:14.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_sqrtps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:14.00]
; BDVER2-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:14.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: sqrtps %xmm0, %xmm1 # sched: [21:21.00]
@ -5159,6 +5711,22 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_sqrtss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:14.00]
; BDVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50]
; BDVER2-SSE-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:14.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_sqrtss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:14.00]
; BDVER2-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
; BDVER2-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:14.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_sqrtss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps (%rdi), %xmm1 # sched: [5:1.00]
@ -5277,6 +5845,18 @@ define i32 @test_stmxcsr() {
; SKX-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_stmxcsr:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; BDVER2-SSE-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_stmxcsr:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00]
; BDVER2-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_stmxcsr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00]
@ -5387,6 +5967,18 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_subps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_subps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_subps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00]
@ -5495,6 +6087,18 @@ define float @test_subss(float %a0, float %a1, float *%a2) {
; SKX-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_subss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_subss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_subss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00]
@ -5707,6 +6311,34 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) {
; SKX-NEXT: movzbl %dl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_ucomiss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [2:1.00]
; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-SSE-NEXT: sete %cl # sched: [1:0.50]
; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
; BDVER2-SSE-NEXT: ucomiss (%rdi), %xmm0 # sched: [8:1.00]
; BDVER2-SSE-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-SSE-NEXT: sete %dl # sched: [1:0.50]
; BDVER2-SSE-NEXT: andb %al, %dl # sched: [1:0.33]
; BDVER2-SSE-NEXT: orb %cl, %dl # sched: [1:0.33]
; BDVER2-SSE-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_ucomiss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vucomiss %xmm1, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-NEXT: sete %cl # sched: [1:0.50]
; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
; BDVER2-NEXT: vucomiss (%rdi), %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: setnp %al # sched: [1:0.50]
; BDVER2-NEXT: sete %dl # sched: [1:0.50]
; BDVER2-NEXT: andb %al, %dl # sched: [1:0.33]
; BDVER2-NEXT: orb %cl, %dl # sched: [1:0.33]
; BDVER2-NEXT: movzbl %dl, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_ucomiss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00]
@ -5862,6 +6494,20 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_unpckhps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; BDVER2-SSE-NEXT: unpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_unpckhps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00]
; BDVER2-NEXT: vunpckhps {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_unpckhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50]
@ -5988,6 +6634,20 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_unpcklps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; BDVER2-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_unpcklps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00]
; BDVER2-NEXT: vunpcklps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_unpcklps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50]
@ -6105,6 +6765,18 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a
; SKX-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_xorps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_xorps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00]
; BDVER2-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_xorps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorps %xmm1, %xmm0 # sched: [1:0.50]
@ -6249,6 +6921,22 @@ define <4 x float> @test_fnop() nounwind {
; SKX-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_fnop:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: #APP
; BDVER2-SSE-NEXT: nop # sched: [1:0.25]
; BDVER2-SSE-NEXT: #NO_APP
; BDVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.25]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_fnop:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: nop # sched: [1:0.25]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: vxorps %xmm0, %xmm0, %xmm0 # sched: [0:0.25]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_fnop:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: xorps %xmm0, %xmm0 # sched: [0:0.50]

File diff suppressed because it is too large Load Diff

View File

@ -14,7 +14,9 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+sse3 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=+sse3 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-ssse3 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,ZNVER1
@ -98,6 +100,18 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_addsubpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_addsubpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_addsubpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00]
@ -207,6 +221,18 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_addsubps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_addsubps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_addsubps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00]
@ -316,6 +342,18 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; SKX-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_haddpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00]
; BDVER2-SSE-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_haddpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BDVER2-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_haddpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: haddpd %xmm1, %xmm0 # sched: [3:1.00]
@ -425,6 +463,18 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; SKX-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_haddps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00]
; BDVER2-SSE-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_haddps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BDVER2-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_haddps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: haddps %xmm1, %xmm0 # sched: [3:1.00]
@ -534,6 +584,18 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
; SKX-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_hsubpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00]
; BDVER2-SSE-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_hsubpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BDVER2-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_hsubpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: hsubpd %xmm1, %xmm0 # sched: [3:1.00]
@ -643,6 +705,18 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
; SKX-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [12:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_hsubps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00]
; BDVER2-SSE-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_hsubps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00]
; BDVER2-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_hsubps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: hsubps %xmm1, %xmm0 # sched: [3:1.00]
@ -741,6 +815,16 @@ define <16 x i8> @test_lddqu(i8* %a0) {
; SKX-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_lddqu:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_lddqu:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_lddqu:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: lddqu (%rdi), %xmm0 # sched: [5:1.00]
@ -857,6 +941,20 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
; SKX-NEXT: monitor # sched: [100:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_monitor:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.33]
; BDVER2-SSE-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; BDVER2-SSE-NEXT: monitor # sched: [100:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_monitor:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl %esi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: leaq (%rdi), %rax # sched: [1:0.50]
; BDVER2-NEXT: monitor # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_monitor:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %esi, %ecx # sched: [1:0.50]
@ -982,6 +1080,20 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movddup:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00]
; BDVER2-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50]
; BDVER2-SSE-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movddup:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00]
; BDVER2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50]
; BDVER2-NEXT: vsubpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movddup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:0.50]
@ -1109,6 +1221,20 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movshdup:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00]
; BDVER2-SSE-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movshdup:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00]
; BDVER2-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movshdup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:0.50]
@ -1236,6 +1362,20 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movsldup:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00]
; BDVER2-SSE-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movsldup:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00]
; BDVER2-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movsldup:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:0.50]
@ -1362,6 +1502,20 @@ define void @test_mwait(i32 %a0, i32 %a1) {
; SKX-NEXT: mwait # sched: [20:2.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_mwait:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: movl %edi, %ecx # sched: [1:0.33]
; BDVER2-SSE-NEXT: mwait # sched: [100:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_mwait:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl %esi, %eax # sched: [1:0.33]
; BDVER2-NEXT: movl %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: mwait # sched: [100:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_mwait:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %esi, %eax # sched: [1:0.50]

View File

@ -13,6 +13,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=+sse4.1 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.2 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@ -103,6 +105,20 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_blendpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
; BDVER2-SSE-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_blendpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vblendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
; BDVER2-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:0.50]
; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_blendpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1] sched: [1:0.50]
@ -222,6 +238,20 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_blendps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; BDVER2-SSE-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_blendps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
; BDVER2-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0],mem[1],xmm1[2,3] sched: [7:0.50]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_blendps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50]
@ -350,6 +380,21 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; SKX-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_blendvpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00]
; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
; BDVER2-SSE-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
; BDVER2-SSE-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_blendvpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_blendvpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm3 # sched: [1:0.50]
@ -480,6 +525,21 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; SKX-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_blendvps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00]
; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
; BDVER2-SSE-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
; BDVER2-SSE-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_blendvps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_blendvps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps %xmm0, %xmm3 # sched: [1:0.50]
@ -589,6 +649,18 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double>
; SKX-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_dppd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_dppd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_dppd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:3.00]
@ -692,6 +764,18 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2
; SKX-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [19:1.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_dpps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00]
; BDVER2-SSE-NEXT: dpps $7, (%rdi), %xmm0 # sched: [18:2.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_dpps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00]
; BDVER2-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_dpps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: dpps $7, %xmm1, %xmm0 # sched: [11:3.00]
@ -795,6 +879,18 @@ define i32 @test_extractps(<4 x float> %a0, i32 *%a1) {
; SKX-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_extractps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
; BDVER2-SSE-NEXT: extractps $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_extractps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vextractps $3, %xmm0, %eax # sched: [3:1.00]
; BDVER2-NEXT: vextractps $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_extractps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: extractps $3, %xmm0, %eax # sched: [3:1.00]
@ -899,6 +995,18 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2)
; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_insertps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; BDVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_insertps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00]
; BDVER2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_insertps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:0.50]
@ -990,6 +1098,16 @@ define <2 x i64> @test_movntdqa(i8* %a0) {
; SKX-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_movntdqa:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movntdqa:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_movntdqa:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movntdqa (%rdi), %xmm0 # sched: [5:1.00]
@ -1087,6 +1205,18 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_mpsadbw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [7:1.00]
; BDVER2-SSE-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [13:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_mpsadbw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:1.00]
; BDVER2-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_mpsadbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [3:2.00]
@ -1191,6 +1321,18 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_packusdw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_packusdw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_packusdw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50]
@ -1316,6 +1458,21 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16
; SKX-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:0.67]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pblendvb:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33]
; BDVER2-SSE-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00]
; BDVER2-SSE-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [2:1.00]
; BDVER2-SSE-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [8:1.00]
; BDVER2-SSE-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pblendvb:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pblendvb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.50]
@ -1437,6 +1594,20 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pblendw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
; BDVER2-SSE-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pblendw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
; BDVER2-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],mem[2,3],xmm1[4,5,6],mem[7] sched: [7:0.50]
; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pblendw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50]
@ -1544,6 +1715,18 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pcmpeqq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pcmpeqq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpcomeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpcomeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pcmpeqq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50]
@ -1648,6 +1831,18 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) {
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pextrb:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
; BDVER2-SSE-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pextrb:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00]
; BDVER2-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pextrb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00]
@ -1763,6 +1958,20 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) {
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pextrd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00]
; BDVER2-SSE-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pextrd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpaddd %xmm0, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00]
; BDVER2-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pextrd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: paddd %xmm0, %xmm0 # sched: [1:0.50]
@ -1870,6 +2079,18 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) {
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pextrq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
; BDVER2-SSE-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pextrq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00]
; BDVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pextrq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00]
@ -1972,6 +2193,18 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) {
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [2:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pextrw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
; BDVER2-SSE-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pextrw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00]
; BDVER2-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pextrw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00]
@ -2075,6 +2308,18 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) {
; SKX-NEXT: vphminposuw %xmm0, %xmm0 # sched: [4:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_phminposuw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_phminposuw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_phminposuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phminposuw (%rdi), %xmm0 # sched: [7:1.00]
@ -2178,6 +2423,18 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) {
; SKX-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pinsrb:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00]
; BDVER2-SSE-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pinsrb:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pinsrb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrb $1, %edi, %xmm0 # sched: [7:0.50]
@ -2280,6 +2537,18 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) {
; SKX-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [6:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pinsrd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00]
; BDVER2-SSE-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pinsrd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pinsrd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrd $1, %edi, %xmm0 # sched: [7:0.50]
@ -2394,6 +2663,20 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pinsrq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00]
; BDVER2-SSE-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50]
; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pinsrq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pinsrq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [7:0.50]
@ -2501,6 +2784,18 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmaxsb:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmaxsb:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmaxsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50]
@ -2604,6 +2899,18 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmaxsd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmaxsd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmaxsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50]
@ -2707,6 +3014,18 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmaxud:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmaxud:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmaxud:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50]
@ -2810,6 +3129,18 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmaxuw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmaxuw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmaxuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50]
@ -2913,6 +3244,18 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pminsb:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pminsb:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pminsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50]
@ -3016,6 +3359,18 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pminsd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pminsd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pminsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50]
@ -3119,6 +3474,18 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pminud:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pminud:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pminud:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50]
@ -3222,6 +3589,18 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pminuw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pminuw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pminuw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50]
@ -3338,6 +3717,20 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovsxbw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovsxbw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50]
@ -3459,6 +3852,20 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovsxbd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovsxbd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxbd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50]
@ -3580,6 +3987,20 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovsxbq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovsxbq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxbq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50]
@ -3701,6 +4122,20 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovsxdq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovsxdq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50]
@ -3822,6 +4257,20 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovsxwd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovsxwd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50]
@ -3943,6 +4392,20 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovsxwq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovsxwq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovsxwq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50]
@ -4064,6 +4527,20 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) {
; SKX-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovzxbw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
; BDVER2-SSE-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovzxbw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
; BDVER2-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50]
; BDVER2-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovzxbw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50]
@ -4185,6 +4662,20 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovzxbd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovzxbd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
; BDVER2-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50]
; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovzxbd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50]
@ -4306,6 +4797,20 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovzxbq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovzxbq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
; BDVER2-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50]
; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovzxbq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50]
@ -4427,6 +4932,20 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovzxdq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50]
; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovzxdq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
; BDVER2-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50]
; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovzxdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50]
@ -4548,6 +5067,20 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) {
; SKX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovzxwd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
; BDVER2-SSE-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovzxwd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
; BDVER2-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50]
; BDVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovzxwd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50]
@ -4669,6 +5202,20 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) {
; SKX-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmovzxwq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
; BDVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
; BDVER2-SSE-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmovzxwq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
; BDVER2-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50]
; BDVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmovzxwq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50]
@ -4789,6 +5336,20 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmuldq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmuldq %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [11:1.00]
; BDVER2-SSE-NEXT: por %xmm2, %xmm0 # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmuldq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vpmuldq (%rdi), %xmm2, %xmm1 # sched: [11:1.00]
; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmuldq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmuldq (%rdi), %xmm2 # sched: [7:1.00]
@ -4897,6 +5458,18 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [16:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmulld:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: pmulld (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmulld:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmulld:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulld %xmm1, %xmm0 # sched: [4:2.00]
@ -5047,6 +5620,26 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: movzbl %cl, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_ptest:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00]
; BDVER2-SSE-NEXT: setb %al # sched: [1:0.50]
; BDVER2-SSE-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00]
; BDVER2-SSE-NEXT: setb %cl # sched: [1:0.50]
; BDVER2-SSE-NEXT: andb %al, %cl # sched: [1:0.33]
; BDVER2-SSE-NEXT: movzbl %cl, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_ptest:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00]
; BDVER2-NEXT: setb %al # sched: [1:0.50]
; BDVER2-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00]
; BDVER2-NEXT: setb %cl # sched: [1:0.50]
; BDVER2-NEXT: andb %al, %cl # sched: [1:0.33]
; BDVER2-NEXT: movzbl %cl, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_ptest:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: ptest %xmm1, %xmm0 # sched: [3:1.00]
@ -5181,6 +5774,20 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) {
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_roundpd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
; BDVER2-SSE-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_roundpd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_roundpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00]
@ -5303,6 +5910,20 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) {
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_roundps:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
; BDVER2-SSE-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_roundps:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00]
; BDVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_roundps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00]
@ -5430,6 +6051,21 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl
; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_roundsd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00]
; BDVER2-SSE-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00]
; BDVER2-SSE-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_roundsd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BDVER2-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_roundsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
@ -5559,6 +6195,21 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *
; SKX-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_roundss:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00]
; BDVER2-SSE-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00]
; BDVER2-SSE-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00]
; BDVER2-SSE-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_roundss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00]
; BDVER2-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; BDVER2-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_roundss:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]

View File

@ -13,6 +13,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2,+pclmul -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx,+xop -mattr=+sse4.2,+pclmul -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-avx | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@ -103,6 +105,20 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) {
; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: crc32_32_8:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: crc32_32_8:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00]
; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: crc32_32_8:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
@ -222,6 +238,20 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) {
; SKX-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: crc32_32_16:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: crc32w %si, %eax # sched: [3:1.00]
; BDVER2-SSE-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: crc32_32_16:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
; BDVER2-NEXT: crc32w %si, %eax # sched: [3:1.00]
; BDVER2-NEXT: crc32w (%rdx), %eax # sched: [8:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: crc32_32_16:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
@ -341,6 +371,20 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) {
; SKX-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: crc32_32_32:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: crc32l %esi, %eax # sched: [3:1.00]
; BDVER2-SSE-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: crc32_32_32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl %edi, %eax # sched: [1:0.33]
; BDVER2-NEXT: crc32l %esi, %eax # sched: [3:1.00]
; BDVER2-NEXT: crc32l (%rdx), %eax # sched: [8:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: crc32_32_32:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl %edi, %eax # sched: [1:0.50]
@ -460,6 +504,20 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind {
; SKX-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: crc32_64_8:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
; BDVER2-SSE-NEXT: crc32b %sil, %eax # sched: [3:1.00]
; BDVER2-SSE-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: crc32_64_8:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33]
; BDVER2-NEXT: crc32b %sil, %eax # sched: [3:1.00]
; BDVER2-NEXT: crc32b (%rdx), %eax # sched: [8:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: crc32_64_8:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
@ -579,6 +637,20 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) {
; SKX-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: crc32_64_64:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.33]
; BDVER2-SSE-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
; BDVER2-SSE-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: crc32_64_64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movq %rdi, %rax # sched: [1:0.33]
; BDVER2-NEXT: crc32q %rsi, %rax # sched: [3:1.00]
; BDVER2-NEXT: crc32q (%rdx), %rax # sched: [8:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: crc32_64_64:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movq %rdi, %rax # sched: [1:0.50]
@ -770,6 +842,32 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pcmpestri:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
; BDVER2-SSE-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
; BDVER2-SSE-NEXT: movl %ecx, %esi # sched: [1:0.33]
; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
; BDVER2-SSE-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
; BDVER2-SSE-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pcmpestri:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
; BDVER2-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67]
; BDVER2-NEXT: movl %ecx, %esi # sched: [1:0.33]
; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
; BDVER2-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33]
; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
; BDVER2-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pcmpestri:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
@ -950,6 +1048,26 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [25:4.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pcmpestrm:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
; BDVER2-SSE-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
; BDVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: movl $7, %edx # sched: [1:0.33]
; BDVER2-SSE-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pcmpestrm:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
; BDVER2-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67]
; BDVER2-NEXT: movl $7, %eax # sched: [1:0.33]
; BDVER2-NEXT: movl $7, %edx # sched: [1:0.33]
; BDVER2-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pcmpestrm:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movl $7, %eax # sched: [1:0.50]
@ -1105,6 +1223,24 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pcmpistri:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
; BDVER2-SSE-NEXT: movl %ecx, %eax # sched: [1:0.33]
; BDVER2-SSE-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
; BDVER2-SSE-NEXT: # kill: def $ecx killed $ecx def $rcx
; BDVER2-SSE-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pcmpistri:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00]
; BDVER2-NEXT: movl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00]
; BDVER2-NEXT: # kill: def $ecx killed $ecx def $rcx
; BDVER2-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pcmpistri:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [7:2.00]
@ -1221,6 +1357,18 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [16:3.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pcmpistrm:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; BDVER2-SSE-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pcmpistrm:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00]
; BDVER2-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pcmpistrm:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [8:2.00]
@ -1324,6 +1472,18 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [9:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pcmpgtq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pcmpgtq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpcomgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpcomgtq (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pcmpgtq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [1:0.50]
@ -1428,6 +1588,18 @@ define <2 x i64> @test_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) {
; SKX-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [12:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pclmulqdq:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [14:6.00]
; BDVER2-SSE-NEXT: pclmulqdq $0, (%rdi), %xmm0 # sched: [14:5.67]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pclmulqdq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpclmulqdq $0, %xmm1, %xmm0, %xmm0 # sched: [14:6.00]
; BDVER2-NEXT: vpclmulqdq $0, (%rdi), %xmm0, %xmm0 # sched: [14:5.67]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pclmulqdq:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pclmulqdq $0, %xmm1, %xmm0 # sched: [2:1.00]

View File

@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1
@ -9,6 +10,11 @@ define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) {
; GENERIC-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_extrq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_extrq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: extrq %xmm1, %xmm0 # sched: [1:0.50]
@ -29,6 +35,11 @@ define <2 x i64> @test_extrqi(<2 x i64> %a0) {
; GENERIC-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_extrqi:
; BDVER2: # %bb.0:
; BDVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_extrqi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: extrq $2, $3, %xmm0 # sched: [1:0.50]
@ -49,6 +60,11 @@ define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) {
; GENERIC-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_insertq:
; BDVER2: # %bb.0:
; BDVER2-NEXT: insertq %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_insertq:
; BTVER2: # %bb.0:
; BTVER2-NEXT: insertq %xmm1, %xmm0 # sched: [2:2.00]
@ -69,6 +85,11 @@ define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) {
; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_insertqi:
; BDVER2: # %bb.0:
; BDVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_insertqi:
; BTVER2: # %bb.0:
; BTVER2-NEXT: insertq $6, $5, %xmm1, %xmm0 # sched: [2:2.00]
@ -89,6 +110,11 @@ define void @test_movntsd(i8* %p, <2 x double> %a) {
; GENERIC-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movntsd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movntsd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movntsd %xmm0, (%rdi) # sched: [3:1.00]
@ -109,6 +135,11 @@ define void @test_movntss(i8* %p, <4 x float> %a) {
; GENERIC-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_movntss:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-LABEL: test_movntss:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movntss %xmm0, (%rdi) # sched: [3:1.00]

View File

@ -14,6 +14,8 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,SKX-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skx -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,SKX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BDVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx -mattr=+ssse3 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,BTVER2-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 -mattr=-avx2 | FileCheck %s --check-prefixes=CHECK,BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 -mattr=-sse4.1 | FileCheck %s --check-prefixes=CHECK,ZNVER1-SSE
@ -113,6 +115,20 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) {
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pabsb:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pabsb:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pabsb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50]
@ -242,6 +258,20 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) {
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pabsd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pabsd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pabsd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50]
@ -371,6 +401,20 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) {
; SKX-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pabsw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pabsw (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: por %xmm1, %xmm0 # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pabsw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpabsw (%rdi), %xmm1 # sched: [7:0.50]
; BDVER2-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pabsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pabsw %xmm0, %xmm1 # sched: [1:0.50]
@ -495,6 +539,19 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_palignr:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
; BDVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
; BDVER2-SSE-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_palignr:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
; BDVER2-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_palignr:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50]
@ -605,6 +662,18 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_phaddd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50]
; BDVER2-SSE-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_phaddd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; BDVER2-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_phaddd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddd %xmm1, %xmm0 # sched: [1:0.50]
@ -714,6 +783,18 @@ define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_phaddsw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50]
; BDVER2-SSE-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_phaddsw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; BDVER2-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_phaddsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddsw %xmm1, %xmm0 # sched: [1:0.50]
@ -823,6 +904,18 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_phaddw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50]
; BDVER2-SSE-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_phaddw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; BDVER2-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_phaddw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phaddw %xmm1, %xmm0 # sched: [1:0.50]
@ -932,6 +1025,18 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_phsubd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50]
; BDVER2-SSE-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_phsubd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; BDVER2-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_phsubd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubd %xmm1, %xmm0 # sched: [1:0.50]
@ -1041,6 +1146,18 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_phsubsw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50]
; BDVER2-SSE-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_phsubsw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; BDVER2-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_phsubsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubsw %xmm1, %xmm0 # sched: [1:0.50]
@ -1150,6 +1267,18 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:2.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_phsubw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50]
; BDVER2-SSE-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_phsubw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50]
; BDVER2-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_phsubw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: phsubw %xmm1, %xmm0 # sched: [1:0.50]
@ -1259,6 +1388,18 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmaddubsw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmaddubsw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmaddubsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [2:1.00]
@ -1369,6 +1510,18 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [10:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pmulhrsw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [5:1.00]
; BDVER2-SSE-NEXT: pmulhrsw (%rdi), %xmm0 # sched: [11:1.00]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pmulhrsw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00]
; BDVER2-NEXT: vpmulhrsw (%rdi), %xmm0, %xmm0 # sched: [11:1.00]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pmulhrsw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [2:1.00]
@ -1478,6 +1631,18 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:1.00]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_pshufb:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_pshufb:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_pshufb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: pshufb %xmm1, %xmm0 # sched: [2:2.00]
@ -1591,6 +1756,18 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) {
; SKX-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_psignb:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_psignb:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_psignb:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50]
@ -1704,6 +1881,18 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) {
; SKX-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_psignd:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_psignd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_psignd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50]
@ -1817,6 +2006,18 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) {
; SKX-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BDVER2-SSE-LABEL: test_psignw:
; BDVER2-SSE: # %bb.0:
; BDVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]
; BDVER2-SSE-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50]
; BDVER2-SSE-NEXT: retq # sched: [1:1.00]
;
; BDVER2-LABEL: test_psignw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; BDVER2-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BTVER2-SSE-LABEL: test_psignw:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50]

View File

@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver2 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+tbm | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver3 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=bdver4 | FileCheck %s --check-prefix=BDVER --check-prefix=BDVER4
@ -14,12 +14,28 @@ define i32 @test_x86_tbm_bextri_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_bextri_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_bextri_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER2-NEXT: # sched: [2:1.00]
; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER2-NEXT: # sched: [7:1.00]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_bextri_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_bextri_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = lshr i32 %a0, 4
%m0 = lshr i32 %a1, 4
@ -39,12 +55,28 @@ define i64 @test_x86_tbm_bextri_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_bextri_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_bextri_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER2-NEXT: # sched: [2:1.00]
; BDVER2-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER2-NEXT: # sched: [7:1.00]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_bextri_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER3-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_bextri_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
; BDVER4-NEXT: bextrl $3076, (%rsi), %eax # imm = 0xC04
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = lshr i64 %a0, 4
%m0 = lshr i64 %a1, 4
@ -62,12 +94,26 @@ define i32 @test_x86_tbm_blcfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcfill_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcfilll %edi, %ecx
; BDVER-NEXT: blcfilll (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blcfill_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcfilll %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: blcfilll (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcfill_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcfilll %edi, %ecx
; BDVER3-NEXT: blcfilll (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcfill_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcfilll %edi, %ecx
; BDVER4-NEXT: blcfilll (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
@ -85,12 +131,26 @@ define i64 @test_x86_tbm_blcfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcfill_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blcfillq %rdi, %rcx
; BDVER-NEXT: blcfillq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blcfill_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcfillq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: blcfillq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcfill_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcfillq %rdi, %rcx
; BDVER3-NEXT: blcfillq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcfill_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcfillq %rdi, %rcx
; BDVER4-NEXT: blcfillq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
@ -108,12 +168,26 @@ define i32 @test_x86_tbm_blci_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blci_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcil %edi, %ecx
; BDVER-NEXT: blcil (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blci_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcil %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: blcil (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blci_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcil %edi, %ecx
; BDVER3-NEXT: blcil (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blci_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcil %edi, %ecx
; BDVER4-NEXT: blcil (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 1, %a0
%m0 = add i32 1, %a1
@ -133,12 +207,26 @@ define i64 @test_x86_tbm_blci_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blci_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blciq %rdi, %rcx
; BDVER-NEXT: blciq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blci_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blciq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: blciq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blci_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blciq %rdi, %rcx
; BDVER3-NEXT: blciq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blci_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blciq %rdi, %rcx
; BDVER4-NEXT: blciq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 1, %a0
%m0 = add i64 1, %a1
@ -158,12 +246,26 @@ define i32 @test_x86_tbm_blcic_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcic_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcicl %edi, %ecx
; BDVER-NEXT: blcicl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blcic_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcicl %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: blcicl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcic_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcicl %edi, %ecx
; BDVER3-NEXT: blcicl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcic_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcicl %edi, %ecx
; BDVER4-NEXT: blcicl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
@ -183,12 +285,26 @@ define i64 @test_x86_tbm_blcic_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcic_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blcicq %rdi, %rcx
; BDVER-NEXT: blcicq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blcic_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcicq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: blcicq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcic_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcicq %rdi, %rcx
; BDVER3-NEXT: blcicq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcic_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcicq %rdi, %rcx
; BDVER4-NEXT: blcicq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
@ -208,12 +324,26 @@ define i32 @test_x86_tbm_blcmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcmskl %edi, %ecx
; BDVER-NEXT: blcmskl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcmskl %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: blcmskl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcmskl %edi, %ecx
; BDVER3-NEXT: blcmskl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcmsk_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcmskl %edi, %ecx
; BDVER4-NEXT: blcmskl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
@ -231,12 +361,26 @@ define i64 @test_x86_tbm_blcmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blcmskq %rdi, %rcx
; BDVER-NEXT: blcmskq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcmskq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: blcmskq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcmskq %rdi, %rcx
; BDVER3-NEXT: blcmskq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcmsk_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcmskq %rdi, %rcx
; BDVER4-NEXT: blcmskq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
@ -254,12 +398,26 @@ define i32 @test_x86_tbm_blcs_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcs_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blcsl %edi, %ecx
; BDVER-NEXT: blcsl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blcs_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcsl %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: blcsl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcs_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcsl %edi, %ecx
; BDVER3-NEXT: blcsl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcs_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcsl %edi, %ecx
; BDVER4-NEXT: blcsl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, 1
%m0 = add i32 %a1, 1
@ -277,12 +435,26 @@ define i64 @test_x86_tbm_blcs_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blcs_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blcsq %rdi, %rcx
; BDVER-NEXT: blcsq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blcs_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blcsq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: blcsq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blcs_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blcsq %rdi, %rcx
; BDVER3-NEXT: blcsq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blcs_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blcsq %rdi, %rcx
; BDVER4-NEXT: blcsq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, 1
%m0 = add i64 %a1, 1
@ -300,12 +472,26 @@ define i32 @test_x86_tbm_blsfill_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsfill_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blsfilll %edi, %ecx
; BDVER-NEXT: blsfilll (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blsfill_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsfilll %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: blsfilll (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blsfill_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blsfilll %edi, %ecx
; BDVER3-NEXT: blsfilll (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blsfill_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blsfilll %edi, %ecx
; BDVER4-NEXT: blsfilll (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = add i32 %a0, -1
%m0 = add i32 %a1, -1
@ -323,12 +509,26 @@ define i64 @test_x86_tbm_blsfill_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsfill_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blsfillq %rdi, %rcx
; BDVER-NEXT: blsfillq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blsfill_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsfillq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: blsfillq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blsfill_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blsfillq %rdi, %rcx
; BDVER3-NEXT: blsfillq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blsfill_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blsfillq %rdi, %rcx
; BDVER4-NEXT: blsfillq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = add i64 %a0, -1
%m0 = add i64 %a1, -1
@ -346,12 +546,26 @@ define i32 @test_x86_tbm_blsic_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsic_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: blsicl %edi, %ecx
; BDVER-NEXT: blsicl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blsic_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsicl %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: blsicl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blsic_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blsicl %edi, %ecx
; BDVER3-NEXT: blsicl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blsic_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blsicl %edi, %ecx
; BDVER4-NEXT: blsicl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
@ -371,12 +585,26 @@ define i64 @test_x86_tbm_blsic_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_blsic_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: blsicq %rdi, %rcx
; BDVER-NEXT: blsicq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_blsic_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: blsicq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: blsicq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_blsic_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: blsicq %rdi, %rcx
; BDVER3-NEXT: blsicq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_blsic_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: blsicq %rdi, %rcx
; BDVER4-NEXT: blsicq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
@ -396,12 +624,26 @@ define i32 @test_x86_tbm_t1mskc_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: t1mskcl %edi, %ecx
; BDVER-NEXT: t1mskcl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: t1mskcl %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: t1mskcl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: t1mskcl %edi, %ecx
; BDVER3-NEXT: t1mskcl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_t1mskc_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: t1mskcl %edi, %ecx
; BDVER4-NEXT: t1mskcl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
@ -421,12 +663,26 @@ define i64 @test_x86_tbm_t1mskc_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: t1mskcq %rdi, %rcx
; BDVER-NEXT: t1mskcq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: t1mskcq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: t1mskcq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: t1mskcq %rdi, %rcx
; BDVER3-NEXT: t1mskcq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_t1mskc_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: t1mskcq %rdi, %rcx
; BDVER4-NEXT: t1mskcq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1
@ -446,12 +702,26 @@ define i32 @test_x86_tbm_tzmsk_u32(i32 %a0, i32* nocapture %p1) nounwind {
; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER: # %bb.0:
; BDVER-NEXT: tzmskl %edi, %ecx
; BDVER-NEXT: tzmskl (%rsi), %eax
; BDVER-NEXT: addl %ecx, %eax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER2: # %bb.0:
; BDVER2-NEXT: tzmskl %edi, %ecx # sched: [1:0.33]
; BDVER2-NEXT: tzmskl (%rsi), %eax # sched: [6:0.50]
; BDVER2-NEXT: addl %ecx, %eax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER3: # %bb.0:
; BDVER3-NEXT: tzmskl %edi, %ecx
; BDVER3-NEXT: tzmskl (%rsi), %eax
; BDVER3-NEXT: addl %ecx, %eax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_tzmsk_u32:
; BDVER4: # %bb.0:
; BDVER4-NEXT: tzmskl %edi, %ecx
; BDVER4-NEXT: tzmskl (%rsi), %eax
; BDVER4-NEXT: addl %ecx, %eax
; BDVER4-NEXT: retq
%a1 = load i32, i32* %p1
%r0 = xor i32 %a0, -1
%m0 = xor i32 %a1, -1
@ -471,12 +741,26 @@ define i64 @test_x86_tbm_tzmsk_u64(i64 %a0, i64* nocapture %p1) nounwind {
; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; BDVER-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER: # %bb.0:
; BDVER-NEXT: tzmskq %rdi, %rcx
; BDVER-NEXT: tzmskq (%rsi), %rax
; BDVER-NEXT: addq %rcx, %rax
; BDVER-NEXT: retq
; BDVER2-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER2: # %bb.0:
; BDVER2-NEXT: tzmskq %rdi, %rcx # sched: [1:0.33]
; BDVER2-NEXT: tzmskq (%rsi), %rax # sched: [6:0.50]
; BDVER2-NEXT: addq %rcx, %rax # sched: [1:0.33]
; BDVER2-NEXT: retq # sched: [1:1.00]
;
; BDVER3-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER3: # %bb.0:
; BDVER3-NEXT: tzmskq %rdi, %rcx
; BDVER3-NEXT: tzmskq (%rsi), %rax
; BDVER3-NEXT: addq %rcx, %rax
; BDVER3-NEXT: retq
;
; BDVER4-LABEL: test_x86_tbm_tzmsk_u64:
; BDVER4: # %bb.0:
; BDVER4-NEXT: tzmskq %rdi, %rcx
; BDVER4-NEXT: tzmskq (%rsi), %rax
; BDVER4-NEXT: addq %rcx, %rax
; BDVER4-NEXT: retq
%a1 = load i64, i64* %p1
%r0 = xor i64 %a0, -1
%m0 = xor i64 %a1, -1

View File

@ -8,6 +8,7 @@
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=BDVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=i686-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
@ -68,6 +69,13 @@ define void @test_f2xm1() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_f2xm1:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: f2xm1 # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_f2xm1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -142,6 +150,13 @@ define void @test_fabs() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fabs:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fabs # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fabs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -256,6 +271,18 @@ define void @test_fadd(float *%a0, double *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fadd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fadd %st(0), %st(1) # sched: [3:1.00]
; BDVER2-NEXT: fadd %st(2) # sched: [3:1.00]
; BDVER2-NEXT: fadds (%ecx) # sched: [10:1.00]
; BDVER2-NEXT: faddl (%eax) # sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fadd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -380,6 +407,18 @@ define void @test_faddp_fiadd(i16 *%a0, i32 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_faddp_fiadd:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: faddp %st(1) # sched: [3:1.00]
; BDVER2-NEXT: faddp %st(2) # sched: [3:1.00]
; BDVER2-NEXT: fiadds (%ecx) # sched: [13:2.00]
; BDVER2-NEXT: fiaddl (%eax) # sched: [13:2.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_faddp_fiadd:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -480,6 +519,15 @@ define void @test_fbld_fbstp(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fbld_fbstp:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fbld (%eax) # sched: [100:0.33]
; BDVER2-NEXT: fbstp (%eax) # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fbld_fbstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -558,6 +606,13 @@ define void @test_fchs() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fchs:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fchs # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fchs:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -640,6 +695,14 @@ define void @test_fclex() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fclex:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: wait # sched: [100:0.33]
; BDVER2-NEXT: fnclex # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fclex:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -716,6 +779,13 @@ define void @test_fnclex() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fnclex:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fnclex # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fnclex:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -846,6 +916,20 @@ define void @test_fcmov() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fcmov:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fcmovb %st(1), %st(0) # sched: [3:2.00]
; BDVER2-NEXT: fcmovbe %st(1), %st(0) # sched: [3:2.00]
; BDVER2-NEXT: fcmove %st(1), %st(0) # sched: [3:2.00]
; BDVER2-NEXT: fcmovnb %st(1), %st(0) # sched: [3:2.00]
; BDVER2-NEXT: fcmovnbe %st(1), %st(0) # sched: [3:2.00]
; BDVER2-NEXT: fcmovne %st(1), %st(0) # sched: [3:2.00]
; BDVER2-NEXT: fcmovnu %st(1), %st(0) # sched: [3:2.00]
; BDVER2-NEXT: fcmovu %st(1), %st(0) # sched: [3:2.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fcmov:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -974,6 +1058,18 @@ define void @test_fcom(float *%a0, double *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fcom:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fcom %st(1) # sched: [1:1.00]
; BDVER2-NEXT: fcom %st(3) # sched: [1:1.00]
; BDVER2-NEXT: fcoms (%ecx) # sched: [8:1.00]
; BDVER2-NEXT: fcoml (%eax) # sched: [8:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fcom:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -1106,6 +1202,19 @@ define void @test_fcomp_fcompp(float *%a0, double *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fcomp_fcompp:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fcomp %st(1) # sched: [1:1.00]
; BDVER2-NEXT: fcomp %st(3) # sched: [1:1.00]
; BDVER2-NEXT: fcomps (%ecx) # sched: [8:1.00]
; BDVER2-NEXT: fcompl (%eax) # sched: [8:1.00]
; BDVER2-NEXT: fcompp # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fcomp_fcompp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -1200,6 +1309,14 @@ define void @test_fcomi_fcomip() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fcomi_fcomip:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fcomi %st(3) # sched: [3:1.00]
; BDVER2-NEXT: fcompi %st(3) # sched: [3:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fcomi_fcomip:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1276,6 +1393,13 @@ define void @test_fcos() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fcos:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fcos # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fcos:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1350,6 +1474,13 @@ define void @test_fdecstp() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fdecstp:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fdecstp # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fdecstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -1464,6 +1595,18 @@ define void @test_fdiv(float *%a0, double *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fdiv:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fdiv %st(0), %st(1) # sched: [14:14.00]
; BDVER2-NEXT: fdiv %st(2) # sched: [14:14.00]
; BDVER2-NEXT: fdivs (%ecx) # sched: [31:1.00]
; BDVER2-NEXT: fdivl (%eax) # sched: [31:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fdiv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -1588,6 +1731,18 @@ define void @test_fdivp_fidiv(i16 *%a0, i32 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fdivp_fidiv:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fdivp %st(1) # sched: [14:14.00]
; BDVER2-NEXT: fdivp %st(2) # sched: [14:14.00]
; BDVER2-NEXT: fidivs (%ecx) # sched: [34:1.00]
; BDVER2-NEXT: fidivl (%eax) # sched: [34:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fdivp_fidiv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -1712,6 +1867,18 @@ define void @test_fdivr(float *%a0, double *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fdivr:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fdivr %st(0), %st(1) # sched: [14:14.00]
; BDVER2-NEXT: fdivr %st(2) # sched: [14:14.00]
; BDVER2-NEXT: fdivrs (%ecx) # sched: [31:1.00]
; BDVER2-NEXT: fdivrl (%eax) # sched: [31:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fdivr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -1836,6 +2003,18 @@ define void @test_fdivrp_fidivr(i16 *%a0, i32 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fdivrp_fidivr:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fdivrp %st(1) # sched: [14:14.00]
; BDVER2-NEXT: fdivrp %st(2) # sched: [14:14.00]
; BDVER2-NEXT: fidivrs (%ecx) # sched: [34:1.00]
; BDVER2-NEXT: fidivrl (%eax) # sched: [34:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fdivrp_fidivr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -1920,6 +2099,13 @@ define void @test_ffree() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_ffree:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: ffree %st(0) # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_ffree:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -2034,6 +2220,18 @@ define void @test_ficom(i16 *%a0, i32 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_ficom:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: ficoms (%ecx) # sched: [11:2.00]
; BDVER2-NEXT: ficoml (%eax) # sched: [11:2.00]
; BDVER2-NEXT: ficomps (%ecx) # sched: [11:2.00]
; BDVER2-NEXT: ficompl (%eax) # sched: [11:2.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_ficom:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -2158,6 +2356,18 @@ define void @test_fild(i16 *%a0, i32 *%a1, i64 *%a2) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fild:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: filds (%edx) # sched: [10:1.00]
; BDVER2-NEXT: fildl (%ecx) # sched: [10:1.00]
; BDVER2-NEXT: fildll (%eax) # sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fild:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -2242,6 +2452,13 @@ define void @test_fincstp() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fincstp:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fincstp # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fincstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -2324,6 +2541,14 @@ define void @test_finit() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_finit:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: wait # sched: [100:0.33]
; BDVER2-NEXT: fninit # sched: [5:1.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_finit:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -2400,6 +2625,13 @@ define void @test_fninit() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fninit:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fninit # sched: [5:1.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fninit:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -2554,6 +2786,23 @@ define void @test_fist_fistp_fisttp(i16* %a0, i32* %a1, i64 *%a2) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fist_fistp_fisttp:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fists (%edx) # sched: [9:1.00]
; BDVER2-NEXT: fistl (%ecx) # sched: [9:1.00]
; BDVER2-NEXT: fistps (%edx) # sched: [9:1.00]
; BDVER2-NEXT: fistpl (%ecx) # sched: [9:1.00]
; BDVER2-NEXT: fistpll (%eax) # sched: [9:1.00]
; BDVER2-NEXT: fisttps (%edx) # sched: [5:1.00]
; BDVER2-NEXT: fisttpl (%ecx) # sched: [5:1.00]
; BDVER2-NEXT: fisttpll (%eax) # sched: [5:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fist_fistp_fisttp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -2696,6 +2945,19 @@ define void @test_fld(i16* %a0, i32* %a1, i64 *%a2) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fld:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fld %st(0) # sched: [1:1.00]
; BDVER2-NEXT: flds (%edx) # sched: [9:1.00]
; BDVER2-NEXT: fldl (%ecx) # sched: [9:1.00]
; BDVER2-NEXT: fldt (%eax) # sched: [9:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fld:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -2798,6 +3060,15 @@ define void @test_fldcw_fldenv(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fldcw_fldenv:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fldcw (%eax) # sched: [8:2.00]
; BDVER2-NEXT: fldenv (%eax) # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fldcw_fldenv:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -2924,6 +3195,19 @@ define void @test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fld1 # sched: [1:1.00]
; BDVER2-NEXT: fldl2e # sched: [1:1.00]
; BDVER2-NEXT: fldl2t # sched: [1:1.00]
; BDVER2-NEXT: fldlg2 # sched: [1:1.00]
; BDVER2-NEXT: fldln2 # sched: [1:1.00]
; BDVER2-NEXT: fldpi # sched: [1:1.00]
; BDVER2-NEXT: fldz # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fld1_fldl2e_fldl2t_fldlg2_fldln2_fldpi_fldz:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -3050,6 +3334,18 @@ define void @test_fmul(float *%a0, double *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fmul:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fmul %st(0), %st(1) # sched: [5:1.00]
; BDVER2-NEXT: fmul %st(2) # sched: [5:1.00]
; BDVER2-NEXT: fmuls (%ecx) # sched: [12:1.00]
; BDVER2-NEXT: fmull (%eax) # sched: [12:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fmul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -3174,6 +3470,18 @@ define void @test_fmulp_fimul(i16 *%a0, i32 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fmulp_fimul:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fmulp %st(1) # sched: [5:1.00]
; BDVER2-NEXT: fmulp %st(2) # sched: [5:1.00]
; BDVER2-NEXT: fimuls (%ecx) # sched: [15:1.00]
; BDVER2-NEXT: fimull (%eax) # sched: [15:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fmulp_fimul:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -3258,6 +3566,13 @@ define void @test_fnop() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fnop:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fnop # sched: [1:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fnop:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -3332,6 +3647,13 @@ define void @test_fpatan() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fpatan:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fpatan # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fpatan:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -3414,6 +3736,14 @@ define void @test_fprem_fprem1() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fprem_fprem1:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fprem # sched: [100:0.33]
; BDVER2-NEXT: fprem1 # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fprem_fprem1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -3490,6 +3820,13 @@ define void @test_fptan() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fptan:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fptan # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fptan:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -3564,6 +3901,13 @@ define void @test_frndint() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_frndint:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: frndint # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_frndint:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -3646,6 +3990,14 @@ define void @test_frstor(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_frstor:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: frstor (%eax) # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_frstor:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -3738,6 +4090,15 @@ define void @test_fsave(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fsave:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: wait # sched: [100:0.33]
; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -3824,6 +4185,14 @@ define void @test_fnsave(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fnsave:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fnsave (%eax) # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fnsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -3900,6 +4269,13 @@ define void @test_fscale() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fscale:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fscale # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fscale:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -3974,6 +4350,13 @@ define void @test_fsin() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fsin:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fsin # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fsin:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -4048,6 +4431,13 @@ define void @test_fsincos() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fsincos:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fsincos # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fsincos:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -4122,6 +4512,13 @@ define void @test_fsqrt() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fsqrt:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fsqrt # sched: [24:24.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fsqrt:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -4268,6 +4665,22 @@ define void @test_fst_fstp(i16* %a0, i32* %a1, i64 *%a2) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fst_fstp:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fst %st(0) # sched: [1:1.00]
; BDVER2-NEXT: fsts (%edx) # sched: [6:1.00]
; BDVER2-NEXT: fstl (%ecx) # sched: [6:1.00]
; BDVER2-NEXT: fstp %st(0) # sched: [1:1.00]
; BDVER2-NEXT: fstpl (%edx) # sched: [6:1.00]
; BDVER2-NEXT: fstpl (%ecx) # sched: [6:1.00]
; BDVER2-NEXT: fstpt (%eax) # sched: [6:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fst_fstp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -4408,6 +4821,19 @@ define void @test_fstcw_fstenv_fstsw(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fstcw_fstenv_fstsw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: wait # sched: [100:0.33]
; BDVER2-NEXT: fnstcw (%eax) # sched: [7:1.00]
; BDVER2-NEXT: wait # sched: [100:0.33]
; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.33]
; BDVER2-NEXT: wait # sched: [100:0.33]
; BDVER2-NEXT: fnstsw (%eax) # sched: [7:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fstcw_fstenv_fstsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -4518,6 +4944,16 @@ define void @test_fnstcw_fnstenv_fnstsw(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fnstcw_fnstenv_fnstsw:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fnstcw (%eax) # sched: [7:1.00]
; BDVER2-NEXT: fnstenv (%eax) # sched: [100:0.33]
; BDVER2-NEXT: fnstsw (%eax) # sched: [7:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fnstcw_fnstenv_fnstsw:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -4638,6 +5074,18 @@ define void @test_fsub(float *%a0, double *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fsub:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fsub %st(0), %st(1) # sched: [3:1.00]
; BDVER2-NEXT: fsub %st(2) # sched: [3:1.00]
; BDVER2-NEXT: fsubs (%ecx) # sched: [10:1.00]
; BDVER2-NEXT: fsubl (%eax) # sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fsub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -4762,6 +5210,18 @@ define void @test_fsubp_fisub(i16 *%a0, i32 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fsubp_fisub:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fsubp %st(1) # sched: [3:1.00]
; BDVER2-NEXT: fsubp %st(2) # sched: [3:1.00]
; BDVER2-NEXT: fisubs (%ecx) # sched: [13:2.00]
; BDVER2-NEXT: fisubl (%eax) # sched: [13:2.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fsubp_fisub:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -4886,6 +5346,18 @@ define void @test_fsubr(float *%a0, double *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fsubr:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fsubr %st(0), %st(1) # sched: [3:1.00]
; BDVER2-NEXT: fsubr %st(2) # sched: [3:1.00]
; BDVER2-NEXT: fsubrs (%ecx) # sched: [10:1.00]
; BDVER2-NEXT: fsubrl (%eax) # sched: [10:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fsubr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -5010,6 +5482,18 @@ define void @test_fsubrp_fisubr(i16 *%a0, i32 *%a1) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fsubrp_fisubr:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fsubrp %st(1) # sched: [3:1.00]
; BDVER2-NEXT: fsubrp %st(2) # sched: [3:1.00]
; BDVER2-NEXT: fisubrs (%ecx) # sched: [13:2.00]
; BDVER2-NEXT: fisubrl (%eax) # sched: [13:2.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fsubrp_fisubr:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -5094,6 +5578,13 @@ define void @test_ftst() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_ftst:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: ftst # sched: [3:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_ftst:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -5200,6 +5691,17 @@ define void @test_fucom_fucomp_fucompp() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fucom_fucomp_fucompp:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fucom %st(1) # sched: [1:1.00]
; BDVER2-NEXT: fucom %st(3) # sched: [1:1.00]
; BDVER2-NEXT: fucomp %st(1) # sched: [1:1.00]
; BDVER2-NEXT: fucomp %st(3) # sched: [1:1.00]
; BDVER2-NEXT: fucompp # sched: [3:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fucom_fucomp_fucompp:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -5290,6 +5792,14 @@ define void @test_fucomi_fucomip() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fucomi_fucomip:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fucomi %st(3) # sched: [3:1.00]
; BDVER2-NEXT: fucompi %st(3) # sched: [3:1.00]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fucomi_fucomip:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -5366,6 +5876,13 @@ define void @test_fwait() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fwait:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: wait # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fwait:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -5440,6 +5957,13 @@ define void @test_fxam() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fxam:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fxam # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fxam:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -5522,6 +6046,14 @@ define void @test_fxch() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fxch:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fxch %st(1) # sched: [1:0.33]
; BDVER2-NEXT: fxch %st(3) # sched: [1:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fxch:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -5614,6 +6146,15 @@ define void @test_fxrstor_fxsave(i8* %a0) optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fxrstor_fxsave:
; BDVER2: # %bb.0:
; BDVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:0.50]
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fxrstor (%eax) # sched: [5:2.00]
; BDVER2-NEXT: fxsave (%eax) # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fxrstor_fxsave:
; BTVER2: # %bb.0:
; BTVER2-NEXT: movl {{[0-9]+}}(%esp), %eax # sched: [5:1.00]
@ -5692,6 +6233,13 @@ define void @test_fxtract() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fxtract:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fxtract # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fxtract:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -5766,6 +6314,13 @@ define void @test_fyl2x() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fyl2x:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fyl2x # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fyl2x:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP
@ -5840,6 +6395,13 @@ define void @test_fyl2xp1() optsize {
; SKX-NEXT: #NO_APP
; SKX-NEXT: retl # sched: [6:0.50]
;
; BDVER2-LABEL: test_fyl2xp1:
; BDVER2: # %bb.0:
; BDVER2-NEXT: #APP
; BDVER2-NEXT: fyl2xp1 # sched: [100:0.33]
; BDVER2-NEXT: #NO_APP
; BDVER2-NEXT: retl # sched: [6:1.00]
;
; BTVER2-LABEL: test_fyl2xp1:
; BTVER2: # %bb.0:
; BTVER2-NEXT: #APP

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,95 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1000 -timeline < %s | FileCheck %s
add %eax, %ecx
add %esi, %eax
add %eax, %edx
# CHECK: Iterations: 1000
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 1004
# CHECK-NEXT: Total uOps: 3000
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 2.99
# CHECK-NEXT: IPC: 2.99
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.33 addl %eax, %ecx
# CHECK-NEXT: 1 1 0.33 addl %esi, %eax
# CHECK-NEXT: 1 1 0.33 addl %eax, %edx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - addl %eax, %ecx
# CHECK-NEXT: - - - - - 1.00 - - addl %esi, %eax
# CHECK-NEXT: - - 1.00 - - - - - addl %eax, %edx
# CHECK: Timeline view:
# CHECK-NEXT: 0123
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . . addl %eax, %ecx
# CHECK-NEXT: [0,1] DeER . . . addl %esi, %eax
# CHECK-NEXT: [0,2] D=eER. . . addl %eax, %edx
# CHECK-NEXT: [1,0] D=eER. . . addl %eax, %ecx
# CHECK-NEXT: [1,1] .DeER. . . addl %esi, %eax
# CHECK-NEXT: [1,2] .D=eER . . addl %eax, %edx
# CHECK-NEXT: [2,0] .D=eER . . addl %eax, %ecx
# CHECK-NEXT: [2,1] .D=eER . . addl %esi, %eax
# CHECK-NEXT: [2,2] . D=eER . . addl %eax, %edx
# CHECK-NEXT: [3,0] . D=eER . . addl %eax, %ecx
# CHECK-NEXT: [3,1] . D=eER . . addl %esi, %eax
# CHECK-NEXT: [3,2] . D==eER . . addl %eax, %edx
# CHECK-NEXT: [4,0] . D=eER . . addl %eax, %ecx
# CHECK-NEXT: [4,1] . D=eER . . addl %esi, %eax
# CHECK-NEXT: [4,2] . D==eER . . addl %eax, %edx
# CHECK-NEXT: [5,0] . D==eER . . addl %eax, %ecx
# CHECK-NEXT: [5,1] . D=eER . . addl %esi, %eax
# CHECK-NEXT: [5,2] . D==eER. . addl %eax, %edx
# CHECK-NEXT: [6,0] . D==eER. . addl %eax, %ecx
# CHECK-NEXT: [6,1] . D==eER. . addl %esi, %eax
# CHECK-NEXT: [6,2] . D==eER . addl %eax, %edx
# CHECK-NEXT: [7,0] . D==eER . addl %eax, %ecx
# CHECK-NEXT: [7,1] . D==eER . addl %esi, %eax
# CHECK-NEXT: [7,2] . D===eER . addl %eax, %edx
# CHECK-NEXT: [8,0] . .D==eER . addl %eax, %ecx
# CHECK-NEXT: [8,1] . .D==eER . addl %esi, %eax
# CHECK-NEXT: [8,2] . .D===eER. addl %eax, %edx
# CHECK-NEXT: [9,0] . .D===eER. addl %eax, %ecx
# CHECK-NEXT: [9,1] . . D==eER. addl %esi, %eax
# CHECK-NEXT: [9,2] . . D===eER addl %eax, %edx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 10 2.5 0.1 0.0 addl %eax, %ecx
# CHECK-NEXT: 1. 10 2.2 0.1 0.0 addl %esi, %eax
# CHECK-NEXT: 2. 10 3.0 0.0 0.0 addl %eax, %edx

View File

@ -0,0 +1,63 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
## Sets register RAX.
imulq $5, %rcx, %rax
## Kills the previous definition of RAX.
## The upper portion of RAX is cleared.
lzcnt %ecx, %eax
## The AND can start immediately after the LZCNT.
## It doesn't need to wait for the IMUL.
and %rcx, %rax
bsf %rax, %rcx
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 400
# CHECK-NEXT: Total Cycles: 803
# CHECK-NEXT: Total uOps: 400
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 imulq $5, %rcx, %rax
# CHECK-NEXT: 1 3 1.00 lzcntl %ecx, %eax
# CHECK-NEXT: 1 1 0.33 andq %rcx, %rax
# CHECK-NEXT: 1 3 1.00 bsfq %rax, %rcx
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeER . . . imulq $5, %rcx, %rax
# CHECK-NEXT: [0,1] D=eeeER . . . lzcntl %ecx, %eax
# CHECK-NEXT: [0,2] D====eER . . . andq %rcx, %rax
# CHECK-NEXT: [0,3] D=====eeeER . . bsfq %rax, %rcx
# CHECK-NEXT: [1,0] .D=======eeeER . . imulq $5, %rcx, %rax
# CHECK-NEXT: [1,1] .D========eeeER. . lzcntl %ecx, %eax
# CHECK-NEXT: [1,2] .D===========eER . andq %rcx, %rax
# CHECK-NEXT: [1,3] .D============eeeER bsfq %rax, %rcx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 4.5 0.5 0.0 imulq $5, %rcx, %rax
# CHECK-NEXT: 1. 2 5.5 1.5 0.0 lzcntl %ecx, %eax
# CHECK-NEXT: 2. 2 8.5 0.0 0.0 andq %rcx, %rax
# CHECK-NEXT: 3. 2 9.5 0.0 0.0 bsfq %rax, %rcx

View File

@ -0,0 +1,137 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -resource-pressure=false -timeline -timeline-max-iterations=2 < %s | FileCheck %s
# In this test, the VDIVPS takes 29 cycles to write to register YMM3. The first
# VADDPS does not depend on the VDIVPS (the WAW dependency is eliminated at the
# register renaming stage). So the first VADDPS can be executed in parallel to
# the VDIVPS. That VADDPS also writes to register XMM3, and the upper half of
# YMM3 is implicitly cleared. As a consequence, the definition of YMM3 from the
# VDIVPS is killed, and the subsequent VADDPS instructions don't need to wait
# for the VDIVPS to complete.
# The block reciprocal throughput is limited by the VDIVPS reciprocal throughput
# (which is 28 cycles). The sequence of VADDPS can be executed in parallel on
# the FPA unit; their latency is "hidden" by the long latency of the VDIVPS.
vdivps %ymm0, %ymm1, %ymm3
vaddps %xmm0, %xmm1, %xmm3
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vaddps %ymm3, %ymm1, %ymm4
vandps %xmm4, %xmm1, %xmm0
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1800
# CHECK-NEXT: Total Cycles: 2804
# CHECK-NEXT: Total uOps: 2000
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.71
# CHECK-NEXT: IPC: 0.64
# CHECK-NEXT: Block RThroughput: 28.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 3 29 28.00 vdivps %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 3 1.00 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 1 1 1.00 vandps %xmm4, %xmm1, %xmm0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0123456789
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . vdivps %ymm0, %ymm1, %ymm3
# CHECK-NEXT: [0,1] DeeeE--------------------------R . . . . . . vaddps %xmm0, %xmm1, %xmm3
# CHECK-NEXT: [0,2] .D==eeeE-----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,3] .D===eeeE----------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,4] .D====eeeE---------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,5] .D=====eeeE--------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,6] . D=====eeeE-------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,7] . D======eeeE------------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,8] . D=======eeeE-----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,9] . D========eeeE----------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,10] . D========eeeE---------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,11] . D=========eeeE--------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,12] . D==========eeeE-------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,13] . D===========eeeE------------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,14] . D===========eeeE-----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,15] . D============eeeE----------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,16] . D=============eeeE---------R . . . . . . vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [0,17] . D================eE--------R . . . . . . vandps %xmm4, %xmm1, %xmm0
# CHECK-NEXT: [1,0] . D=======================eeeeeeeeeeeeeeeeeeeeeeeeeeeeeER vdivps %ymm0, %ymm1, %ymm3
# CHECK-NEXT: [1,1] . D================eeeE---------------------------------R vaddps %xmm0, %xmm1, %xmm3
# CHECK-NEXT: [1,2] . .D==================eeeE------------------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,3] . .D===================eeeE-----------------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,4] . .D====================eeeE----------------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,5] . .D=====================eeeE---------------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,6] . . D=====================eeeE--------------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,7] . . D======================eeeE-------------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,8] . . D=======================eeeE------------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,9] . . D========================eeeE-----------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,10] . . D========================eeeE----------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,11] . . D=========================eeeE---------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,12] . . D==========================eeeE--------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,13] . . D===========================eeeE-------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,14] . . D===========================eeeE------------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,15] . . D============================eeeE-----------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,16] . . D=============================eeeE----------------R vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: [1,17] . . D================================eE---------------R vandps %xmm4, %xmm1, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 12.5 4.0 0.0 vdivps %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 1. 2 9.0 0.5 29.5 vaddps %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2. 2 11.0 0.0 26.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 3. 2 12.0 1.0 25.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 4. 2 13.0 2.0 24.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 5. 2 14.0 3.0 23.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 6. 2 14.0 4.0 22.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 7. 2 15.0 5.0 21.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 8. 2 16.0 6.0 20.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 9. 2 17.0 7.0 19.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 10. 2 17.0 8.0 18.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 11. 2 18.0 9.0 17.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 12. 2 19.0 10.0 16.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 13. 2 20.0 11.0 15.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 14. 2 20.0 12.0 14.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 15. 2 21.0 13.0 13.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 16. 2 22.0 14.0 12.5 vaddps %ymm3, %ymm1, %ymm4
# CHECK-NEXT: 17. 2 25.0 0.0 11.5 vandps %xmm4, %xmm1, %xmm0

View File

@ -0,0 +1,72 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
# Perf stat reports an IPC of 1.97 for this block of code.
# The CMP instruction doesn't depend on the value of EAX. It can set the flags
# without having to read the inputs.
cmp %eax, %eax
cmovae %ebx, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 4503
# CHECK-NEXT: Total uOps: 4500
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 0.67
# CHECK-NEXT: Block RThroughput: 0.8
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.33 cmpl %eax, %eax
# CHECK-NEXT: 2 2 0.67 cmovael %ebx, %eax
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - 1.00 - - cmpl %eax, %eax
# CHECK-NEXT: - - 1.00 1.00 - - - - cmovael %ebx, %eax
# CHECK: Timeline view:
# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . .. cmpl %eax, %eax
# CHECK-NEXT: [0,1] D=eeER .. cmovael %ebx, %eax
# CHECK-NEXT: [1,0] D===eER .. cmpl %eax, %eax
# CHECK-NEXT: [1,1] .D===eeER .. cmovael %ebx, %eax
# CHECK-NEXT: [2,0] .D=====eER.. cmpl %eax, %eax
# CHECK-NEXT: [2,1] . D=====eeER cmovael %ebx, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 3.7 0.3 0.0 cmpl %eax, %eax
# CHECK-NEXT: 1. 3 4.0 0.0 0.0 cmovael %ebx, %eax

View File

@ -0,0 +1,87 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
# perf stat reports an IPC of 2.00 for this block of code.
# All of the vector packed compares from this test are dependency breaking
# instructions. That means, there is no RAW dependency between any of the
# instructions, and the code can be fully parallelized in hardware.
vpcmpeqb %xmm0, %xmm0, %xmm1
vpcmpeqw %xmm1, %xmm1, %xmm2
vpcmpeqd %xmm2, %xmm2, %xmm3
vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 6000
# CHECK-NEXT: Total Cycles: 6003
# CHECK-NEXT: Total uOps: 6000
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 2.00 - 2.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: - - - 1.00 - - - - vpcmpeqw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: - - - 1.00 - - - - vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Timeline view:
# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . . vpcmpeqb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [0,1] D=eER. . . vpcmpeqw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [0,2] D==eER . . vpcmpeqd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [0,3] D===eER . . vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK-NEXT: [1,0] .D===eER . . vpcmpeqb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [1,1] .D====eER . . vpcmpeqw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [1,2] .D=====eER. . vpcmpeqd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [1,3] .D======eER . vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK-NEXT: [2,0] . D======eER . vpcmpeqb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [2,1] . D=======eER . vpcmpeqw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [2,2] . D========eER. vpcmpeqd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [2,3] . D=========eER vpcmpeqq %xmm3, %xmm3, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 4.0 0.3 0.0 vpcmpeqb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1. 3 5.0 0.0 0.0 vpcmpeqw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: 2. 3 6.0 0.0 0.0 vpcmpeqd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: 3. 3 7.0 0.0 0.0 vpcmpeqq %xmm3, %xmm3, %xmm0

View File

@ -0,0 +1,87 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
# perf stat reports an IPC of 2.00 for this block of code.
# All of the vector packed compares from this test are zero idioms. These zero
# idioms are all detected and removed by the register renamer. That means, no
# uOp is executed, and there is no RAW dependency for any of the packed
# compares.
vpcmpgtb %xmm0, %xmm0, %xmm1
vpcmpgtw %xmm1, %xmm1, %xmm2
vpcmpgtd %xmm2, %xmm2, %xmm3
vpcmpgtq %xmm3, %xmm3, %xmm0
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 6000
# CHECK-NEXT: Total Cycles: 1501
# CHECK-NEXT: Total uOps: 6000
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 4.00
# CHECK-NEXT: IPC: 4.00
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 0 0.25 vpcmpgtb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1 0 0.25 vpcmpgtw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: 1 0 0.25 vpcmpgtd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: 1 0 0.25 vpcmpgtq %xmm3, %xmm3, %xmm0
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - - - - vpcmpgtb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: - - - - - - - - vpcmpgtw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: - - - - - - - - vpcmpgtd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: - - - - - - - - vpcmpgtq %xmm3, %xmm3, %xmm0
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123
# CHECK: [0,0] DR . vpcmpgtb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [0,1] DR . vpcmpgtw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [0,2] DR . vpcmpgtd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [0,3] DR . vpcmpgtq %xmm3, %xmm3, %xmm0
# CHECK-NEXT: [1,0] .DR. vpcmpgtb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [1,1] .DR. vpcmpgtw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [1,2] .DR. vpcmpgtd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [1,3] .DR. vpcmpgtq %xmm3, %xmm3, %xmm0
# CHECK-NEXT: [2,0] . DR vpcmpgtb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [2,1] . DR vpcmpgtw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [2,2] . DR vpcmpgtd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [2,3] . DR vpcmpgtq %xmm3, %xmm3, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 0.0 0.0 0.0 vpcmpgtb %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1. 3 0.0 0.0 0.0 vpcmpgtw %xmm1, %xmm1, %xmm2
# CHECK-NEXT: 2. 3 0.0 0.0 0.0 vpcmpgtd %xmm2, %xmm2, %xmm3
# CHECK-NEXT: 3. 3 0.0 0.0 0.0 vpcmpgtq %xmm3, %xmm3, %xmm0

View File

@ -0,0 +1,73 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
# perf stat reports an IPC of 1.00 for this code block.
# Although both SBB are dependency breaking instructions, there is still an
# implicit dependency on EFLAGS which limits the ILP. So, the hardware backend
# can only execute one instruction per cycle.
sbb %edx, %edx
sbb %eax, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 3000
# CHECK-NEXT: Total Cycles: 6003
# CHECK-NEXT: Total uOps: 6000
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 2 0.67 sbbl %edx, %edx
# CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %edx, %edx
# CHECK-NEXT: - - 0.67 0.67 - 0.67 - - sbbl %eax, %eax
# CHECK: Timeline view:
# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeER. . . sbbl %edx, %edx
# CHECK-NEXT: [0,1] D==eeER . . sbbl %eax, %eax
# CHECK-NEXT: [1,0] .D===eeER . . sbbl %edx, %edx
# CHECK-NEXT: [1,1] .D=====eeER . sbbl %eax, %eax
# CHECK-NEXT: [2,0] . D======eeER . sbbl %edx, %edx
# CHECK-NEXT: [2,1] . D========eeER sbbl %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 4.0 0.3 0.0 sbbl %edx, %edx
# CHECK-NEXT: 1. 3 6.0 0.0 0.0 sbbl %eax, %eax

View File

@ -0,0 +1,80 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 -iterations=1500 < %s | FileCheck %s
# perf stat reports a throughput of 1.51 IPC for this block of code.
# The SBB does not depend on the value of register EAX. That means, it doesn't
# have to wait for the IMUL to write-back on EAX. However, it still depends on
# the ADD for EFLAGS.
imul %edx, %eax
add %edx, %edx
sbb %eax, %eax
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 7503
# CHECK-NEXT: Total uOps: 6000
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.80
# CHECK-NEXT: IPC: 0.60
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 imull %edx, %eax
# CHECK-NEXT: 1 1 0.33 addl %edx, %edx
# CHECK-NEXT: 2 2 0.67 sbbl %eax, %eax
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - imull %edx, %eax
# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edx, %edx
# CHECK-NEXT: - - 1.00 - - 1.00 - - sbbl %eax, %eax
# CHECK: Timeline view:
# CHECK-NEXT: 01234567
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeER . . . imull %edx, %eax
# CHECK-NEXT: [0,1] DeE--R . . . addl %edx, %edx
# CHECK-NEXT: [0,2] D===eeER . . . sbbl %eax, %eax
# CHECK-NEXT: [1,0] .D====eeeER . . imull %edx, %eax
# CHECK-NEXT: [1,1] .DeE------R . . addl %edx, %edx
# CHECK-NEXT: [1,2] .D=======eeER . . sbbl %eax, %eax
# CHECK-NEXT: [2,0] . D========eeeER . imull %edx, %eax
# CHECK-NEXT: [2,1] . DeE----------R . addl %edx, %edx
# CHECK-NEXT: [2,2] . D===========eeER sbbl %eax, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 5.0 0.3 0.0 imull %edx, %eax
# CHECK-NEXT: 1. 3 1.0 0.3 6.0 addl %edx, %edx
# CHECK-NEXT: 2. 3 8.0 0.0 0.0 sbbl %eax, %eax

View File

@ -0,0 +1,95 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=500 -timeline < %s | FileCheck %s
# Loop-carried dependency chain (AT&T syntax, destination is the last operand):
# the VPMULDQ reads XMM0 and writes XMM1; the first VPADDD reads XMM1 and writes
# XMM0, which the VPMULDQ of the next iteration reads; the second VPADDD reads
# XMM0 and writes XMM3.
vpmuldq %xmm0, %xmm0, %xmm1
vpaddd %xmm1, %xmm1, %xmm0
vpaddd %xmm0, %xmm0, %xmm3
# CHECK: Iterations: 500
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 3004
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.50
# CHECK-NEXT: IPC: 0.50
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 1.00 vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1 1 0.50 vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: 1 1 0.50 vpaddd %xmm0, %xmm0, %xmm3
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 1.00 - - - - - vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: - - - - - 1.00 - - vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: - - - 1.00 - - - - vpaddd %xmm0, %xmm0, %xmm3
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123
# CHECK: [0,0] DeeeeeER . . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [0,1] D=====eER . . . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [0,2] D======eER. . . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [1,0] D======eeeeeER . . . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [1,1] .D==========eER. . . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [1,2] .D===========eER . . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [2,0] .D===========eeeeeER. . . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [2,1] .D================eER . . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [2,2] . D================eER . . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [3,0] . D================eeeeeER . . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [3,1] . D=====================eER . . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [3,2] . D======================eER . . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [4,0] . D=====================eeeeeER . . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [4,1] . D==========================eER . . . . . . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [4,2] . D===========================eER . . . . . . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [5,0] . D===========================eeeeeER . . . . . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [5,1] . D===============================eER . . . . . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [5,2] . D================================eER. . . . . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [6,0] . D================================eeeeeER . . . . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [6,1] . D=====================================eER. . . . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [6,2] . D=====================================eER . . . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [7,0] . D=====================================eeeeeER. . . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [7,1] . D==========================================eER . . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [7,2] . D===========================================eER . . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [8,0] . .D==========================================eeeeeER . . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [8,1] . .D===============================================eER . . vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [8,2] . .D================================================eER . . vpaddd %xmm0, %xmm0, %xmm3
# CHECK-NEXT: [9,0] . .D================================================eeeeeER . vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [9,1] . . D====================================================eER. vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: [9,2] . . D=====================================================eER vpaddd %xmm0, %xmm0, %xmm3
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 10 25.0 0.1 0.0 vpmuldq %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1. 10 29.7 0.0 0.0 vpaddd %xmm1, %xmm1, %xmm0
# CHECK-NEXT: 2. 10 30.5 0.0 0.0 vpaddd %xmm0, %xmm0, %xmm3

View File

@ -0,0 +1,74 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=300 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
# Serial dependency chain: vmulps writes %xmm2, the first vhaddps reads %xmm2
# and writes %xmm3, and the second vhaddps reads %xmm3.
# The block is simulated for 300 iterations; the timeline view is limited to
# the first 3 of them.
vmulps %xmm0, %xmm1, %xmm2
vhaddps %xmm2, %xmm2, %xmm3
vhaddps %xmm3, %xmm3, %xmm4
# CHECK: Iterations: 300
# CHECK-NEXT: Instructions: 900
# CHECK-NEXT: Total Cycles: 1211
# CHECK-NEXT: Total uOps: 2100
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.73
# CHECK-NEXT: IPC: 0.74
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 2.00 - 4.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: - - - 1.00 - 2.00 - - vhaddps %xmm3, %xmm3, %xmm4
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 012
# CHECK: [0,0] DeeeeeER . . . . vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,1] D=====eeeeeER . . . vhaddps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [0,2] .D==========eeeeeER . . vhaddps %xmm3, %xmm3, %xmm4
# CHECK-NEXT: [1,0] .DeeeeeE----------R . . vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [1,1] . D=====eeeeeE----R . . vhaddps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [1,2] . D==========eeeeeER . vhaddps %xmm3, %xmm3, %xmm4
# CHECK-NEXT: [2,0] . DeeeeeE----------R . vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [2,1] . D=====eeeeeE----R . vhaddps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: [2,2] . D==========eeeeeER vhaddps %xmm3, %xmm3, %xmm4
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 6.7 vmulps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1. 3 6.0 0.7 2.7 vhaddps %xmm2, %xmm2, %xmm3
# CHECK-NEXT: 2. 3 11.0 1.0 0.0 vhaddps %xmm3, %xmm3, %xmm4

View File

@ -0,0 +1,44 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
# vshufps writes %xmm1; vhaddps then reads %xmm1 and also folds a load from
# (%rdi). A single iteration is simulated, with the timeline view turned on
# and the resource-pressure view turned off.
# NOTE(review): presumably this exercises how the folded load is scheduled
# relative to the pending %xmm1 write - confirm against the scheduling model.
vshufps $0, %xmm0, %xmm1, %xmm1
vhaddps (%rdi), %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 15
# CHECK-NEXT: Total uOps: 5
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.33
# CHECK-NEXT: IPC: 0.13
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1
# CHECK-NEXT: 4 11 2.00 * vhaddps (%rdi), %xmm1, %xmm2
# CHECK: Timeline view:
# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1
# CHECK-NEXT: [0,1] .DeeeeeeeeeeeER vhaddps (%rdi), %xmm1, %xmm2
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %xmm1, %xmm2

View File

@ -0,0 +1,44 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
# vshufps writes %xmm1; the 256-bit vhaddps then reads %ymm1 (the register
# whose low half is %xmm1) and also folds a load from (%rdi). A single
# iteration is simulated, with the timeline view turned on and the
# resource-pressure view turned off.
# NOTE(review): presumably this exercises how the folded load is scheduled
# relative to the pending %xmm1 write - confirm against the scheduling model.
vshufps $0, %xmm0, %xmm1, %xmm1
vhaddps (%rdi), %ymm1, %ymm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 16
# CHECK-NEXT: Total uOps: 5
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.31
# CHECK-NEXT: IPC: 0.13
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 vshufps $0, %xmm0, %xmm1, %xmm1
# CHECK-NEXT: 4 12 2.00 * vhaddps (%rdi), %ymm1, %ymm2
# CHECK: Timeline view:
# CHECK-NEXT: 012345
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . . vshufps $0, %xmm0, %xmm1, %xmm1
# CHECK-NEXT: [0,1] .DeeeeeeeeeeeeER vhaddps (%rdi), %ymm1, %ymm2
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vshufps $0, %xmm0, %xmm1, %xmm1
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 vhaddps (%rdi), %ymm1, %ymm2

View File

@ -0,0 +1,36 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=true < %s | FileCheck %s --check-prefix=ENABLED
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=false < %s | FileCheck %s -check-prefix=DISABLED
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info < %s | FileCheck %s -check-prefix=ENABLED
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false < %s | FileCheck %s -check-prefix=ENABLED
# Exercises the -instruction-info option. The four invocations above pass it
# as "=true", as "=false", as a bare flag, and not at all; only the "=false"
# invocation is verified with the prefix that forbids the instruction-info
# table, the other three share the prefix that expects it.
# The input itself is a simple chain: vmulps -> vhaddps -> vhaddps through
# %xmm2 and %xmm3.
vmulps %xmm0, %xmm1, %xmm2
vhaddps %xmm2, %xmm2, %xmm3
vhaddps %xmm3, %xmm3, %xmm4
# DISABLED-NOT: Instruction Info:
# ENABLED: Iterations: 100
# ENABLED-NEXT: Instructions: 300
# ENABLED-NEXT: Total Cycles: 414
# ENABLED-NEXT: Total uOps: 700
# ENABLED: Dispatch Width: 4
# ENABLED-NEXT: uOps Per Cycle: 1.69
# ENABLED-NEXT: IPC: 0.72
# ENABLED-NEXT: Block RThroughput: 4.0
# ENABLED: Instruction Info:
# ENABLED-NEXT: [1]: #uOps
# ENABLED-NEXT: [2]: Latency
# ENABLED-NEXT: [3]: RThroughput
# ENABLED-NEXT: [4]: MayLoad
# ENABLED-NEXT: [5]: MayStore
# ENABLED-NEXT: [6]: HasSideEffects (U)
# ENABLED: [1] [2] [3] [4] [5] [6] Instructions:
# ENABLED-NEXT: 1 5 1.00 vmulps %xmm0, %xmm1, %xmm2
# ENABLED-NEXT: 3 5 2.00 vhaddps %xmm2, %xmm2, %xmm3
# ENABLED-NEXT: 3 5 2.00 vhaddps %xmm3, %xmm3, %xmm4

View File

@ -0,0 +1,93 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 -noalias=false < %s | FileCheck %s
# Four load/store pairs: each pair loads 128 bits from an %rsi-based address
# into %xmm0 and stores %xmm0 to the %rdi-based address with the same offset
# (0, 16, 32, 48).
# -noalias=false turns off llvm-mca's default assumption that loads and
# stores do not alias. 100 iterations are simulated; the timeline view is
# limited to the first one.
vmovaps (%rsi), %xmm0
vmovaps %xmm0, (%rdi)
vmovaps 16(%rsi), %xmm0
vmovaps %xmm0, 16(%rdi)
vmovaps 32(%rsi), %xmm0
vmovaps %xmm0, 32(%rdi)
vmovaps 48(%rsi), %xmm0
vmovaps %xmm0, 48(%rdi)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 2803
# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.29
# CHECK-NEXT: IPC: 0.29
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - - 4.00 - - 8.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - - - 1.00 vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi)
# CHECK-NEXT: - - - - - - - 1.00 vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: - - - - - - - 1.00 vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 48(%rdi)
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeeER . . . . . vmovaps (%rsi), %xmm0
# CHECK-NEXT: [0,1] D======eER. . . . . vmovaps %xmm0, (%rdi)
# CHECK-NEXT: [0,2] D=======eeeeeeER . . . vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: [0,3] D=============eER . . . vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: [0,4] .D=============eeeeeeER . . vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: [0,5] .D===================eER . . vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: [0,6] .D====================eeeeeeER. vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: [0,7] .D==========================eER vmovaps %xmm0, 48(%rdi)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 2. 1 8.0 0.0 0.0 vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 3. 1 14.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 4. 1 14.0 0.0 0.0 vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 5. 1 20.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 6. 1 21.0 0.0 0.0 vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 7. 1 27.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)

View File

@ -0,0 +1,93 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=100 -timeline -timeline-max-iterations=1 < %s | FileCheck %s
# Four load/store pairs: each pair loads 128 bits from an %rsi-based address
# into %xmm0 and stores %xmm0 to the %rdi-based address with the same offset
# (0, 16, 32, 48).
# No -noalias option is given, so llvm-mca's default applies (loads and
# stores are assumed not to alias). 100 iterations are simulated; the
# timeline view is limited to the first one.
vmovaps (%rsi), %xmm0
vmovaps %xmm0, (%rdi)
vmovaps 16(%rsi), %xmm0
vmovaps %xmm0, 16(%rdi)
vmovaps 32(%rsi), %xmm0
vmovaps %xmm0, 32(%rdi)
vmovaps 48(%rsi), %xmm0
vmovaps %xmm0, 48(%rdi)
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 409
# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.96
# CHECK-NEXT: IPC: 1.96
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 6 0.50 * vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 1 6 0.50 * vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 1 6 0.50 * vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 1 6 0.50 * vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 1 1 1.00 * vmovaps %xmm0, 48(%rdi)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - - 4.00 - 3.94 4.06
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - - 0.97 0.03 vmovaps (%rsi), %xmm0
# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, (%rdi)
# CHECK-NEXT: - - - - - - 0.03 0.97 vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: - - - - 1.00 - 0.97 0.03 vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: - - - - - - 1.00 - vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: - - - - 1.00 - - 1.00 vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: - - - - - - - 1.00 vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: - - - - 1.00 - 0.97 0.03 vmovaps %xmm0, 48(%rdi)
# CHECK: Timeline view:
# CHECK-NEXT: 012
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeeeER . . vmovaps (%rsi), %xmm0
# CHECK-NEXT: [0,1] D======eER. . vmovaps %xmm0, (%rdi)
# CHECK-NEXT: [0,2] DeeeeeeE-R. . vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: [0,3] D=======eER . vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: [0,4] .DeeeeeeE-R . vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: [0,5] .D=======eER. vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: [0,6] .DeeeeeeE--R. vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: [0,7] .D========eER vmovaps %xmm0, 48(%rdi)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vmovaps (%rsi), %xmm0
# CHECK-NEXT: 1. 1 7.0 0.0 0.0 vmovaps %xmm0, (%rdi)
# CHECK-NEXT: 2. 1 1.0 1.0 1.0 vmovaps 16(%rsi), %xmm0
# CHECK-NEXT: 3. 1 8.0 0.0 0.0 vmovaps %xmm0, 16(%rdi)
# CHECK-NEXT: 4. 1 1.0 1.0 1.0 vmovaps 32(%rsi), %xmm0
# CHECK-NEXT: 5. 1 8.0 0.0 0.0 vmovaps %xmm0, 32(%rdi)
# CHECK-NEXT: 6. 1 1.0 1.0 2.0 vmovaps 48(%rsi), %xmm0
# CHECK-NEXT: 7. 1 9.0 0.0 0.0 vmovaps %xmm0, 48(%rdi)

View File

@ -0,0 +1,142 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=1 -register-file-stats < %s | FileCheck %s
# These are dependency-breaking one-idioms.
# Much like zero-idioms, but they produce ones, and do consume resources.
# perf stat reports a throughput of 2.00 IPC.
# Every instruction below compares a register with itself, so the result
# does not depend on the register's previous contents.
# MMX forms, all on %mm2.
pcmpeqb %mm2, %mm2
pcmpeqd %mm2, %mm2
pcmpeqw %mm2, %mm2
# Two-operand SSE forms, all on %xmm2.
pcmpeqb %xmm2, %xmm2
pcmpeqd %xmm2, %xmm2
pcmpeqq %xmm2, %xmm2
pcmpeqw %xmm2, %xmm2
# Three-operand AVX forms that overwrite their own source, %xmm3.
vpcmpeqb %xmm3, %xmm3, %xmm3
vpcmpeqd %xmm3, %xmm3, %xmm3
vpcmpeqq %xmm3, %xmm3, %xmm3
vpcmpeqw %xmm3, %xmm3, %xmm3
# Three-operand AVX forms that read %xmm3 and write a separate register, %xmm5.
vpcmpeqb %xmm3, %xmm3, %xmm5
vpcmpeqd %xmm3, %xmm3, %xmm5
vpcmpeqq %xmm3, %xmm3, %xmm5
vpcmpeqw %xmm3, %xmm3, %xmm5
# FIXME: their handling is broken in llvm-mca.
# NOTE(review): "their" refers to the one-idioms above; presumably the
# breakage is that llvm-mca does not treat them as dependency-breaking -
# confirm before relying on this.
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 903
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.66
# CHECK-NEXT: IPC: 1.66
# CHECK-NEXT: Block RThroughput: 6.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 pcmpeqb %mm2, %mm2
# CHECK-NEXT: 1 3 1.00 pcmpeqd %mm2, %mm2
# CHECK-NEXT: 1 3 1.00 pcmpeqw %mm2, %mm2
# CHECK-NEXT: 1 1 0.50 pcmpeqb %xmm2, %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpeqd %xmm2, %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm2, %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpeqw %xmm2, %xmm2
# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcmpeqb %xmm3, %xmm3, %xmm5
# CHECK-NEXT: 1 1 0.50 vpcmpeqd %xmm3, %xmm3, %xmm5
# CHECK-NEXT: 1 1 0.50 vpcmpeqq %xmm3, %xmm3, %xmm5
# CHECK-NEXT: 1 1 0.50 vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 1500
# CHECK-NEXT: Max number of mappings used: 168
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 7.65 - 7.35 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - pcmpeqb %mm2, %mm2
# CHECK-NEXT: - - - 1.00 - - - - pcmpeqd %mm2, %mm2
# CHECK-NEXT: - - - 1.00 - - - - pcmpeqw %mm2, %mm2
# CHECK-NEXT: - - - 0.75 - 0.25 - - pcmpeqb %xmm2, %xmm2
# CHECK-NEXT: - - - 0.49 - 0.51 - - pcmpeqd %xmm2, %xmm2
# CHECK-NEXT: - - - 0.64 - 0.36 - - pcmpeqq %xmm2, %xmm2
# CHECK-NEXT: - - - 0.21 - 0.79 - - pcmpeqw %xmm2, %xmm2
# CHECK-NEXT: - - - 0.44 - 0.56 - - vpcmpeqb %xmm3, %xmm3, %xmm3
# CHECK-NEXT: - - - 0.26 - 0.74 - - vpcmpeqd %xmm3, %xmm3, %xmm3
# CHECK-NEXT: - - - 0.25 - 0.75 - - vpcmpeqq %xmm3, %xmm3, %xmm3
# CHECK-NEXT: - - - - - 1.00 - - vpcmpeqw %xmm3, %xmm3, %xmm3
# CHECK-NEXT: - - - 0.25 - 0.75 - - vpcmpeqb %xmm3, %xmm3, %xmm5
# CHECK-NEXT: - - - 0.55 - 0.45 - - vpcmpeqd %xmm3, %xmm3, %xmm5
# CHECK-NEXT: - - - 0.44 - 0.56 - - vpcmpeqq %xmm3, %xmm3, %xmm5
# CHECK-NEXT: - - - 0.37 - 0.63 - - vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Timeline view:
# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeER .. pcmpeqb %mm2, %mm2
# CHECK-NEXT: [0,1] D===eeeER .. pcmpeqd %mm2, %mm2
# CHECK-NEXT: [0,2] D======eeeER pcmpeqw %mm2, %mm2
# CHECK-NEXT: [0,3] DeE--------R pcmpeqb %xmm2, %xmm2
# CHECK-NEXT: [0,4] .DeE-------R pcmpeqd %xmm2, %xmm2
# CHECK-NEXT: [0,5] .D=eE------R pcmpeqq %xmm2, %xmm2
# CHECK-NEXT: [0,6] .D==eE-----R pcmpeqw %xmm2, %xmm2
# CHECK-NEXT: [0,7] .DeE-------R vpcmpeqb %xmm3, %xmm3, %xmm3
# CHECK-NEXT: [0,8] . DeE------R vpcmpeqd %xmm3, %xmm3, %xmm3
# CHECK-NEXT: [0,9] . D==eE----R vpcmpeqq %xmm3, %xmm3, %xmm3
# CHECK-NEXT: [0,10] . D===eE---R vpcmpeqw %xmm3, %xmm3, %xmm3
# CHECK-NEXT: [0,11] . D====eE--R vpcmpeqb %xmm3, %xmm3, %xmm5
# CHECK-NEXT: [0,12] . D====eE-R vpcmpeqd %xmm3, %xmm3, %xmm5
# CHECK-NEXT: [0,13] . D====eE-R vpcmpeqq %xmm3, %xmm3, %xmm5
# CHECK-NEXT: [0,14] . D=====eER vpcmpeqw %xmm3, %xmm3, %xmm5
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 pcmpeqb %mm2, %mm2
# CHECK-NEXT: 1. 1 4.0 0.0 0.0 pcmpeqd %mm2, %mm2
# CHECK-NEXT: 2. 1 7.0 0.0 0.0 pcmpeqw %mm2, %mm2
# CHECK-NEXT: 3. 1 1.0 1.0 8.0 pcmpeqb %xmm2, %xmm2
# CHECK-NEXT: 4. 1 1.0 0.0 7.0 pcmpeqd %xmm2, %xmm2
# CHECK-NEXT: 5. 1 2.0 0.0 6.0 pcmpeqq %xmm2, %xmm2
# CHECK-NEXT: 6. 1 3.0 0.0 5.0 pcmpeqw %xmm2, %xmm2
# CHECK-NEXT: 7. 1 1.0 1.0 7.0 vpcmpeqb %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 8. 1 1.0 0.0 6.0 vpcmpeqd %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 9. 1 3.0 1.0 4.0 vpcmpeqq %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 10. 1 4.0 0.0 3.0 vpcmpeqw %xmm3, %xmm3, %xmm3
# CHECK-NEXT: 11. 1 5.0 0.0 2.0 vpcmpeqb %xmm3, %xmm3, %xmm5
# CHECK-NEXT: 12. 1 5.0 1.0 1.0 vpcmpeqd %xmm3, %xmm3, %xmm5
# CHECK-NEXT: 13. 1 5.0 1.0 1.0 vpcmpeqq %xmm3, %xmm3, %xmm5
# CHECK-NEXT: 14. 1 6.0 2.0 0.0 vpcmpeqw %xmm3, %xmm3, %xmm5

View File

@ -0,0 +1,47 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
# Three writes to overlapping views of the same register: imul writes the
# full 64-bit %rbx, lzcnt reads only %ax and writes the 16-bit %bx, and add
# reads and writes the 32-bit %ebx.
# A single iteration is simulated, with the timeline view turned on and the
# resource-pressure view turned off.
imul %rax, %rbx
lzcnt %ax, %bx
add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 8
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.38
# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 imulq %rax, %rbx
# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
# CHECK-NEXT: 1 1 0.33 addl %ecx, %ebx
# CHECK: Timeline view:
# CHECK-NEXT: Index 01234567
# CHECK: [0,0] DeeeER . imulq %rax, %rbx
# CHECK-NEXT: [0,1] D=eeeER. lzcntw %ax, %bx
# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulq %rax, %rbx
# CHECK-NEXT: 1. 1 2.0 2.0 0.0 lzcntw %ax, %bx
# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx

View File

@ -0,0 +1,78 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
# perf stat reports a throughput of 1.00 IPC for this code snippet.
# The ILP is limited by the false dependency on %dx. So, the mov cannot execute
# in parallel with the add.
# All three instructions write the 16-bit %dx: add and xor also read it,
# while mov overwrites it without reading its old value.
# 1500 iterations are simulated; the timeline view is limited to the first 3.
add %cx, %dx
mov %ax, %dx
xor %bx, %dx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 1504
# CHECK-NEXT: Total uOps: 4500
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 2.99
# CHECK-NEXT: IPC: 2.99
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.33 addw %cx, %dx
# CHECK-NEXT: 1 1 0.33 movw %ax, %dx
# CHECK-NEXT: 1 1 0.33 xorw %bx, %dx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.67 - - 0.33 - - addw %cx, %dx
# CHECK-NEXT: - - - 0.67 - 0.33 - - movw %ax, %dx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - xorw %bx, %dx
# CHECK: Timeline view:
# CHECK-NEXT: Index 0123456
# CHECK: [0,0] DeER .. addw %cx, %dx
# CHECK-NEXT: [0,1] DeER .. movw %ax, %dx
# CHECK-NEXT: [0,2] D=eER.. xorw %bx, %dx
# CHECK-NEXT: [1,0] D==eER. addw %cx, %dx
# CHECK-NEXT: [1,1] .DeE-R. movw %ax, %dx
# CHECK-NEXT: [1,2] .D=eER. xorw %bx, %dx
# CHECK-NEXT: [2,0] .D==eER addw %cx, %dx
# CHECK-NEXT: [2,1] .DeE--R movw %ax, %dx
# CHECK-NEXT: [2,2] . DeE-R xorw %bx, %dx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 2.3 0.3 0.0 addw %cx, %dx
# CHECK-NEXT: 1. 3 1.0 1.0 1.0 movw %ax, %dx
# CHECK-NEXT: 2. 3 1.7 0.0 0.3 xorw %bx, %dx

View File

@ -0,0 +1,79 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
# perf stat reports a throughput of 0.60 IPC for this code snippet.
# The lzcnt cannot execute in parallel with the imul because there is a false
# dependency on %bx.
# All three instructions write the 16-bit %bx: imul and add also read it,
# while lzcnt reads only %ax.
# 1500 iterations are simulated; the timeline view is limited to the first 3.
imul %ax, %bx
lzcnt %ax, %bx
add %cx, %bx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 3005
# CHECK-NEXT: Total uOps: 4500
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.50
# CHECK-NEXT: IPC: 1.50
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 imulw %ax, %bx
# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
# CHECK-NEXT: 1 1 0.33 addw %cx, %bx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 0.50 2.00 - 0.50 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - imulw %ax, %bx
# CHECK-NEXT: - - - 1.00 - - - - lzcntw %ax, %bx
# CHECK-NEXT: - - 0.50 - - 0.50 - - addw %cx, %bx
# CHECK: Timeline view:
# CHECK-NEXT: 01
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeER .. imulw %ax, %bx
# CHECK-NEXT: [0,1] D=eeeER .. lzcntw %ax, %bx
# CHECK-NEXT: [0,2] D====eER .. addw %cx, %bx
# CHECK-NEXT: [1,0] D=====eeeER. imulw %ax, %bx
# CHECK-NEXT: [1,1] .D=eeeE---R. lzcntw %ax, %bx
# CHECK-NEXT: [1,2] .D====eE--R. addw %cx, %bx
# CHECK-NEXT: [2,0] .D=====eeeER imulw %ax, %bx
# CHECK-NEXT: [2,1] .D==eeeE---R lzcntw %ax, %bx
# CHECK-NEXT: [2,2] . D====eE--R addw %cx, %bx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 4.3 0.3 0.0 imulw %ax, %bx
# CHECK-NEXT: 1. 3 2.3 2.3 2.0 lzcntw %ax, %bx
# CHECK-NEXT: 2. 3 5.0 0.0 1.3 addw %cx, %bx

View File

@ -0,0 +1,61 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
# perf stat reports a throughput of 1.00 IPC for this code snippet.
# The block is a single instruction that reads %ax and writes the 16-bit %bx;
# nothing in the block writes %ax, so successive iterations are related only
# through the %bx destination.
# 1500 iterations are simulated; the timeline view is limited to the first 3.
lzcnt %ax, %bx ## partial register stall.
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 1500
# CHECK-NEXT: Total Cycles: 1505
# CHECK-NEXT: Total uOps: 1500
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 lzcntw %ax, %bx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 1.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - lzcntw %ax, %bx
# CHECK: Timeline view:
# CHECK-NEXT: Index 01234567
# CHECK: [0,0] DeeeER . lzcntw %ax, %bx
# CHECK-NEXT: [1,0] D=eeeER. lzcntw %ax, %bx
# CHECK-NEXT: [2,0] D==eeeER lzcntw %ax, %bx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 2.0 2.0 0.0 lzcntw %ax, %bx

View File

@ -0,0 +1,80 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1500 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
# perf stat reports a throughput of 0.60 IPC for this code snippet.
# Each lzcnt has a false dependency on %ecx; the first lzcnt has to wait on the
# imul. However, the folded load can start immediately.
# The last lzcnt has a false dependency on %cx. However, even in this case, the
# folded load can start immediately.
# imul reads and writes the 32-bit %ecx; both lzcnt instructions take their
# source from memory at (%rsp) and 2(%rsp) and write the 16-bit %cx.
# 1500 iterations are simulated; the timeline view is limited to the first 3.
imul %edx, %ecx
lzcnt (%rsp), %cx
lzcnt 2(%rsp), %cx
# CHECK: Iterations: 1500
# CHECK-NEXT: Instructions: 4500
# CHECK-NEXT: Total Cycles: 4510
# CHECK-NEXT: Total uOps: 7500
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.66
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 3.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 imull %edx, %ecx
# CHECK-NEXT: 2 8 1.00 * lzcntw (%rsp), %cx
# CHECK-NEXT: 2 8 1.00 * lzcntw 2(%rsp), %cx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 3.00 - - - 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - imull %edx, %ecx
# CHECK-NEXT: - - - 1.00 - - - 1.00 lzcntw (%rsp), %cx
# CHECK-NEXT: - - - 1.00 - - - 1.00 lzcntw 2(%rsp), %cx
# CHECK: Timeline view:
# CHECK-NEXT: 012345678
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeER . . . imull %edx, %ecx
# CHECK-NEXT: [0,1] D=eeeeeeeeER . . lzcntw (%rsp), %cx
# CHECK-NEXT: [0,2] .D=eeeeeeeeER . . lzcntw 2(%rsp), %cx
# CHECK-NEXT: [1,0] .D=========eeeER . imull %edx, %ecx
# CHECK-NEXT: [1,1] . D=eeeeeeeeE--R . lzcntw (%rsp), %cx
# CHECK-NEXT: [1,2] . D==eeeeeeeeE-R . lzcntw 2(%rsp), %cx
# CHECK-NEXT: [2,0] . D==========eeeER imull %edx, %ecx
# CHECK-NEXT: [2,1] . D==eeeeeeeeE---R lzcntw (%rsp), %cx
# CHECK-NEXT: [2,2] . D==eeeeeeeeE--R lzcntw 2(%rsp), %cx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 7.3 0.3 0.0 imull %edx, %ecx
# CHECK-NEXT: 1. 3 2.3 2.3 1.7 lzcntw (%rsp), %cx
# CHECK-NEXT: 2. 3 2.7 2.7 1.0 lzcntw 2(%rsp), %cx

View File

@ -0,0 +1,47 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -timeline < %s | FileCheck %s
# Dependency chain through sub-registers of %rcx: imul writes %cx, the first
# add reads and writes %cl, and the second add reads %ecx. In the expected
# timeline each add starts executing only after the instruction before it has
# finished executing.
imul %ax, %cx
add %al, %cl
add %ecx, %ebx
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 8
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.38
# CHECK-NEXT: IPC: 0.38
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 imulw %ax, %cx
# CHECK-NEXT: 1 1 0.33 addb %al, %cl
# CHECK-NEXT: 1 1 0.33 addl %ecx, %ebx
# CHECK: Timeline view:
# CHECK-NEXT: Index 01234567
# CHECK: [0,0] DeeeER . imulw %ax, %cx
# CHECK-NEXT: [0,1] D===eER. addb %al, %cl
# CHECK-NEXT: [0,2] D====eER addl %ecx, %ebx
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imulw %ax, %cx
# CHECK-NEXT: 1. 1 4.0 0.0 0.0 addb %al, %cl
# CHECK-NEXT: 2. 1 5.0 0.0 0.0 addl %ecx, %ebx

View File

@ -0,0 +1,99 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=2 < %s | FileCheck %s
# Eight vector instructions that all write %xmm2/%ymm2, grouped in pairs. The
# expected output reports 42.00 cycles of SBFPDivider pressure per iteration,
# all of it from the two vsqrtps (14.00 + 28.00), and a Block RThroughput of
# 42.0.
# NOTE(review): the group labels below (VALU0/VALU1, VIMUL/STC, FPA/FPM) do
# not appear among the resource names in the expected output (SBDivider,
# SBFPDivider, SBPort*). Presumably they were carried over from a test written
# for a different scheduling model; confirm before relying on them.
# VALU0/VALU1
vpmulld %xmm0, %xmm1, %xmm2
vpand %xmm0, %xmm1, %xmm2
# VIMUL/STC
vcvttps2dq %xmm0, %xmm2
vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# FPA/FPM
vaddps %xmm0, %xmm1, %xmm2
vsqrtps %xmm0, %xmm2
# FPA/FPM YMM
vaddps %ymm0, %ymm1, %ymm2
vsqrtps %ymm0, %ymm2
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 4256
# CHECK-NEXT: Total uOps: 1000
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.23
# CHECK-NEXT: IPC: 0.19
# CHECK-NEXT: Block RThroughput: 42.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 1.00 vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 1 0.33 vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 1 14 6.00 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 14 14.00 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 3 29 28.00 vsqrtps %ymm0, %ymm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - 42.00 6.03 3.96 - 17.01 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 1.00 - - - - - vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.05 0.06 - 0.89 - - vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - 1.98 0.90 - 15.12 - - vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - 14.00 1.00 - - - - - vsqrtps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - 28.00 2.00 - - 1.00 - - vsqrtps %ymm0, %ymm2
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeER . . . . . .. vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,1] DeE----R . . . . . .. vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,2] DeeeE--R . . . . . .. vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: [0,3] D=eeeeeeeeeeeeeeER . . . .. vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,4] .DeeeE-----------R . . . .. vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: [0,5] .DeeeeeeeeeeeeeeER . . . .. vsqrtps %xmm0, %xmm2
# CHECK-NEXT: [0,6] .D=eeeE----------R . . . .. vaddps %ymm0, %ymm1, %ymm2
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 1.0 1.0 79.0 vpmulld %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1. 2 1.0 1.0 82.5 vpand %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2. 2 1.5 1.5 80.0 vcvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 3. 2 1.5 1.5 74.0 vpclmulqdq $0, %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 4. 2 2.0 2.0 84.0 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 5. 2 9.5 9.5 65.0 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 6. 2 2.5 2.5 83.0 vaddps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 7. 2 147.5 147.5 0.0 vsqrtps %ymm0, %ymm2

View File

@ -0,0 +1,43 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -lqueue=2 -iterations=2 -resource-pressure=false -timeline -timeline-max-cycles=104 < %s | FileCheck %s
# Two instructions that the expected instruction info marks as MayLoad,
# MayStore and HasSideEffects. int3 is listed with a latency of 100 and
# stmxcsr with a latency of 5; the two iterations are expected to take 213
# cycles in total. The expected timeline section contains a single row, for
# the first int3.
int3
stmxcsr (%rsp)
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 4
# CHECK-NEXT: Total Cycles: 213
# CHECK-NEXT: Total uOps: 10
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.05
# CHECK-NEXT: IPC: 0.02
# CHECK-NEXT: Block RThroughput: 1.3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 100 0.33 * * U int3
# CHECK-NEXT: 4 5 1.00 * * U stmxcsr (%rsp)
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 012
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER int3
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 3.0 0.5 0.0 int3
# CHECK-NEXT: 1. 2 100.0 0.0 0.0 stmxcsr (%rsp)

View File

@ -0,0 +1,109 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -timeline -timeline-max-iterations=3 < %s | FileCheck %s
# Eight register-to-register 32-bit adds, each reported as a single uop with
# latency 1. The expected per-iteration resource pressure is spread almost
# evenly over SBPort0, SBPort1 and SBPort5 (2.66 / 2.67 / 2.67), and the
# expected timeline covers iterations 0 through 2.
add %eax, %ecx
add %eax, %edx
add %eax, %ebx
add %edx, %esi
add %ebx, %eax
add %edx, %esi
add %ebx, %eax
add %ebx, %eax
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 800
# CHECK-NEXT: Total Cycles: 403
# CHECK-NEXT: Total uOps: 800
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.99
# CHECK-NEXT: IPC: 1.99
# CHECK-NEXT: Block RThroughput: 2.7
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.33 addl %eax, %ecx
# CHECK-NEXT: 1 1 0.33 addl %eax, %edx
# CHECK-NEXT: 1 1 0.33 addl %eax, %ebx
# CHECK-NEXT: 1 1 0.33 addl %edx, %esi
# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
# CHECK-NEXT: 1 1 0.33 addl %edx, %esi
# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
# CHECK-NEXT: 1 1 0.33 addl %ebx, %eax
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 2.66 2.67 - 2.67 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %eax, %edx
# CHECK-NEXT: - - 0.34 0.33 - 0.33 - - addl %eax, %ebx
# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %edx, %esi
# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %ebx, %eax
# CHECK-NEXT: - - 0.34 0.33 - 0.33 - - addl %edx, %esi
# CHECK-NEXT: - - 0.33 0.33 - 0.34 - - addl %ebx, %eax
# CHECK-NEXT: - - 0.33 0.34 - 0.33 - - addl %ebx, %eax
# CHECK: Timeline view:
# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeER . . . addl %eax, %ecx
# CHECK-NEXT: [0,1] DeER . . . addl %eax, %edx
# CHECK-NEXT: [0,2] DeER . . . addl %eax, %ebx
# CHECK-NEXT: [0,3] D=eER. . . addl %edx, %esi
# CHECK-NEXT: [0,4] .DeER. . . addl %ebx, %eax
# CHECK-NEXT: [0,5] .D=eER . . addl %edx, %esi
# CHECK-NEXT: [0,6] .D=eER . . addl %ebx, %eax
# CHECK-NEXT: [0,7] .D==eER . . addl %ebx, %eax
# CHECK-NEXT: [1,0] . D==eER . . addl %eax, %ecx
# CHECK-NEXT: [1,1] . D==eER . . addl %eax, %edx
# CHECK-NEXT: [1,2] . D==eER . . addl %eax, %ebx
# CHECK-NEXT: [1,3] . D===eER . . addl %edx, %esi
# CHECK-NEXT: [1,4] . D==eER . . addl %ebx, %eax
# CHECK-NEXT: [1,5] . D===eER. . addl %edx, %esi
# CHECK-NEXT: [1,6] . D===eER. . addl %ebx, %eax
# CHECK-NEXT: [1,7] . D====eER . addl %ebx, %eax
# CHECK-NEXT: [2,0] . D====eER . addl %eax, %ecx
# CHECK-NEXT: [2,1] . D====eER . addl %eax, %edx
# CHECK-NEXT: [2,2] . D====eER . addl %eax, %ebx
# CHECK-NEXT: [2,3] . D=====eER . addl %edx, %esi
# CHECK-NEXT: [2,4] . D====eER . addl %ebx, %eax
# CHECK-NEXT: [2,5] . D=====eER. addl %edx, %esi
# CHECK-NEXT: [2,6] . D=====eER. addl %ebx, %eax
# CHECK-NEXT: [2,7] . D======eER addl %ebx, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 3.0 0.3 0.0 addl %eax, %ecx
# CHECK-NEXT: 1. 3 3.0 0.3 0.0 addl %eax, %edx
# CHECK-NEXT: 2. 3 3.0 0.3 0.0 addl %eax, %ebx
# CHECK-NEXT: 3. 3 4.0 0.0 0.0 addl %edx, %esi
# CHECK-NEXT: 4. 3 3.0 0.0 0.0 addl %ebx, %eax
# CHECK-NEXT: 5. 3 4.0 0.0 0.0 addl %edx, %esi
# CHECK-NEXT: 6. 3 4.0 0.0 0.0 addl %ebx, %eax
# CHECK-NEXT: 7. 3 5.0 0.0 0.0 addl %ebx, %eax

View File

@ -0,0 +1,61 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -retire-stats -iterations=1 < %s | FileCheck %s
# One vsqrtps (latency 14 in the expected instruction info) followed by
# fifteen identical vaddps (latency 3). The expected retire histogram has a
# single cycle in which 13 instructions retire together.
# NOTE(review): presumably the vaddps finish early but cannot retire ahead of
# the older vsqrtps; confirm against the retire control unit model.
vsqrtps %xmm0, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
vaddps %xmm0, %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 16
# CHECK-NEXT: Total Cycles: 20
# CHECK-NEXT: Total uOps: 16
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.80
# CHECK-NEXT: IPC: 0.80
# CHECK-NEXT: Block RThroughput: 15.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 14 14.00 vsqrtps %xmm0, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm1, %xmm2
# CHECK: Retire Control Unit - number of cycles where we saw N instructions retired:
# CHECK-NEXT: [# retired], [# cycles]
# CHECK-NEXT: 0, 16 (80.0%)
# CHECK-NEXT: 1, 3 (15.0%)
# CHECK-NEXT: 13, 1 (5.0%)

View File

@ -0,0 +1,48 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s
# The vmul can start executing 3cy in advance. That is because the first use
# operand (i.e. %xmm1) is a ReadAfterLd. That means, the memory operand is
# evaluated before %xmm1.
# In the expected timeline both instructions are dispatched at cycle 0 and
# start executing in the following cycle, even though vmulps reads the %xmm1
# written by vaddps.
vaddps %xmm0, %xmm0, %xmm1
vmulps (%rdi), %xmm1, %xmm2
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 14
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.21
# CHECK-NEXT: IPC: 0.14
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 2 11 1.00 * vmulps (%rdi), %xmm1, %xmm2
# CHECK: Timeline view:
# CHECK-NEXT: 0123
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: [0,1] DeeeeeeeeeeeER vmulps (%rdi), %xmm1, %xmm2
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vaddps %xmm0, %xmm0, %xmm1
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 vmulps (%rdi), %xmm1, %xmm2

View File

@ -0,0 +1,47 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=0 -timeline < %s | FileCheck %s
# Two single-operand imull instructions, the second with a memory source. In
# the expected timeline the second one is dispatched at cycle 1 and begins
# executing while the first is still executing.
imull %esi
imull (%rdi)
# The second integer multiply can start at cycle 2 because the implicit reads
# can start after the load operand is evaluated.
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 13
# CHECK-NEXT: Total uOps: 7
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.54
# CHECK-NEXT: IPC: 0.15
# CHECK-NEXT: Block RThroughput: 2.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 3 4 1.00 imull %esi
# CHECK-NEXT: 4 9 1.00 * imull (%rdi)
# CHECK: Timeline view:
# CHECK-NEXT: 012
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DeeeeER . . imull %esi
# CHECK-NEXT: [0,1] .DeeeeeeeeeER imull (%rdi)
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 imull %esi
# CHECK-NEXT: 1. 1 1.0 1.0 0.0 imull (%rdi)

View File

@ -0,0 +1,47 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=0 -timeline -dispatch=3 < %s | FileCheck %s
# Runs with -dispatch=3; the expected summary reports a Dispatch Width of 3.
# The first two adds account for three uops (1 + 2 in the expected instruction
# info), and the expected timeline shows the third add dispatched one cycle
# after them. That third add finishes early and then waits four cycles to
# retire behind the load-folding add.
add %rdi, %rsi
add (%rsp), %rsi
add %rdx, %r8
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 3
# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Total uOps: 4
# CHECK: Dispatch Width: 3
# CHECK-NEXT: uOps Per Cycle: 0.44
# CHECK-NEXT: IPC: 0.33
# CHECK-NEXT: Block RThroughput: 1.3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.33 addq %rdi, %rsi
# CHECK-NEXT: 2 6 0.50 * addq (%rsp), %rsi
# CHECK-NEXT: 1 1 0.33 addq %rdx, %r8
# CHECK: Timeline view:
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DeER . . addq %rdi, %rsi
# CHECK-NEXT: [0,1] DeeeeeeER addq (%rsp), %rsi
# CHECK-NEXT: [0,2] .DeE----R addq %rdx, %r8
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 addq %rdi, %rsi
# CHECK-NEXT: 1. 1 1.0 0.0 0.0 addq (%rsp), %rsi
# CHECK-NEXT: 2. 1 1.0 1.0 4.0 addq %rdx, %r8

View File

@ -0,0 +1,80 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
# The register move from XMM0 to XMM1 can be eliminated at register renaming
# stage. So, it should not consume pipeline resources.
# NOTE(review): the expected output recorded in this file does not show that
# elimination. vmovaps is listed with latency 1, is charged 1.00 on SBPort5,
# and has an execute cycle in the timeline. Only the vxorps (latency 0, no
# resource usage) is free. Presumably this is because the expectations are a
# baseline captured with the generic x86-64 CPU; confirm.
vxorps %xmm0, %xmm0, %xmm0
vmovaps %xmm0, %xmm1
vaddps %xmm1, %xmm1, %xmm2
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 9
# CHECK-NEXT: Total Cycles: 9
# CHECK-NEXT: Total uOps: 9
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 0 0.25 vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: 1 1 1.00 vmovaps %xmm0, %xmm1
# CHECK-NEXT: 1 3 1.00 vaddps %xmm1, %xmm1, %xmm2
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 9
# CHECK-NEXT: Max number of mappings used: 8
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 1.00 - 1.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm0, %xmm1
# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm1, %xmm1, %xmm2
# CHECK: Timeline view:
# CHECK-NEXT: Index 012345678
# CHECK: [0,0] DR . . vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [0,1] DeER . . vmovaps %xmm0, %xmm1
# CHECK-NEXT: [0,2] D=eeeER . vaddps %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [1,0] D-----R . vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [1,1] .DeE--R . vmovaps %xmm0, %xmm1
# CHECK-NEXT: [1,2] .D=eeeER. vaddps %xmm1, %xmm1, %xmm2
# CHECK-NEXT: [2,0] .D-----R. vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [2,1] .D=eE--R. vmovaps %xmm0, %xmm1
# CHECK-NEXT: [2,2] . D=eeeER vaddps %xmm1, %xmm1, %xmm2
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 0.0 0.0 3.3 vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: 1. 3 1.3 1.3 1.3 vmovaps %xmm0, %xmm1
# CHECK-NEXT: 2. 3 2.0 0.0 0.0 vaddps %xmm1, %xmm1, %xmm2

View File

@ -0,0 +1,121 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
# An MMX pair (pxor + movq) followed by an SSE chain of register moves in
# which every move reads the register written by the previous instruction
# (%xmm0 -> %xmm1 -> ... -> %xmm5 -> %xmm0).
# In the expected output xorps has latency 0 and no resource usage, while
# pxor on %mm0 has latency 1 and is charged to the ports. Every move is
# charged to a port, and 27 register mappings are created for the 27
# instructions.
pxor %mm0, %mm0
movq %mm0, %mm1
xorps %xmm0, %xmm0
movaps %xmm0, %xmm1
movups %xmm1, %xmm2
movapd %xmm2, %xmm3
movupd %xmm3, %xmm4
movdqa %xmm4, %xmm5
movdqu %xmm5, %xmm0
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 27
# CHECK-NEXT: Total Cycles: 22
# CHECK-NEXT: Total uOps: 27
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.23
# CHECK-NEXT: IPC: 1.23
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.33 pxor %mm0, %mm0
# CHECK-NEXT: 1 1 0.50 movq %mm0, %mm1
# CHECK-NEXT: 1 0 0.25 xorps %xmm0, %xmm0
# CHECK-NEXT: 1 1 1.00 movaps %xmm0, %xmm1
# CHECK-NEXT: 1 1 1.00 movups %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 movapd %xmm2, %xmm3
# CHECK-NEXT: 1 1 1.00 movupd %xmm3, %xmm4
# CHECK-NEXT: 1 1 0.33 movdqa %xmm4, %xmm5
# CHECK-NEXT: 1 1 0.33 movdqu %xmm5, %xmm0
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 27
# CHECK-NEXT: Max number of mappings used: 21
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.67 1.67 - 4.67 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 0.67 - 0.33 - - pxor %mm0, %mm0
# CHECK-NEXT: - - 1.00 - - - - - movq %mm0, %mm1
# CHECK-NEXT: - - - - - - - - xorps %xmm0, %xmm0
# CHECK-NEXT: - - - - - 1.00 - - movaps %xmm0, %xmm1
# CHECK-NEXT: - - - - - 1.00 - - movups %xmm1, %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movapd %xmm2, %xmm3
# CHECK-NEXT: - - - - - 1.00 - - movupd %xmm3, %xmm4
# CHECK-NEXT: - - - 1.00 - - - - movdqa %xmm4, %xmm5
# CHECK-NEXT: - - 0.67 - - 0.33 - - movdqu %xmm5, %xmm0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 01
# CHECK: [0,0] DeER . . . .. pxor %mm0, %mm0
# CHECK-NEXT: [0,1] D=eER. . . .. movq %mm0, %mm1
# CHECK-NEXT: [0,2] D---R. . . .. xorps %xmm0, %xmm0
# CHECK-NEXT: [0,3] D=eER. . . .. movaps %xmm0, %xmm1
# CHECK-NEXT: [0,4] .D=eER . . .. movups %xmm1, %xmm2
# CHECK-NEXT: [0,5] .D==eER . . .. movapd %xmm2, %xmm3
# CHECK-NEXT: [0,6] .D===eER . . .. movupd %xmm3, %xmm4
# CHECK-NEXT: [0,7] .D====eER . . .. movdqa %xmm4, %xmm5
# CHECK-NEXT: [0,8] . D====eER. . .. movdqu %xmm5, %xmm0
# CHECK-NEXT: [1,0] . DeE----R. . .. pxor %mm0, %mm0
# CHECK-NEXT: [1,1] . D=eE---R. . .. movq %mm0, %mm1
# CHECK-NEXT: [1,2] . D=====ER. . .. xorps %xmm0, %xmm0
# CHECK-NEXT: [1,3] . D====eER . .. movaps %xmm0, %xmm1
# CHECK-NEXT: [1,4] . D=====eER . .. movups %xmm1, %xmm2
# CHECK-NEXT: [1,5] . D======eER . .. movapd %xmm2, %xmm3
# CHECK-NEXT: [1,6] . D=======eER . .. movupd %xmm3, %xmm4
# CHECK-NEXT: [1,7] . D=======eER. .. movdqa %xmm4, %xmm5
# CHECK-NEXT: [1,8] . D========eER .. movdqu %xmm5, %xmm0
# CHECK-NEXT: [2,0] . DeE--------R .. pxor %mm0, %mm0
# CHECK-NEXT: [2,1] . D=eE-------R .. movq %mm0, %mm1
# CHECK-NEXT: [2,2] . D========ER .. xorps %xmm0, %xmm0
# CHECK-NEXT: [2,3] . D========eER .. movaps %xmm0, %xmm1
# CHECK-NEXT: [2,4] . D=========eER .. movups %xmm1, %xmm2
# CHECK-NEXT: [2,5] . D==========eER .. movapd %xmm2, %xmm3
# CHECK-NEXT: [2,6] . .D==========eER.. movupd %xmm3, %xmm4
# CHECK-NEXT: [2,7] . .D===========eER. movdqa %xmm4, %xmm5
# CHECK-NEXT: [2,8] . .D============eER movdqu %xmm5, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 1.0 1.0 4.0 pxor %mm0, %mm0
# CHECK-NEXT: 1. 3 2.0 0.0 3.3 movq %mm0, %mm1
# CHECK-NEXT: 2. 3 5.0 0.0 1.0 xorps %xmm0, %xmm0
# CHECK-NEXT: 3. 3 5.3 0.7 0.0 movaps %xmm0, %xmm1
# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movups %xmm1, %xmm2
# CHECK-NEXT: 5. 3 7.0 0.0 0.0 movapd %xmm2, %xmm3
# CHECK-NEXT: 6. 3 7.7 0.0 0.0 movupd %xmm3, %xmm4
# CHECK-NEXT: 7. 3 8.3 0.0 0.0 movdqa %xmm4, %xmm5
# CHECK-NEXT: 8. 3 9.0 0.0 0.0 movdqu %xmm5, %xmm0

View File

@ -0,0 +1,106 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
# AVX chain of register moves: after vxorps writes %xmm0, every vmov* reads
# the register written by the instruction before it
# (%xmm0 -> %xmm1 -> ... -> %xmm5 -> %xmm0).
# In the expected output vxorps has latency 0 and no resource usage, each
# move has latency 1 and is charged to a port, and 21 register mappings are
# created for the 21 instructions.
vxorps %xmm0, %xmm0, %xmm0
vmovaps %xmm0, %xmm1
vmovups %xmm1, %xmm2
vmovapd %xmm2, %xmm3
vmovupd %xmm3, %xmm4
vmovdqa %xmm4, %xmm5
vmovdqu %xmm5, %xmm0
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 21
# CHECK-NEXT: Total Cycles: 21
# CHECK-NEXT: Total uOps: 21
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 4.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 0 0.25 vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: 1 1 1.00 vmovaps %xmm0, %xmm1
# CHECK-NEXT: 1 1 1.00 vmovups %xmm1, %xmm2
# CHECK-NEXT: 1 1 1.00 vmovapd %xmm2, %xmm3
# CHECK-NEXT: 1 1 1.00 vmovupd %xmm3, %xmm4
# CHECK-NEXT: 1 1 0.33 vmovdqa %xmm4, %xmm5
# CHECK-NEXT: 1 1 0.33 vmovdqu %xmm5, %xmm0
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 21
# CHECK-NEXT: Max number of mappings used: 17
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 1.00 - 4.00 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - - - - vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: - - - - - 1.00 - - vmovaps %xmm0, %xmm1
# CHECK-NEXT: - - - - - 1.00 - - vmovups %xmm1, %xmm2
# CHECK-NEXT: - - - - - 1.00 - - vmovapd %xmm2, %xmm3
# CHECK-NEXT: - - - - - 1.00 - - vmovupd %xmm3, %xmm4
# CHECK-NEXT: - - - 1.00 - - - - vmovdqa %xmm4, %xmm5
# CHECK-NEXT: - - 1.00 - - - - - vmovdqu %xmm5, %xmm0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789
# CHECK-NEXT: Index 0123456789 0
# CHECK: [0,0] DR . . . . vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [0,1] DeER . . . . vmovaps %xmm0, %xmm1
# CHECK-NEXT: [0,2] D=eER. . . . vmovups %xmm1, %xmm2
# CHECK-NEXT: [0,3] D==eER . . . vmovapd %xmm2, %xmm3
# CHECK-NEXT: [0,4] .D==eER . . . vmovupd %xmm3, %xmm4
# CHECK-NEXT: [0,5] .D===eER . . . vmovdqa %xmm4, %xmm5
# CHECK-NEXT: [0,6] .D====eER . . . vmovdqu %xmm5, %xmm0
# CHECK-NEXT: [1,0] .D=====ER . . . vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [1,1] . D====eER. . . vmovaps %xmm0, %xmm1
# CHECK-NEXT: [1,2] . D=====eER . . vmovups %xmm1, %xmm2
# CHECK-NEXT: [1,3] . D======eER . . vmovapd %xmm2, %xmm3
# CHECK-NEXT: [1,4] . D=======eER . . vmovupd %xmm3, %xmm4
# CHECK-NEXT: [1,5] . D=======eER . . vmovdqa %xmm4, %xmm5
# CHECK-NEXT: [1,6] . D========eER. . vmovdqu %xmm5, %xmm0
# CHECK-NEXT: [2,0] . D=========ER. . vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [2,1] . D=========eER . vmovaps %xmm0, %xmm1
# CHECK-NEXT: [2,2] . D=========eER . vmovups %xmm1, %xmm2
# CHECK-NEXT: [2,3] . D==========eER . vmovapd %xmm2, %xmm3
# CHECK-NEXT: [2,4] . D===========eER . vmovupd %xmm3, %xmm4
# CHECK-NEXT: [2,5] . D============eER. vmovdqa %xmm4, %xmm5
# CHECK-NEXT: [2,6] . D============eER vmovdqu %xmm5, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 5.3 0.0 0.0 vxorps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: 1. 3 5.3 0.3 0.0 vmovaps %xmm0, %xmm1
# CHECK-NEXT: 2. 3 6.0 0.0 0.0 vmovups %xmm1, %xmm2
# CHECK-NEXT: 3. 3 7.0 0.0 0.0 vmovapd %xmm2, %xmm3
# CHECK-NEXT: 4. 3 7.7 0.0 0.0 vmovupd %xmm3, %xmm4
# CHECK-NEXT: 5. 3 8.3 0.0 0.0 vmovdqa %xmm4, %xmm5
# CHECK-NEXT: 6. 3 9.0 0.0 0.0 vmovdqu %xmm5, %xmm0

View File

@ -0,0 +1,92 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
# 32-bit general-purpose register move chain: xor writes %eax, then every mov
# reads the register written by the instruction before it
# (%eax -> %ebx -> %ecx -> %edx -> %eax).
# In the expected output xorl has latency 0 and no resource usage, each movl
# has latency 1 with its pressure split evenly over SBPort0/1/5, and 18
# register mappings are created for the 15 instructions.
xor %eax, %eax
mov %eax, %ebx
mov %ebx, %ecx
mov %ecx, %edx
mov %edx, %eax
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 15
# CHECK-NEXT: Total Cycles: 15
# CHECK-NEXT: Total uOps: 15
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 0 0.25 xorl %eax, %eax
# CHECK-NEXT: 1 1 0.33 movl %eax, %ebx
# CHECK-NEXT: 1 1 0.33 movl %ebx, %ecx
# CHECK-NEXT: 1 1 0.33 movl %ecx, %edx
# CHECK-NEXT: 1 1 0.33 movl %edx, %eax
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 18
# CHECK-NEXT: Max number of mappings used: 15
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - - - - xorl %eax, %eax
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %eax, %ebx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %ebx, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %ecx, %edx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movl %edx, %eax
# CHECK: Timeline view:
# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DR . . . xorl %eax, %eax
# CHECK-NEXT: [0,1] DeER . . . movl %eax, %ebx
# CHECK-NEXT: [0,2] D=eER. . . movl %ebx, %ecx
# CHECK-NEXT: [0,3] D==eER . . movl %ecx, %edx
# CHECK-NEXT: [0,4] .D==eER . . movl %edx, %eax
# CHECK-NEXT: [1,0] .D===ER . . xorl %eax, %eax
# CHECK-NEXT: [1,1] .D===eER . . movl %eax, %ebx
# CHECK-NEXT: [1,2] .D====eER . . movl %ebx, %ecx
# CHECK-NEXT: [1,3] . D====eER. . movl %ecx, %edx
# CHECK-NEXT: [1,4] . D=====eER . movl %edx, %eax
# CHECK-NEXT: [2,0] . D======ER . xorl %eax, %eax
# CHECK-NEXT: [2,1] . D======eER . movl %eax, %ebx
# CHECK-NEXT: [2,2] . D======eER . movl %ebx, %ecx
# CHECK-NEXT: [2,3] . D=======eER. movl %ecx, %edx
# CHECK-NEXT: [2,4] . D========eER movl %edx, %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 3.7 0.0 0.0 xorl %eax, %eax
# CHECK-NEXT: 1. 3 4.0 0.3 0.0 movl %eax, %ebx
# CHECK-NEXT: 2. 3 4.7 0.0 0.0 movl %ebx, %ecx
# CHECK-NEXT: 3. 3 5.3 0.0 0.0 movl %ecx, %edx
# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movl %edx, %eax

View File

@ -0,0 +1,92 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=3 -timeline -register-file-stats < %s | FileCheck %s
# Input kernel: a 64-bit xor of %rax with itself, followed by a chain of
# register moves in which every mov reads the register written by the
# instruction before it (rax -> rbx -> rcx -> rdx -> rax).
# The expected output below covers three simulated iterations and records the
# self-xor with latency 0 and no port pressure, while each mov has latency 1.
# NOTE(review): with -mcpu=x86-64 the expected resources are SB-prefixed;
# presumably a placeholder until a dedicated CPU model is selected - confirm.
xor %rax, %rax
mov %rax, %rbx
mov %rbx, %rcx
mov %rcx, %rdx
mov %rdx, %rax
# CHECK: Iterations: 3
# CHECK-NEXT: Instructions: 15
# CHECK-NEXT: Total Cycles: 15
# CHECK-NEXT: Total uOps: 15
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 1.00
# CHECK-NEXT: IPC: 1.00
# CHECK-NEXT: Block RThroughput: 1.3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 0 0.25 xorq %rax, %rax
# CHECK-NEXT: 1 1 0.33 movq %rax, %rbx
# CHECK-NEXT: 1 1 0.33 movq %rbx, %rcx
# CHECK-NEXT: 1 1 0.33 movq %rcx, %rdx
# CHECK-NEXT: 1 1 0.33 movq %rdx, %rax
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 18
# CHECK-NEXT: Max number of mappings used: 15
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.33 1.33 - 1.33 - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - - - - xorq %rax, %rax
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rax, %rbx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rbx, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rcx, %rdx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rdx, %rax
# CHECK: Timeline view:
# CHECK-NEXT: 01234
# CHECK-NEXT: Index 0123456789
# CHECK: [0,0] DR . . . xorq %rax, %rax
# CHECK-NEXT: [0,1] DeER . . . movq %rax, %rbx
# CHECK-NEXT: [0,2] D=eER. . . movq %rbx, %rcx
# CHECK-NEXT: [0,3] D==eER . . movq %rcx, %rdx
# CHECK-NEXT: [0,4] .D==eER . . movq %rdx, %rax
# CHECK-NEXT: [1,0] .D===ER . . xorq %rax, %rax
# CHECK-NEXT: [1,1] .D===eER . . movq %rax, %rbx
# CHECK-NEXT: [1,2] .D====eER . . movq %rbx, %rcx
# CHECK-NEXT: [1,3] . D====eER. . movq %rcx, %rdx
# CHECK-NEXT: [1,4] . D=====eER . movq %rdx, %rax
# CHECK-NEXT: [2,0] . D======ER . xorq %rax, %rax
# CHECK-NEXT: [2,1] . D======eER . movq %rax, %rbx
# CHECK-NEXT: [2,2] . D======eER . movq %rbx, %rcx
# CHECK-NEXT: [2,3] . D=======eER. movq %rcx, %rdx
# CHECK-NEXT: [2,4] . D========eER movq %rdx, %rax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 3.7 0.0 0.0 xorq %rax, %rax
# CHECK-NEXT: 1. 3 4.0 0.3 0.0 movq %rax, %rbx
# CHECK-NEXT: 2. 3 4.7 0.0 0.0 movq %rbx, %rcx
# CHECK-NEXT: 3. 3 5.3 0.0 0.0 movq %rcx, %rdx
# CHECK-NEXT: 4. 3 6.0 0.0 0.0 movq %rdx, %rax

View File

@ -0,0 +1,77 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
# Input kernel: two vector instructions that both read and write %xmm0, so
# each instruction depends on the one before it, within an iteration and
# across the five simulated iterations.
# No register file size is forced on the command line; the expected dispatch
# statistics below report zero cycles stalled on register availability and
# 10 register mappings created, all 10 in use at the peak.
vaddps %xmm0, %xmm0, %xmm0
vmulps %xmm0, %xmm0, %xmm0
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 43
# CHECK-NEXT: Total uOps: 10
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.23
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 40 (93.0%)
# CHECK-NEXT: 2, 1 (2.3%)
# CHECK-NEXT: 4, 2 (4.7%)
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 10
# CHECK-NEXT: Max number of mappings used: 10
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 1.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm0, %xmm0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 012
# CHECK: [0,0] DeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [0,1] D===eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [1,0] D========eeeER . . . . . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [1,1] D===========eeeeeER . . . . . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [2,0] .D===============eeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [2,1] .D==================eeeeeER . . . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [3,0] .D=======================eeeER. . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [3,1] .D==========================eeeeeER. . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [4,0] . D==============================eeeER . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [4,1] . D=================================eeeeeER vmulps %xmm0, %xmm0, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 5 16.2 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: 1. 5 19.2 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0

View File

@ -0,0 +1,77 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -register-file-size=5 -iterations=5 -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
# Input kernel: two vector instructions that both read and write %xmm0, run
# for five iterations with the register file limited by -register-file-size=5.
# The expected output below reports 20 dispatch-stall cycles attributed to
# register unavailability, 10 mappings created in total, and never more than
# 5 mappings in use at once.
vaddps %xmm0, %xmm0, %xmm0
vmulps %xmm0, %xmm0, %xmm0
# CHECK: Iterations: 5
# CHECK-NEXT: Instructions: 10
# CHECK-NEXT: Total Cycles: 43
# CHECK-NEXT: Total uOps: 10
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.23
# CHECK-NEXT: IPC: 0.23
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 20 (46.5%)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 36 (83.7%)
# CHECK-NEXT: 1, 6 (14.0%)
# CHECK-NEXT: 4, 1 (2.3%)
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 10
# CHECK-NEXT: Max number of mappings used: 5
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 1.00 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: - - 1.00 - - - - - vmulps %xmm0, %xmm0, %xmm0
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789
# CHECK-NEXT: Index 0123456789 0123456789 012
# CHECK: [0,0] DeeeER . . . . . . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [0,1] D===eeeeeER . . . . . . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [1,0] D========eeeER . . . . . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [1,1] D===========eeeeeER . . . . . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [2,0] .D===============eeeER . . . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [2,1] . D==============eeeeeER . . . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [3,0] . . D==============eeeER. . . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [3,1] . . . D==============eeeeeER. . . vmulps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [4,0] . . . . D==============eeeER . . vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: [4,1] . . . . .D==============eeeeeER vmulps %xmm0, %xmm0, %xmm0
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 5 11.2 0.2 0.0 vaddps %xmm0, %xmm0, %xmm0
# CHECK-NEXT: 1. 5 12.2 0.0 0.0 vmulps %xmm0, %xmm0, %xmm0

View File

@ -0,0 +1,76 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -register-file-size=5 -iterations=2 -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
# Input kernel: a single 32-bit signed divide, simulated for two iterations
# with the register file limited by -register-file-size=5.
# The expected statistics below show 6 register mappings created over the two
# iterations with at most 3 in use, and 27 dispatch-stall cycles attributed to
# register unavailability; in the expected timeline the second idivl is only
# dispatched after the first one has retired.
idiv %eax
# CHECK: Iterations: 2
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 55
# CHECK-NEXT: Total uOps: 2
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.04
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 25 10.00 U idivl %eax
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 27 (49.1%)
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 53 (96.4%)
# CHECK-NEXT: 1, 2 (3.6%)
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 6
# CHECK-NEXT: Max number of mappings used: 3
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: 10.00 - 1.00 - - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: 10.00 - 1.00 - - - - - idivl %eax
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 01234
# CHECK-NEXT: Index 0123456789 0123456789 0123456789
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
# CHECK-NEXT: [1,0] . . . . . . DeeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 2 1.0 1.0 0.0 idivl %eax

View File

@ -0,0 +1,60 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=22 -dispatch-stats -register-file-stats -resource-pressure=false -timeline -timeline-max-iterations=3 < %s | FileCheck %s
# Input kernel: a single 32-bit signed divide, simulated for 22 iterations
# without forcing a register file size.
# The resource pressure views are disabled and the timeline is capped at the
# first three iterations by the command line above.
# The expected statistics below show 66 register mappings created, all 66 in
# use at the peak, and no dispatch stalls caused by register unavailability.
idiv %eax
# CHECK: Iterations: 22
# CHECK-NEXT: Instructions: 22
# CHECK-NEXT: Total Cycles: 553
# CHECK-NEXT: Total uOps: 22
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.04
# CHECK-NEXT: IPC: 0.04
# CHECK-NEXT: Block RThroughput: 10.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 25 10.00 U idivl %eax
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 547 (98.9%)
# CHECK-NEXT: 2, 1 (0.2%)
# CHECK-NEXT: 4, 5 (0.9%)
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 66
# CHECK-NEXT: Max number of mappings used: 66
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456789 0123456789 01234567
# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . . . . . . idivl %eax
# CHECK-NEXT: [1,0] D=========================eeeeeeeeeeeeeeeeeeeeeeeeeER . . . . . . idivl %eax
# CHECK-NEXT: [2,0] D==================================================eeeeeeeeeeeeeeeeeeeeeeeeeER idivl %eax
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 3 26.0 0.3 0.0 idivl %eax

View File

@ -0,0 +1,143 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -instruction-info=false -dispatch-stats -register-file-stats -timeline < %s | FileCheck %s
# Input kernel: one 256-bit vdivps followed by 32 vaddps instructions,
# simulated for a single iteration (33 instructions in total).
# The vaddps instructions come in three groups, distinguished by their first
# source operand: %ymm0, then %ymm2, then %ymm3.
# In the expected timeline below the early vaddps instructions finish
# executing well before the vdivps and then wait for retirement (the '-'
# cells, also reported in column [3] of the wait-time table).
vdivps %ymm0, %ymm0, %ymm1
vaddps %ymm0, %ymm0, %ymm2
vaddps %ymm0, %ymm0, %ymm3
vaddps %ymm0, %ymm0, %ymm4
vaddps %ymm0, %ymm0, %ymm5
vaddps %ymm0, %ymm0, %ymm6
vaddps %ymm0, %ymm0, %ymm7
vaddps %ymm0, %ymm0, %ymm8
vaddps %ymm0, %ymm0, %ymm9
vaddps %ymm0, %ymm0, %ymm10
vaddps %ymm0, %ymm0, %ymm11
vaddps %ymm0, %ymm0, %ymm12
vaddps %ymm0, %ymm0, %ymm13
vaddps %ymm0, %ymm0, %ymm14
vaddps %ymm0, %ymm0, %ymm15
vaddps %ymm2, %ymm0, %ymm0
vaddps %ymm2, %ymm0, %ymm3
vaddps %ymm2, %ymm0, %ymm4
vaddps %ymm2, %ymm0, %ymm5
vaddps %ymm2, %ymm0, %ymm6
vaddps %ymm2, %ymm0, %ymm7
vaddps %ymm2, %ymm0, %ymm8
vaddps %ymm2, %ymm0, %ymm9
vaddps %ymm2, %ymm0, %ymm10
vaddps %ymm2, %ymm0, %ymm11
vaddps %ymm2, %ymm0, %ymm12
vaddps %ymm2, %ymm0, %ymm13
vaddps %ymm2, %ymm0, %ymm14
vaddps %ymm2, %ymm0, %ymm15
vaddps %ymm3, %ymm0, %ymm2
vaddps %ymm3, %ymm0, %ymm4
vaddps %ymm3, %ymm0, %ymm5
vaddps %ymm3, %ymm0, %ymm6
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 33
# CHECK-NEXT: Total Cycles: 37
# CHECK-NEXT: Total uOps: 35
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.95
# CHECK-NEXT: IPC: 0.89
# CHECK-NEXT: Block RThroughput: 32.0
# CHECK: Dynamic Dispatch Stall Cycles:
# CHECK-NEXT: RAT - Register unavailable: 0
# CHECK-NEXT: RCU - Retire tokens unavailable: 0
# CHECK-NEXT: SCHEDQ - Scheduler full: 0
# CHECK-NEXT: LQ - Load queue full: 0
# CHECK-NEXT: SQ - Store queue full: 0
# CHECK-NEXT: GROUP - Static restrictions on the dispatch group: 0
# CHECK: Dispatch Logic - number of cycles where we saw N micro opcodes dispatched:
# CHECK-NEXT: [# dispatched], [# cycles]
# CHECK-NEXT: 0, 28 (75.7%)
# CHECK-NEXT: 3, 1 (2.7%)
# CHECK-NEXT: 4, 8 (21.6%)
# CHECK: Register File statistics:
# CHECK-NEXT: Total number of mappings created: 33
# CHECK-NEXT: Max number of mappings used: 33
# CHECK: Timeline view:
# CHECK-NEXT: 0123456789 0123456
# CHECK-NEXT: Index 0123456789 0123456789
# CHECK: [0,0] DeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER .. vdivps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: [0,1] DeeeE--------------------------R .. vaddps %ymm0, %ymm0, %ymm2
# CHECK-NEXT: [0,2] .DeeeE-------------------------R .. vaddps %ymm0, %ymm0, %ymm3
# CHECK-NEXT: [0,3] .D=eeeE------------------------R .. vaddps %ymm0, %ymm0, %ymm4
# CHECK-NEXT: [0,4] .D==eeeE-----------------------R .. vaddps %ymm0, %ymm0, %ymm5
# CHECK-NEXT: [0,5] .D===eeeE----------------------R .. vaddps %ymm0, %ymm0, %ymm6
# CHECK-NEXT: [0,6] . D===eeeE---------------------R .. vaddps %ymm0, %ymm0, %ymm7
# CHECK-NEXT: [0,7] . D=====eeeE-------------------R .. vaddps %ymm0, %ymm0, %ymm8
# CHECK-NEXT: [0,8] . D======eeeE------------------R .. vaddps %ymm0, %ymm0, %ymm9
# CHECK-NEXT: [0,9] . D=======eeeE-----------------R .. vaddps %ymm0, %ymm0, %ymm10
# CHECK-NEXT: [0,10] . D=======eeeE----------------R .. vaddps %ymm0, %ymm0, %ymm11
# CHECK-NEXT: [0,11] . D========eeeE---------------R .. vaddps %ymm0, %ymm0, %ymm12
# CHECK-NEXT: [0,12] . D=========eeeE--------------R .. vaddps %ymm0, %ymm0, %ymm13
# CHECK-NEXT: [0,13] . D===========eeeE------------R .. vaddps %ymm0, %ymm0, %ymm14
# CHECK-NEXT: [0,14] . D===========eeeE-----------R .. vaddps %ymm0, %ymm0, %ymm15
# CHECK-NEXT: [0,15] . D==eeeE--------------------R .. vaddps %ymm2, %ymm0, %ymm0
# CHECK-NEXT: [0,16] . D=========eeeE-------------R .. vaddps %ymm2, %ymm0, %ymm3
# CHECK-NEXT: [0,17] . D============eeeE----------R .. vaddps %ymm2, %ymm0, %ymm4
# CHECK-NEXT: [0,18] . D============eeeE---------R .. vaddps %ymm2, %ymm0, %ymm5
# CHECK-NEXT: [0,19] . D=============eeeE--------R .. vaddps %ymm2, %ymm0, %ymm6
# CHECK-NEXT: [0,20] . D==============eeeE-------R .. vaddps %ymm2, %ymm0, %ymm7
# CHECK-NEXT: [0,21] . D===============eeeE------R .. vaddps %ymm2, %ymm0, %ymm8
# CHECK-NEXT: [0,22] . .D===============eeeE-----R .. vaddps %ymm2, %ymm0, %ymm9
# CHECK-NEXT: [0,23] . .D================eeeE----R .. vaddps %ymm2, %ymm0, %ymm10
# CHECK-NEXT: [0,24] . .D=================eeeE---R .. vaddps %ymm2, %ymm0, %ymm11
# CHECK-NEXT: [0,25] . .D==================eeeE--R .. vaddps %ymm2, %ymm0, %ymm12
# CHECK-NEXT: [0,26] . . D==================eeeE-R .. vaddps %ymm2, %ymm0, %ymm13
# CHECK-NEXT: [0,27] . . D===================eeeER .. vaddps %ymm2, %ymm0, %ymm14
# CHECK-NEXT: [0,28] . . D====================eeeER .. vaddps %ymm2, %ymm0, %ymm15
# CHECK-NEXT: [0,29] . . D=====================eeeER .. vaddps %ymm3, %ymm0, %ymm2
# CHECK-NEXT: [0,30] . . D=====================eeeER.. vaddps %ymm3, %ymm0, %ymm4
# CHECK-NEXT: [0,31] . . D======================eeeER. vaddps %ymm3, %ymm0, %ymm5
# CHECK-NEXT: [0,32] . . D=======================eeeER vaddps %ymm3, %ymm0, %ymm6
# CHECK: Average Wait times (based on the timeline view):
# CHECK-NEXT: [0]: Executions
# CHECK-NEXT: [1]: Average time spent waiting in a scheduler's queue
# CHECK-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
# CHECK-NEXT: [3]: Average time elapsed from WB until retire stage
# CHECK: [0] [1] [2] [3]
# CHECK-NEXT: 0. 1 1.0 1.0 0.0 vdivps %ymm0, %ymm0, %ymm1
# CHECK-NEXT: 1. 1 1.0 1.0 26.0 vaddps %ymm0, %ymm0, %ymm2
# CHECK-NEXT: 2. 1 1.0 1.0 25.0 vaddps %ymm0, %ymm0, %ymm3
# CHECK-NEXT: 3. 1 2.0 2.0 24.0 vaddps %ymm0, %ymm0, %ymm4
# CHECK-NEXT: 4. 1 3.0 3.0 23.0 vaddps %ymm0, %ymm0, %ymm5
# CHECK-NEXT: 5. 1 4.0 4.0 22.0 vaddps %ymm0, %ymm0, %ymm6
# CHECK-NEXT: 6. 1 4.0 4.0 21.0 vaddps %ymm0, %ymm0, %ymm7
# CHECK-NEXT: 7. 1 6.0 6.0 19.0 vaddps %ymm0, %ymm0, %ymm8
# CHECK-NEXT: 8. 1 7.0 7.0 18.0 vaddps %ymm0, %ymm0, %ymm9
# CHECK-NEXT: 9. 1 8.0 8.0 17.0 vaddps %ymm0, %ymm0, %ymm10
# CHECK-NEXT: 10. 1 8.0 8.0 16.0 vaddps %ymm0, %ymm0, %ymm11
# CHECK-NEXT: 11. 1 9.0 9.0 15.0 vaddps %ymm0, %ymm0, %ymm12
# CHECK-NEXT: 12. 1 10.0 10.0 14.0 vaddps %ymm0, %ymm0, %ymm13
# CHECK-NEXT: 13. 1 12.0 12.0 12.0 vaddps %ymm0, %ymm0, %ymm14
# CHECK-NEXT: 14. 1 12.0 12.0 11.0 vaddps %ymm0, %ymm0, %ymm15
# CHECK-NEXT: 15. 1 3.0 3.0 20.0 vaddps %ymm2, %ymm0, %ymm0
# CHECK-NEXT: 16. 1 10.0 4.0 13.0 vaddps %ymm2, %ymm0, %ymm3
# CHECK-NEXT: 17. 1 13.0 7.0 10.0 vaddps %ymm2, %ymm0, %ymm4
# CHECK-NEXT: 18. 1 13.0 8.0 9.0 vaddps %ymm2, %ymm0, %ymm5
# CHECK-NEXT: 19. 1 14.0 9.0 8.0 vaddps %ymm2, %ymm0, %ymm6
# CHECK-NEXT: 20. 1 15.0 10.0 7.0 vaddps %ymm2, %ymm0, %ymm7
# CHECK-NEXT: 21. 1 16.0 11.0 6.0 vaddps %ymm2, %ymm0, %ymm8
# CHECK-NEXT: 22. 1 16.0 12.0 5.0 vaddps %ymm2, %ymm0, %ymm9
# CHECK-NEXT: 23. 1 17.0 13.0 4.0 vaddps %ymm2, %ymm0, %ymm10
# CHECK-NEXT: 24. 1 18.0 14.0 3.0 vaddps %ymm2, %ymm0, %ymm11
# CHECK-NEXT: 25. 1 19.0 15.0 2.0 vaddps %ymm2, %ymm0, %ymm12
# CHECK-NEXT: 26. 1 19.0 16.0 1.0 vaddps %ymm2, %ymm0, %ymm13
# CHECK-NEXT: 27. 1 20.0 17.0 0.0 vaddps %ymm2, %ymm0, %ymm14
# CHECK-NEXT: 28. 1 21.0 18.0 0.0 vaddps %ymm2, %ymm0, %ymm15
# CHECK-NEXT: 29. 1 22.0 12.0 0.0 vaddps %ymm3, %ymm0, %ymm2
# CHECK-NEXT: 30. 1 22.0 13.0 0.0 vaddps %ymm3, %ymm0, %ymm4
# CHECK-NEXT: 31. 1 23.0 14.0 0.0 vaddps %ymm3, %ymm0, %ymm5
# CHECK-NEXT: 32. 1 24.0 15.0 0.0 vaddps %ymm3, %ymm0, %ymm6

View File

@ -0,0 +1,208 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
# Instruction-table input: a list of instructions operating on %mm registers
# (3DNow!-style mnemonics, judging by the names - confirm against the ISA
# reference), plus femms, prefetch and prefetchw.
# Every two-operand instruction appears twice: once with a register source
# (%mm0) and once with a memory source ((%rax)), so the expected tables below
# show the per-instruction uOps, latency, throughput and port usage for both
# forms side by side.
femms
pavgusb %mm0, %mm2
pavgusb (%rax), %mm2
pf2id %mm0, %mm2
pf2id (%rax), %mm2
pf2iw %mm0, %mm2
pf2iw (%rax), %mm2
pfacc %mm0, %mm2
pfacc (%rax), %mm2
pfadd %mm0, %mm2
pfadd (%rax), %mm2
pfcmpeq %mm0, %mm2
pfcmpeq (%rax), %mm2
pfcmpge %mm0, %mm2
pfcmpge (%rax), %mm2
pfcmpgt %mm0, %mm2
pfcmpgt (%rax), %mm2
pfmax %mm0, %mm2
pfmax (%rax), %mm2
pfmin %mm0, %mm2
pfmin (%rax), %mm2
pfmul %mm0, %mm2
pfmul (%rax), %mm2
pfnacc %mm0, %mm2
pfnacc (%rax), %mm2
pfpnacc %mm0, %mm2
pfpnacc (%rax), %mm2
pfrcp %mm0, %mm2
pfrcp (%rax), %mm2
pfrcpit1 %mm0, %mm2
pfrcpit1 (%rax), %mm2
pfrcpit2 %mm0, %mm2
pfrcpit2 (%rax), %mm2
pfrsqit1 %mm0, %mm2
pfrsqit1 (%rax), %mm2
pfrsqrt %mm0, %mm2
pfrsqrt (%rax), %mm2
pfsub %mm0, %mm2
pfsub (%rax), %mm2
pfsubr %mm0, %mm2
pfsubr (%rax), %mm2
pi2fd %mm0, %mm2
pi2fd (%rax), %mm2
pi2fw %mm0, %mm2
pi2fw (%rax), %mm2
pmulhrw %mm0, %mm2
pmulhrw (%rax), %mm2
prefetch (%rax)
prefetchw (%rax)
pswapd %mm0, %mm2
pswapd (%rax), %mm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 31 31 10.33 * * U femms
# CHECK-NEXT: 1 3 1.00 pavgusb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pavgusb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pf2id %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pf2id (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pf2iw %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pf2iw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfacc %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfacc (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfadd %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfadd (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfcmpeq %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfcmpeq (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfcmpge %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfcmpge (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfcmpgt %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfcmpgt (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfmax %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfmax (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfmin %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfmin (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfmul %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfmul (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfnacc %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfnacc (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfpnacc %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfpnacc (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfrcp %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfrcp (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfrcpit1 %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfrcpit1 (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfrcpit2 %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfrcpit2 (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfrsqit1 %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfrsqit1 (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfrsqrt %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfrsqrt (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfsub %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfsub (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pfsubr %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pfsubr (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pi2fd %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pi2fd (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pi2fw %mm0, %mm2
# CHECK-NEXT: 2 9 1.00 * pi2fw (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 pmulhrw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmulhrw (%rax), %mm2
# CHECK-NEXT: 1 5 0.50 * * prefetch (%rax)
# CHECK-NEXT: 1 5 0.50 * * prefetchw (%rax)
# CHECK-NEXT: 1 1 1.00 pswapd %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * pswapd (%rax), %mm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 12.33 54.33 - 12.33 13.00 13.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 10.33 10.33 - 10.33 - - femms
# CHECK-NEXT: - - - 1.00 - - - - pavgusb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgusb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pf2id %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pf2id (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pf2iw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pf2iw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfacc %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfacc (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfadd %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfadd (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfcmpeq %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpeq (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfcmpge %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpge (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfcmpgt %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfcmpgt (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfmax %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmax (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfmin %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmin (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfmul %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfmul (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfnacc %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfnacc (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfpnacc %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfpnacc (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfrcp %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcp (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfrcpit1 %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcpit1 (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfrcpit2 %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrcpit2 (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfrsqit1 %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrsqit1 (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfrsqrt %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfrsqrt (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfsub %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfsub (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pfsubr %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pfsubr (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pi2fd %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pi2fd (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pi2fw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pi2fw (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - pmulhrw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhrw (%rax), %mm2
# CHECK-NEXT: - - - - - - 0.50 0.50 prefetch (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 prefetchw (%rax)
# CHECK-NEXT: - - - - - 1.00 - - pswapd %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pswapd (%rax), %mm2

View File

@ -0,0 +1,55 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
# Instruction-table input: adcx and adox in their 32-bit and 64-bit forms,
# each with a register source and with a memory source ((%rbx)).
# The mnemonics carry no size suffix here; the expected output below prints
# them with the l/q suffix implied by the register operands.
adcx %ebx, %ecx
adcx (%rbx), %ecx
adcx %rbx, %rcx
adcx (%rbx), %rcx
adox %ebx, %ecx
adox (%rbx), %ecx
adox %rbx, %rcx
adox (%rbx), %rcx
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 2 0.67 adcxl %ebx, %ecx
# CHECK-NEXT: 3 7 0.67 * adcxl (%rbx), %ecx
# CHECK-NEXT: 2 2 0.67 adcxq %rbx, %rcx
# CHECK-NEXT: 3 7 0.67 * adcxq (%rbx), %rcx
# CHECK-NEXT: 2 2 0.67 adoxl %ebx, %ecx
# CHECK-NEXT: 3 7 0.67 * adoxl (%rbx), %ecx
# CHECK-NEXT: 2 2 0.67 adoxq %rbx, %rcx
# CHECK-NEXT: 3 7 0.67 * adoxq (%rbx), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 6.67 2.67 - 6.67 2.00 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcxl %ebx, %ecx
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcxl (%rbx), %ecx
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adcxq %rbx, %rcx
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adcxq (%rbx), %rcx
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adoxl %ebx, %ecx
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adoxl (%rbx), %ecx
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - adoxq %rbx, %rcx
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 adoxq (%rbx), %rcx

View File

@ -0,0 +1,71 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
# Instruction-table input: the non-VEX AES instructions (aesdec, aesdeclast,
# aesenc, aesenclast, aesimc, aeskeygenassist), each listed with an %xmm
# register source and with a memory source ((%rax)).
# The expected tables below give uOps, latency, throughput and per-port
# pressure for both forms of every instruction.
aesdec %xmm0, %xmm2
aesdec (%rax), %xmm2
aesdeclast %xmm0, %xmm2
aesdeclast (%rax), %xmm2
aesenc %xmm0, %xmm2
aesenc (%rax), %xmm2
aesenclast %xmm0, %xmm2
aesenclast (%rax), %xmm2
aesimc %xmm0, %xmm2
aesimc (%rax), %xmm2
aeskeygenassist $22, %xmm0, %xmm2
aeskeygenassist $22, (%rax), %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 7 1.00 aesdec %xmm0, %xmm2
# CHECK-NEXT: 3 13 1.00 * aesdec (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 aesdeclast %xmm0, %xmm2
# CHECK-NEXT: 3 13 1.00 * aesdeclast (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 aesenc %xmm0, %xmm2
# CHECK-NEXT: 3 13 1.00 * aesenc (%rax), %xmm2
# CHECK-NEXT: 2 7 1.00 aesenclast %xmm0, %xmm2
# CHECK-NEXT: 3 13 1.00 * aesenclast (%rax), %xmm2
# CHECK-NEXT: 2 12 2.00 aesimc %xmm0, %xmm2
# CHECK-NEXT: 3 18 2.00 * aesimc (%rax), %xmm2
# CHECK-NEXT: 1 8 3.67 aeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: 1 8 3.33 * aeskeygenassist $22, (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 9.67 9.67 - 21.67 3.00 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesdec %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesdec (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesdeclast %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesdeclast (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesenc %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesenc (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - aesenclast %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 1.33 0.50 0.50 aesenclast (%rax), %xmm2
# CHECK-NEXT: - - - - - 2.00 - - aesimc %xmm0, %xmm2
# CHECK-NEXT: - - - - - 2.00 0.50 0.50 aesimc (%rax), %xmm2
# CHECK-NEXT: - - 3.67 3.67 - 3.67 - - aeskeygenassist $22, %xmm0, %xmm2
# CHECK-NEXT: - - 3.33 3.33 - 3.33 0.50 0.50 aeskeygenassist $22, (%rax), %xmm2

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,113 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input stream for the llvm-mca instruction-tables report (AT&T syntax).
# Each mnemonic is listed in four forms, always in this order:
#   32-bit register, 32-bit memory, 64-bit register, 64-bit memory.
# The expected-output comments further down are matched line by line against
# this order, so instructions must not be reordered or removed without
# regenerating them with the script named in the first line.
# NOTE(review): the run line targets -mcpu=x86-64; the commit description
# frames these as baseline tests ahead of the BdVer2 scheduler model, so the
# CPU is presumably switched in a follow-up - confirm.

# andn: memory form reads the first operand from memory.
andn %eax, %ebx, %ecx
andn (%rax), %ebx, %ecx
andn %rax, %rbx, %rcx
andn (%rax), %rbx, %rcx
# bextr: unlike the other groups, the memory operand is the second operand.
bextr %eax, %ebx, %ecx
bextr %eax, (%rbx), %ecx
bextr %rax, %rbx, %rcx
bextr %rax, (%rbx), %rcx
# blsi / blsmsk / blsr: two-operand forms, source is a register or memory.
blsi %eax, %ecx
blsi (%rax), %ecx
blsi %rax, %rcx
blsi (%rax), %rcx
blsmsk %eax, %ecx
blsmsk (%rax), %ecx
blsmsk %rax, %rcx
blsmsk (%rax), %rcx
blsr %eax, %ecx
blsr (%rax), %ecx
blsr %rax, %rcx
blsr (%rax), %rcx
# tzcnt: two-operand forms, source is a register or memory.
tzcnt %eax, %ecx
tzcnt (%rax), %ecx
tzcnt %rax, %rcx
tzcnt (%rax), %rcx
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.33 andnl %eax, %ebx, %ecx
# CHECK-NEXT: 2 6 0.50 * andnl (%rax), %ebx, %ecx
# CHECK-NEXT: 1 1 0.33 andnq %rax, %rbx, %rcx
# CHECK-NEXT: 2 6 0.50 * andnq (%rax), %rbx, %rcx
# CHECK-NEXT: 2 2 1.00 bextrl %eax, %ebx, %ecx
# CHECK-NEXT: 3 7 1.00 * bextrl %eax, (%rbx), %ecx
# CHECK-NEXT: 2 2 1.00 bextrq %rax, %rbx, %rcx
# CHECK-NEXT: 3 7 1.00 * bextrq %rax, (%rbx), %rcx
# CHECK-NEXT: 1 1 0.33 blsil %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blsil (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blsiq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blsiq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 blsmskl %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blsmskl (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blsmskq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blsmskq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 blsrl %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blsrl (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blsrq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blsrq (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 tzcntl %eax, %ecx
# CHECK-NEXT: 2 8 1.00 * tzcntl (%rax), %ecx
# CHECK-NEXT: 1 3 1.00 tzcntq %rax, %rcx
# CHECK-NEXT: 2 8 1.00 * tzcntq (%rax), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 7.33 13.33 - 7.33 6.00 6.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andnl %eax, %ebx, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 andnl (%rax), %ebx, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - andnq %rax, %rbx, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 andnq (%rax), %rbx, %rcx
# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bextrl %eax, %ebx, %ecx
# CHECK-NEXT: - - 0.50 1.00 - 0.50 0.50 0.50 bextrl %eax, (%rbx), %ecx
# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bextrq %rax, %rbx, %rcx
# CHECK-NEXT: - - 0.50 1.00 - 0.50 0.50 0.50 bextrq %rax, (%rbx), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsil %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsil (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsiq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsiq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsmskl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsmskl (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsmskq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsmskq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsrl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsrl (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsrq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsrq (%rax), %rcx
# CHECK-NEXT: - - - 1.00 - - - - tzcntl %eax, %ecx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 tzcntl (%rax), %ecx
# CHECK-NEXT: - - - 1.00 - - - - tzcntq %rax, %rcx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 tzcntq (%rax), %rcx

View File

@ -0,0 +1,33 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input stream for the llvm-mca instruction-tables report (AT&T syntax):
# a single clflushopt with a memory operand. The expected-output comments
# below are autogenerated from this input; regenerate them with the script
# named in the first line if the instruction changes.
# NOTE(review): the run line targets -mcpu=x86-64; the commit description
# frames these as baseline tests ahead of the BdVer2 scheduler model, so the
# CPU is presumably switched in a follow-up - confirm.
clflushopt (%rax)
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 4 5 1.00 * * U clflushopt (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 0.50 0.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 0.50 0.50 clflushopt (%rax)

View File

@ -0,0 +1,323 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input stream for the llvm-mca instruction-tables report (AT&T syntax).
# Six groups of sixteen conditional moves each. Every group walks the same
# condition-code sequence:
#   o, no, b, ae, e, ne, be, a, s, ns, p, np, l, ge, le, g
# and the groups cover 16-bit (w), 32-bit (l) and 64-bit (q) operand sizes,
# each with a register source followed by a memory source.
# The expected-output comments further down are matched line by line against
# this order, so instructions must not be reordered or removed without
# regenerating them with the script named in the first line.
# NOTE(review): the run line targets -mcpu=x86-64; the commit description
# frames these as baseline tests ahead of the BdVer2 scheduler model, so the
# CPU is presumably switched in a follow-up - confirm.

# 16-bit, register source.
cmovow %si, %di
cmovnow %si, %di
cmovbw %si, %di
cmovaew %si, %di
cmovew %si, %di
cmovnew %si, %di
cmovbew %si, %di
cmovaw %si, %di
cmovsw %si, %di
cmovnsw %si, %di
cmovpw %si, %di
cmovnpw %si, %di
cmovlw %si, %di
cmovgew %si, %di
cmovlew %si, %di
cmovgw %si, %di
# 16-bit, memory source.
cmovow (%rax), %di
cmovnow (%rax), %di
cmovbw (%rax), %di
cmovaew (%rax), %di
cmovew (%rax), %di
cmovnew (%rax), %di
cmovbew (%rax), %di
cmovaw (%rax), %di
cmovsw (%rax), %di
cmovnsw (%rax), %di
cmovpw (%rax), %di
cmovnpw (%rax), %di
cmovlw (%rax), %di
cmovgew (%rax), %di
cmovlew (%rax), %di
cmovgw (%rax), %di
# 32-bit, register source.
cmovol %esi, %edi
cmovnol %esi, %edi
cmovbl %esi, %edi
cmovael %esi, %edi
cmovel %esi, %edi
cmovnel %esi, %edi
cmovbel %esi, %edi
cmoval %esi, %edi
cmovsl %esi, %edi
cmovnsl %esi, %edi
cmovpl %esi, %edi
cmovnpl %esi, %edi
cmovll %esi, %edi
cmovgel %esi, %edi
cmovlel %esi, %edi
cmovgl %esi, %edi
# 32-bit, memory source.
cmovol (%rax), %edi
cmovnol (%rax), %edi
cmovbl (%rax), %edi
cmovael (%rax), %edi
cmovel (%rax), %edi
cmovnel (%rax), %edi
cmovbel (%rax), %edi
cmoval (%rax), %edi
cmovsl (%rax), %edi
cmovnsl (%rax), %edi
cmovpl (%rax), %edi
cmovnpl (%rax), %edi
cmovll (%rax), %edi
cmovgel (%rax), %edi
cmovlel (%rax), %edi
cmovgl (%rax), %edi
# 64-bit, register source.
cmovoq %rsi, %rdi
cmovnoq %rsi, %rdi
cmovbq %rsi, %rdi
cmovaeq %rsi, %rdi
cmoveq %rsi, %rdi
cmovneq %rsi, %rdi
cmovbeq %rsi, %rdi
cmovaq %rsi, %rdi
cmovsq %rsi, %rdi
cmovnsq %rsi, %rdi
cmovpq %rsi, %rdi
cmovnpq %rsi, %rdi
cmovlq %rsi, %rdi
cmovgeq %rsi, %rdi
cmovleq %rsi, %rdi
cmovgq %rsi, %rdi
# 64-bit, memory source.
cmovoq (%rax), %rdi
cmovnoq (%rax), %rdi
cmovbq (%rax), %rdi
cmovaeq (%rax), %rdi
cmoveq (%rax), %rdi
cmovneq (%rax), %rdi
cmovbeq (%rax), %rdi
cmovaq (%rax), %rdi
cmovsq (%rax), %rdi
cmovnsq (%rax), %rdi
cmovpq (%rax), %rdi
cmovnpq (%rax), %rdi
cmovlq (%rax), %rdi
cmovgeq (%rax), %rdi
cmovleq (%rax), %rdi
cmovgq (%rax), %rdi
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 2 0.67 cmovow %si, %di
# CHECK-NEXT: 2 2 0.67 cmovnow %si, %di
# CHECK-NEXT: 2 2 0.67 cmovbw %si, %di
# CHECK-NEXT: 2 2 0.67 cmovaew %si, %di
# CHECK-NEXT: 2 2 0.67 cmovew %si, %di
# CHECK-NEXT: 2 2 0.67 cmovnew %si, %di
# CHECK-NEXT: 3 3 1.00 cmovbew %si, %di
# CHECK-NEXT: 3 3 1.00 cmovaw %si, %di
# CHECK-NEXT: 2 2 0.67 cmovsw %si, %di
# CHECK-NEXT: 2 2 0.67 cmovnsw %si, %di
# CHECK-NEXT: 2 2 0.67 cmovpw %si, %di
# CHECK-NEXT: 2 2 0.67 cmovnpw %si, %di
# CHECK-NEXT: 2 2 0.67 cmovlw %si, %di
# CHECK-NEXT: 2 2 0.67 cmovgew %si, %di
# CHECK-NEXT: 2 2 0.67 cmovlew %si, %di
# CHECK-NEXT: 2 2 0.67 cmovgw %si, %di
# CHECK-NEXT: 3 7 0.67 * cmovow (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovnow (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovbw (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovaew (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovew (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovnew (%rax), %di
# CHECK-NEXT: 4 8 1.00 * cmovbew (%rax), %di
# CHECK-NEXT: 4 8 1.00 * cmovaw (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovsw (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovnsw (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovpw (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovnpw (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovlw (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovgew (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovlew (%rax), %di
# CHECK-NEXT: 3 7 0.67 * cmovgw (%rax), %di
# CHECK-NEXT: 2 2 0.67 cmovol %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovnol %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovbl %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovael %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovel %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovnel %esi, %edi
# CHECK-NEXT: 3 3 1.00 cmovbel %esi, %edi
# CHECK-NEXT: 3 3 1.00 cmoval %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovsl %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovnsl %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovpl %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovnpl %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovll %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovgel %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovlel %esi, %edi
# CHECK-NEXT: 2 2 0.67 cmovgl %esi, %edi
# CHECK-NEXT: 3 7 0.67 * cmovol (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovnol (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovbl (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovael (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovel (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovnel (%rax), %edi
# CHECK-NEXT: 4 8 1.00 * cmovbel (%rax), %edi
# CHECK-NEXT: 4 8 1.00 * cmoval (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovsl (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovnsl (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovpl (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovnpl (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovll (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovgel (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovlel (%rax), %edi
# CHECK-NEXT: 3 7 0.67 * cmovgl (%rax), %edi
# CHECK-NEXT: 2 2 0.67 cmovoq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovnoq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovbq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovaeq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmoveq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovneq %rsi, %rdi
# CHECK-NEXT: 3 3 1.00 cmovbeq %rsi, %rdi
# CHECK-NEXT: 3 3 1.00 cmovaq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovsq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovnsq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovpq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovnpq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovlq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovgeq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovleq %rsi, %rdi
# CHECK-NEXT: 2 2 0.67 cmovgq %rsi, %rdi
# CHECK-NEXT: 3 7 0.67 * cmovoq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovnoq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovbq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovaeq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmoveq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovneq (%rax), %rdi
# CHECK-NEXT: 4 8 1.00 * cmovbeq (%rax), %rdi
# CHECK-NEXT: 4 8 1.00 * cmovaq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovsq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovnsq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovpq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovnpq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovlq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovgeq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovleq (%rax), %rdi
# CHECK-NEXT: 3 7 0.67 * cmovgq (%rax), %rdi
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 86.00 32.00 - 86.00 24.00 24.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovow %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnow %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovbw %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovaew %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovew %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnew %si, %di
# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovbew %si, %di
# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovaw %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovsw %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnsw %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovpw %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnpw %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovlw %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgew %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovlew %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgw %si, %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovow (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnow (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovbw (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovaew (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovew (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnew (%rax), %di
# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovbew (%rax), %di
# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovaw (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovsw (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnsw (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovpw (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnpw (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovlw (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgew (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovlew (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgw (%rax), %di
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovol %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnol %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovbl %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovael %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovel %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnel %esi, %edi
# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovbel %esi, %edi
# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmoval %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovsl %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnsl %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovpl %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnpl %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovll %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgel %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovlel %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgl %esi, %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovol (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnol (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovbl (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovael (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovel (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnel (%rax), %edi
# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovbel (%rax), %edi
# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmoval (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovsl (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnsl (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovpl (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnpl (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovll (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgel (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovlel (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgl (%rax), %edi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovoq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnoq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovbq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovaeq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmoveq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovneq %rsi, %rdi
# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovbeq %rsi, %rdi
# CHECK-NEXT: - - 1.33 0.33 - 1.33 - - cmovaq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovsq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnsq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovpq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovnpq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovlq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgeq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovleq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 - - cmovgq %rsi, %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovoq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnoq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovbq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovaeq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmoveq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovneq (%rax), %rdi
# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovbeq (%rax), %rdi
# CHECK-NEXT: - - 1.33 0.33 - 1.33 0.50 0.50 cmovaq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovsq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnsq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovpq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovnpq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovlq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgeq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovleq (%rax), %rdi
# CHECK-NEXT: - - 0.83 0.33 - 0.83 0.50 0.50 cmovgq (%rax), %rdi

View File

@ -0,0 +1,36 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input stream for the llvm-mca instruction-tables report (AT&T syntax):
# the 8-byte and 16-byte compare-and-exchange instructions, each with a
# memory operand. The expected-output comments below follow this order;
# regenerate them with the script named in the first line after any change.
# NOTE(review): the run line targets -mcpu=x86-64; the commit description
# frames these as baseline tests ahead of the BdVer2 scheduler model, so the
# CPU is presumably switched in a follow-up - confirm.
cmpxchg8b (%rax)
cmpxchg16b (%rax)
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 3 6 1.00 * * cmpxchg8b (%rax)
# CHECK-NEXT: 3 6 1.00 * * cmpxchg16b (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 0.67 0.67 2.00 0.67 2.00 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 cmpxchg8b (%rax)
# CHECK-NEXT: - - 0.33 0.33 1.00 0.33 1.00 1.00 cmpxchg16b (%rax)

View File

@ -0,0 +1,57 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input stream for the llvm-mca instruction-tables report (AT&T syntax).
# The expected-output comments further down are matched line by line against
# this order, so instructions must not be reordered or removed without
# regenerating them with the script named in the first line.
# NOTE(review): the run line targets -mcpu=x86-64; the commit description
# frames these as baseline tests ahead of the BdVer2 scheduler model, so the
# CPU is presumably switched in a follow-up - confirm.

# vcvtph2ps: xmm and ymm destinations, each with a register then a memory source.
vcvtph2ps %xmm0, %xmm2
vcvtph2ps (%rax), %xmm2
vcvtph2ps %xmm0, %ymm2
vcvtph2ps (%rax), %ymm2
# vcvtps2ph (immediate $0): xmm and ymm sources, each with a register then a
# memory destination.
vcvtps2ph $0, %xmm0, %xmm2
vcvtps2ph $0, %xmm0, (%rax)
vcvtps2ph $0, %ymm0, %xmm2
vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: 2 8 1.00 * vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: 1 3 1.00 vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: 1 4 1.00 * vcvtps2ph $0, %ymm0, (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 8.00 2.00 - 2.00 2.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - vcvtph2ps %xmm0, %ymm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vcvtph2ps (%rax), %ymm2
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %xmm0, (%rax)
# CHECK-NEXT: - - - 1.00 - - - - vcvtps2ph $0, %ymm0, %xmm2
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 vcvtps2ph $0, %ymm0, (%rax)

View File

@ -0,0 +1,701 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input stream for the llvm-mca instruction-tables report (AT&T syntax).
# Layout of every mnemonic family below:
#   - packed forms (pd, then ps): operand orders 132, 213, 231; for each order
#     the xmm register form, xmm memory form, ymm register form, ymm memory form;
#   - scalar forms (sd, then ss), where the family has them: operand orders
#     132, 213, 231; for each order the xmm register form then the memory form.
# The expected-output comments further down are matched line by line against
# this order, so instructions must not be reordered or removed without
# regenerating them with the script named in the first line.
# NOTE(review): the run line targets -mcpu=x86-64; the commit description
# frames these as baseline tests ahead of the BdVer2 scheduler model, so the
# CPU is presumably switched in a follow-up - confirm.

# vfmadd: pd, ps, sd, ss.
vfmadd132pd %xmm0, %xmm1, %xmm2
vfmadd132pd (%rax), %xmm1, %xmm2
vfmadd132pd %ymm0, %ymm1, %ymm2
vfmadd132pd (%rax), %ymm1, %ymm2
vfmadd213pd %xmm0, %xmm1, %xmm2
vfmadd213pd (%rax), %xmm1, %xmm2
vfmadd213pd %ymm0, %ymm1, %ymm2
vfmadd213pd (%rax), %ymm1, %ymm2
vfmadd231pd %xmm0, %xmm1, %xmm2
vfmadd231pd (%rax), %xmm1, %xmm2
vfmadd231pd %ymm0, %ymm1, %ymm2
vfmadd231pd (%rax), %ymm1, %ymm2
vfmadd132ps %xmm0, %xmm1, %xmm2
vfmadd132ps (%rax), %xmm1, %xmm2
vfmadd132ps %ymm0, %ymm1, %ymm2
vfmadd132ps (%rax), %ymm1, %ymm2
vfmadd213ps %xmm0, %xmm1, %xmm2
vfmadd213ps (%rax), %xmm1, %xmm2
vfmadd213ps %ymm0, %ymm1, %ymm2
vfmadd213ps (%rax), %ymm1, %ymm2
vfmadd231ps %xmm0, %xmm1, %xmm2
vfmadd231ps (%rax), %xmm1, %xmm2
vfmadd231ps %ymm0, %ymm1, %ymm2
vfmadd231ps (%rax), %ymm1, %ymm2
vfmadd132sd %xmm0, %xmm1, %xmm2
vfmadd132sd (%rax), %xmm1, %xmm2
vfmadd213sd %xmm0, %xmm1, %xmm2
vfmadd213sd (%rax), %xmm1, %xmm2
vfmadd231sd %xmm0, %xmm1, %xmm2
vfmadd231sd (%rax), %xmm1, %xmm2
vfmadd132ss %xmm0, %xmm1, %xmm2
vfmadd132ss (%rax), %xmm1, %xmm2
vfmadd213ss %xmm0, %xmm1, %xmm2
vfmadd213ss (%rax), %xmm1, %xmm2
vfmadd231ss %xmm0, %xmm1, %xmm2
vfmadd231ss (%rax), %xmm1, %xmm2
# vfmaddsub: packed forms only (pd, ps).
vfmaddsub132pd %xmm0, %xmm1, %xmm2
vfmaddsub132pd (%rax), %xmm1, %xmm2
vfmaddsub132pd %ymm0, %ymm1, %ymm2
vfmaddsub132pd (%rax), %ymm1, %ymm2
vfmaddsub213pd %xmm0, %xmm1, %xmm2
vfmaddsub213pd (%rax), %xmm1, %xmm2
vfmaddsub213pd %ymm0, %ymm1, %ymm2
vfmaddsub213pd (%rax), %ymm1, %ymm2
vfmaddsub231pd %xmm0, %xmm1, %xmm2
vfmaddsub231pd (%rax), %xmm1, %xmm2
vfmaddsub231pd %ymm0, %ymm1, %ymm2
vfmaddsub231pd (%rax), %ymm1, %ymm2
vfmaddsub132ps %xmm0, %xmm1, %xmm2
vfmaddsub132ps (%rax), %xmm1, %xmm2
vfmaddsub132ps %ymm0, %ymm1, %ymm2
vfmaddsub132ps (%rax), %ymm1, %ymm2
vfmaddsub213ps %xmm0, %xmm1, %xmm2
vfmaddsub213ps (%rax), %xmm1, %xmm2
vfmaddsub213ps %ymm0, %ymm1, %ymm2
vfmaddsub213ps (%rax), %ymm1, %ymm2
vfmaddsub231ps %xmm0, %xmm1, %xmm2
vfmaddsub231ps (%rax), %xmm1, %xmm2
vfmaddsub231ps %ymm0, %ymm1, %ymm2
vfmaddsub231ps (%rax), %ymm1, %ymm2
# vfmsub: pd, ps, sd, ss.
vfmsub132pd %xmm0, %xmm1, %xmm2
vfmsub132pd (%rax), %xmm1, %xmm2
vfmsub132pd %ymm0, %ymm1, %ymm2
vfmsub132pd (%rax), %ymm1, %ymm2
vfmsub213pd %xmm0, %xmm1, %xmm2
vfmsub213pd (%rax), %xmm1, %xmm2
vfmsub213pd %ymm0, %ymm1, %ymm2
vfmsub213pd (%rax), %ymm1, %ymm2
vfmsub231pd %xmm0, %xmm1, %xmm2
vfmsub231pd (%rax), %xmm1, %xmm2
vfmsub231pd %ymm0, %ymm1, %ymm2
vfmsub231pd (%rax), %ymm1, %ymm2
vfmsub132ps %xmm0, %xmm1, %xmm2
vfmsub132ps (%rax), %xmm1, %xmm2
vfmsub132ps %ymm0, %ymm1, %ymm2
vfmsub132ps (%rax), %ymm1, %ymm2
vfmsub213ps %xmm0, %xmm1, %xmm2
vfmsub213ps (%rax), %xmm1, %xmm2
vfmsub213ps %ymm0, %ymm1, %ymm2
vfmsub213ps (%rax), %ymm1, %ymm2
vfmsub231ps %xmm0, %xmm1, %xmm2
vfmsub231ps (%rax), %xmm1, %xmm2
vfmsub231ps %ymm0, %ymm1, %ymm2
vfmsub231ps (%rax), %ymm1, %ymm2
vfmsub132sd %xmm0, %xmm1, %xmm2
vfmsub132sd (%rax), %xmm1, %xmm2
vfmsub213sd %xmm0, %xmm1, %xmm2
vfmsub213sd (%rax), %xmm1, %xmm2
vfmsub231sd %xmm0, %xmm1, %xmm2
vfmsub231sd (%rax), %xmm1, %xmm2
vfmsub132ss %xmm0, %xmm1, %xmm2
vfmsub132ss (%rax), %xmm1, %xmm2
vfmsub213ss %xmm0, %xmm1, %xmm2
vfmsub213ss (%rax), %xmm1, %xmm2
vfmsub231ss %xmm0, %xmm1, %xmm2
vfmsub231ss (%rax), %xmm1, %xmm2
# vfmsubadd: packed forms only (pd, ps).
vfmsubadd132pd %xmm0, %xmm1, %xmm2
vfmsubadd132pd (%rax), %xmm1, %xmm2
vfmsubadd132pd %ymm0, %ymm1, %ymm2
vfmsubadd132pd (%rax), %ymm1, %ymm2
vfmsubadd213pd %xmm0, %xmm1, %xmm2
vfmsubadd213pd (%rax), %xmm1, %xmm2
vfmsubadd213pd %ymm0, %ymm1, %ymm2
vfmsubadd213pd (%rax), %ymm1, %ymm2
vfmsubadd231pd %xmm0, %xmm1, %xmm2
vfmsubadd231pd (%rax), %xmm1, %xmm2
vfmsubadd231pd %ymm0, %ymm1, %ymm2
vfmsubadd231pd (%rax), %ymm1, %ymm2
vfmsubadd132ps %xmm0, %xmm1, %xmm2
vfmsubadd132ps (%rax), %xmm1, %xmm2
vfmsubadd132ps %ymm0, %ymm1, %ymm2
vfmsubadd132ps (%rax), %ymm1, %ymm2
vfmsubadd213ps %xmm0, %xmm1, %xmm2
vfmsubadd213ps (%rax), %xmm1, %xmm2
vfmsubadd213ps %ymm0, %ymm1, %ymm2
vfmsubadd213ps (%rax), %ymm1, %ymm2
vfmsubadd231ps %xmm0, %xmm1, %xmm2
vfmsubadd231ps (%rax), %xmm1, %xmm2
vfmsubadd231ps %ymm0, %ymm1, %ymm2
vfmsubadd231ps (%rax), %ymm1, %ymm2
# vfnmadd: pd, ps, sd, ss.
vfnmadd132pd %xmm0, %xmm1, %xmm2
vfnmadd132pd (%rax), %xmm1, %xmm2
vfnmadd132pd %ymm0, %ymm1, %ymm2
vfnmadd132pd (%rax), %ymm1, %ymm2
vfnmadd213pd %xmm0, %xmm1, %xmm2
vfnmadd213pd (%rax), %xmm1, %xmm2
vfnmadd213pd %ymm0, %ymm1, %ymm2
vfnmadd213pd (%rax), %ymm1, %ymm2
vfnmadd231pd %xmm0, %xmm1, %xmm2
vfnmadd231pd (%rax), %xmm1, %xmm2
vfnmadd231pd %ymm0, %ymm1, %ymm2
vfnmadd231pd (%rax), %ymm1, %ymm2
vfnmadd132ps %xmm0, %xmm1, %xmm2
vfnmadd132ps (%rax), %xmm1, %xmm2
vfnmadd132ps %ymm0, %ymm1, %ymm2
vfnmadd132ps (%rax), %ymm1, %ymm2
vfnmadd213ps %xmm0, %xmm1, %xmm2
vfnmadd213ps (%rax), %xmm1, %xmm2
vfnmadd213ps %ymm0, %ymm1, %ymm2
vfnmadd213ps (%rax), %ymm1, %ymm2
vfnmadd231ps %xmm0, %xmm1, %xmm2
vfnmadd231ps (%rax), %xmm1, %xmm2
vfnmadd231ps %ymm0, %ymm1, %ymm2
vfnmadd231ps (%rax), %ymm1, %ymm2
vfnmadd132sd %xmm0, %xmm1, %xmm2
vfnmadd132sd (%rax), %xmm1, %xmm2
vfnmadd213sd %xmm0, %xmm1, %xmm2
vfnmadd213sd (%rax), %xmm1, %xmm2
vfnmadd231sd %xmm0, %xmm1, %xmm2
vfnmadd231sd (%rax), %xmm1, %xmm2
vfnmadd132ss %xmm0, %xmm1, %xmm2
vfnmadd132ss (%rax), %xmm1, %xmm2
vfnmadd213ss %xmm0, %xmm1, %xmm2
vfnmadd213ss (%rax), %xmm1, %xmm2
vfnmadd231ss %xmm0, %xmm1, %xmm2
vfnmadd231ss (%rax), %xmm1, %xmm2
# vfnmsub: pd, ps, sd, ss.
vfnmsub132pd %xmm0, %xmm1, %xmm2
vfnmsub132pd (%rax), %xmm1, %xmm2
vfnmsub132pd %ymm0, %ymm1, %ymm2
vfnmsub132pd (%rax), %ymm1, %ymm2
vfnmsub213pd %xmm0, %xmm1, %xmm2
vfnmsub213pd (%rax), %xmm1, %xmm2
vfnmsub213pd %ymm0, %ymm1, %ymm2
vfnmsub213pd (%rax), %ymm1, %ymm2
vfnmsub231pd %xmm0, %xmm1, %xmm2
vfnmsub231pd (%rax), %xmm1, %xmm2
vfnmsub231pd %ymm0, %ymm1, %ymm2
vfnmsub231pd (%rax), %ymm1, %ymm2
vfnmsub132ps %xmm0, %xmm1, %xmm2
vfnmsub132ps (%rax), %xmm1, %xmm2
vfnmsub132ps %ymm0, %ymm1, %ymm2
vfnmsub132ps (%rax), %ymm1, %ymm2
vfnmsub213ps %xmm0, %xmm1, %xmm2
vfnmsub213ps (%rax), %xmm1, %xmm2
vfnmsub213ps %ymm0, %ymm1, %ymm2
vfnmsub213ps (%rax), %ymm1, %ymm2
vfnmsub231ps %xmm0, %xmm1, %xmm2
vfnmsub231ps (%rax), %xmm1, %xmm2
vfnmsub231ps %ymm0, %ymm1, %ymm2
vfnmsub231ps (%rax), %ymm1, %ymm2
vfnmsub132sd %xmm0, %xmm1, %xmm2
vfnmsub132sd (%rax), %xmm1, %xmm2
vfnmsub213sd %xmm0, %xmm1, %xmm2
vfnmsub213sd (%rax), %xmm1, %xmm2
vfnmsub231sd %xmm0, %xmm1, %xmm2
vfnmsub231sd (%rax), %xmm1, %xmm2
vfnmsub132ss %xmm0, %xmm1, %xmm2
vfnmsub132ss (%rax), %xmm1, %xmm2
vfnmsub213ss %xmm0, %xmm1, %xmm2
vfnmsub213ss (%rax), %xmm1, %xmm2
vfnmsub231ss %xmm0, %xmm1, %xmm2
vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 0.50 vfmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmaddsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmaddsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsub231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfmsubadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfmsubadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: 1 5 0.50 vfnmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: 1 5 0.50 vfnmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: 2 10 0.50 * vfnmsub231ss (%rax), %xmm1, %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 96.00 96.00 - - 48.00 48.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsub231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmadd231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmadd231ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231pd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231pd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231pd %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231pd (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231ps %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231ps (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231ps %ymm0, %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231ps (%rax), %ymm1, %ymm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231sd %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231sd (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub132ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub132ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub213ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub213ss (%rax), %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsub231ss %xmm0, %xmm1, %xmm2
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsub231ss (%rax), %xmm1, %xmm2

View File

@ -0,0 +1,349 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Purpose: feed llvm-mca (in -instruction-tables mode) one copy of every
# four-operand fused multiply-add style mnemonic listed below and compare the
# printed per-instruction info and resource-pressure tables against the
# autogenerated assertions that follow the instruction list.
# (These look like the AMD FMA4 forms, as opposed to the three-operand
# 132/213/231 spellings -- TODO confirm against the ISA reference.)
#
# Layout of the input:
#   * Each mnemonic/register-width pair appears three times, in this order:
#       1. all operands in registers,
#       2. (%rax) written in the first operand position,
#       3. (%rax) written in the second operand position.
#   * The pd/ps mnemonics are listed with both %xmm and %ymm registers;
#     the sd/ss mnemonics are listed with %xmm registers only.
#   * The order of the instructions is mirrored row-for-row by the expected
#     tables, so do not reorder, add or drop instructions here without
#     regenerating the assertions with the script named on the first line.
#
# NOTE(review): the run line selects -mcpu=x86-64 and the expected resource
# names further down are SB*; presumably this is a baseline that will be
# re-pointed at a dedicated CPU model later -- confirm before regenerating.

# vfmadd{pd,ps,sd,ss}
vfmaddpd %xmm0, %xmm1, %xmm2, %xmm3
vfmaddpd (%rax), %xmm1, %xmm2, %xmm3
vfmaddpd %xmm0, (%rax), %xmm2, %xmm3
vfmaddpd %ymm0, %ymm1, %ymm2, %ymm3
vfmaddpd (%rax), %ymm1, %ymm2, %ymm3
vfmaddpd %ymm0, (%rax), %ymm2, %ymm3
vfmaddps %xmm0, %xmm1, %xmm2, %xmm3
vfmaddps (%rax), %xmm1, %xmm2, %xmm3
vfmaddps %xmm0, (%rax), %xmm2, %xmm3
vfmaddps %ymm0, %ymm1, %ymm2, %ymm3
vfmaddps (%rax), %ymm1, %ymm2, %ymm3
vfmaddps %ymm0, (%rax), %ymm2, %ymm3
vfmaddsd %xmm0, %xmm1, %xmm2, %xmm3
vfmaddsd (%rax), %xmm1, %xmm2, %xmm3
vfmaddsd %xmm0, (%rax), %xmm2, %xmm3
vfmaddss %xmm0, %xmm1, %xmm2, %xmm3
vfmaddss (%rax), %xmm1, %xmm2, %xmm3
vfmaddss %xmm0, (%rax), %xmm2, %xmm3
# vfmaddsub{pd,ps}
vfmaddsubpd %xmm0, %xmm1, %xmm2, %xmm3
vfmaddsubpd (%rax), %xmm1, %xmm2, %xmm3
vfmaddsubpd %xmm0, (%rax), %xmm2, %xmm3
vfmaddsubpd %ymm0, %ymm1, %ymm2, %ymm3
vfmaddsubpd (%rax), %ymm1, %ymm2, %ymm3
vfmaddsubpd %ymm0, (%rax), %ymm2, %ymm3
vfmaddsubps %xmm0, %xmm1, %xmm2, %xmm3
vfmaddsubps (%rax), %xmm1, %xmm2, %xmm3
vfmaddsubps %xmm0, (%rax), %xmm2, %xmm3
vfmaddsubps %ymm0, %ymm1, %ymm2, %ymm3
vfmaddsubps (%rax), %ymm1, %ymm2, %ymm3
vfmaddsubps %ymm0, (%rax), %ymm2, %ymm3
# vfmsubadd{pd,ps}
vfmsubaddpd %xmm0, %xmm1, %xmm2, %xmm3
vfmsubaddpd (%rax), %xmm1, %xmm2, %xmm3
vfmsubaddpd %xmm0, (%rax), %xmm2, %xmm3
vfmsubaddpd %ymm0, %ymm1, %ymm2, %ymm3
vfmsubaddpd (%rax), %ymm1, %ymm2, %ymm3
vfmsubaddpd %ymm0, (%rax), %ymm2, %ymm3
vfmsubaddps %xmm0, %xmm1, %xmm2, %xmm3
vfmsubaddps (%rax), %xmm1, %xmm2, %xmm3
vfmsubaddps %xmm0, (%rax), %xmm2, %xmm3
vfmsubaddps %ymm0, %ymm1, %ymm2, %ymm3
vfmsubaddps (%rax), %ymm1, %ymm2, %ymm3
vfmsubaddps %ymm0, (%rax), %ymm2, %ymm3
# vfmsub{pd,ps,sd,ss}
vfmsubpd %xmm0, %xmm1, %xmm2, %xmm3
vfmsubpd (%rax), %xmm1, %xmm2, %xmm3
vfmsubpd %xmm0, (%rax), %xmm2, %xmm3
vfmsubpd %ymm0, %ymm1, %ymm2, %ymm3
vfmsubpd (%rax), %ymm1, %ymm2, %ymm3
vfmsubpd %ymm0, (%rax), %ymm2, %ymm3
vfmsubps %xmm0, %xmm1, %xmm2, %xmm3
vfmsubps (%rax), %xmm1, %xmm2, %xmm3
vfmsubps %xmm0, (%rax), %xmm2, %xmm3
vfmsubps %ymm0, %ymm1, %ymm2, %ymm3
vfmsubps (%rax), %ymm1, %ymm2, %ymm3
vfmsubps %ymm0, (%rax), %ymm2, %ymm3
vfmsubsd %xmm0, %xmm1, %xmm2, %xmm3
vfmsubsd (%rax), %xmm1, %xmm2, %xmm3
vfmsubsd %xmm0, (%rax), %xmm2, %xmm3
vfmsubss %xmm0, %xmm1, %xmm2, %xmm3
vfmsubss (%rax), %xmm1, %xmm2, %xmm3
vfmsubss %xmm0, (%rax), %xmm2, %xmm3
# vfnmadd{pd,ps,sd,ss}
vfnmaddpd %xmm0, %xmm1, %xmm2, %xmm3
vfnmaddpd (%rax), %xmm1, %xmm2, %xmm3
vfnmaddpd %xmm0, (%rax), %xmm2, %xmm3
vfnmaddpd %ymm0, %ymm1, %ymm2, %ymm3
vfnmaddpd (%rax), %ymm1, %ymm2, %ymm3
vfnmaddpd %ymm0, (%rax), %ymm2, %ymm3
vfnmaddps %xmm0, %xmm1, %xmm2, %xmm3
vfnmaddps (%rax), %xmm1, %xmm2, %xmm3
vfnmaddps %xmm0, (%rax), %xmm2, %xmm3
vfnmaddps %ymm0, %ymm1, %ymm2, %ymm3
vfnmaddps (%rax), %ymm1, %ymm2, %ymm3
vfnmaddps %ymm0, (%rax), %ymm2, %ymm3
vfnmaddsd %xmm0, %xmm1, %xmm2, %xmm3
vfnmaddsd (%rax), %xmm1, %xmm2, %xmm3
vfnmaddsd %xmm0, (%rax), %xmm2, %xmm3
vfnmaddss %xmm0, %xmm1, %xmm2, %xmm3
vfnmaddss (%rax), %xmm1, %xmm2, %xmm3
vfnmaddss %xmm0, (%rax), %xmm2, %xmm3
# vfnmsub{pd,ps,sd,ss}
vfnmsubpd %xmm0, %xmm1, %xmm2, %xmm3
vfnmsubpd (%rax), %xmm1, %xmm2, %xmm3
vfnmsubpd %xmm0, (%rax), %xmm2, %xmm3
vfnmsubpd %ymm0, %ymm1, %ymm2, %ymm3
vfnmsubpd (%rax), %ymm1, %ymm2, %ymm3
vfnmsubpd %ymm0, (%rax), %ymm2, %ymm3
vfnmsubps %xmm0, %xmm1, %xmm2, %xmm3
vfnmsubps (%rax), %xmm1, %xmm2, %xmm3
vfnmsubps %xmm0, (%rax), %xmm2, %xmm3
vfnmsubps %ymm0, %ymm1, %ymm2, %ymm3
vfnmsubps (%rax), %ymm1, %ymm2, %ymm3
vfnmsubps %ymm0, (%rax), %ymm2, %ymm3
vfnmsubsd %xmm0, %xmm1, %xmm2, %xmm3
vfnmsubsd (%rax), %xmm1, %xmm2, %xmm3
vfnmsubsd %xmm0, (%rax), %xmm2, %xmm3
vfnmsubss %xmm0, %xmm1, %xmm2, %xmm3
vfnmsubss (%rax), %xmm1, %xmm2, %xmm3
vfnmsubss %xmm0, (%rax), %xmm2, %xmm3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 0.50 vfmaddpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmaddpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmaddpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmaddpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfmaddps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmaddps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmaddps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmaddps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfmaddsd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmaddss %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddss (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddss %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmaddsubpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsubpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsubpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmaddsubpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsubpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsubpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfmaddsubps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsubps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsubps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmaddsubps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsubps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmaddsubps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfmsubaddpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubaddpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubaddpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmsubaddpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmsubaddpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmsubaddpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfmsubaddps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubaddps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubaddps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmsubaddps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmsubaddps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmsubaddps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfmsubpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmsubpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmsubpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmsubpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfmsubps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmsubps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmsubps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfmsubps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfmsubsd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubsd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubsd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfmsubss %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubss (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfmsubss %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfnmaddpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfnmaddpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfnmaddps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfnmaddps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfnmaddsd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddsd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddsd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfnmaddss %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddss (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmaddss %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfnmsubpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfnmsubpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfnmsubps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfnmsubps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: 1 5 0.50 vfnmsubsd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubsd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubsd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: 1 5 0.50 vfnmsubss %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubss (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 10 0.50 * vfnmsubss %xmm0, (%rax), %xmm2, %xmm3
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 48.00 48.00 - - 32.00 32.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddss %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddss (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddss %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsubpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsubpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsubps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmaddsubps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmaddsubps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubaddpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubaddpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubaddps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubaddps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubaddps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubsd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubsd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubsd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfmsubss %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubss (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfmsubss %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddsd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddsd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddsd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmaddss %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddss (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmaddss %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubpd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubpd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubpd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubpd %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubpd (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubpd %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubps %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubps (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubps %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubps %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubps (%rax), %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubps %ymm0, (%rax), %ymm2, %ymm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubsd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubsd (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubsd %xmm0, (%rax), %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - - - vfnmsubss %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubss (%rax), %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 0.50 0.50 - - 0.50 0.50 vfnmsubss %xmm0, (%rax), %xmm2, %xmm3

View File

@ -0,0 +1,437 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
# LEA inputs with a zero or omitted displacement.
# Every addressing form is listed three times, once per destination width
# (%cx, %ecx, %rcx); the mnemonic carries no size suffix, and the expected
# output further down prints these as leaw / leal / leaq respectively.
# Register-based forms are given with 32-bit (%eax/%ebx) and then 64-bit
# (%rax/%rbx) address registers.

# Displacement only (explicit 0, no base or index).
lea 0(), %cx
lea 0(), %ecx
lea 0(), %rcx
# Base register only.
lea (%eax), %cx
lea (%eax), %ecx
lea (%eax), %rcx
lea (%rax), %cx
lea (%rax), %ecx
lea (%rax), %rcx
# Index register only, scale omitted.
lea (, %ebx), %cx
lea (, %ebx), %ecx
lea (, %ebx), %rcx
lea (, %rbx), %cx
lea (, %rbx), %ecx
lea (, %rbx), %rcx
# Index register only, explicit scale of 1 (the expected output prints these
# the same way as the scale-omitted forms above).
lea (, %ebx, 1), %cx
lea (, %ebx, 1), %ecx
lea (, %ebx, 1), %rcx
lea (, %rbx, 1), %cx
lea (, %rbx, 1), %ecx
lea (, %rbx, 1), %rcx
# Index register only, scale of 2.
lea (, %ebx, 2), %cx
lea (, %ebx, 2), %ecx
lea (, %ebx, 2), %rcx
lea (, %rbx, 2), %cx
lea (, %rbx, 2), %ecx
lea (, %rbx, 2), %rcx
# Base + index, scale omitted.
lea (%eax, %ebx), %cx
lea (%eax, %ebx), %ecx
lea (%eax, %ebx), %rcx
lea (%rax, %rbx), %cx
lea (%rax, %rbx), %ecx
lea (%rax, %rbx), %rcx
# Base + index, explicit scale of 1.
lea (%eax, %ebx, 1), %cx
lea (%eax, %ebx, 1), %ecx
lea (%eax, %ebx, 1), %rcx
lea (%rax, %rbx, 1), %cx
lea (%rax, %rbx, 1), %ecx
lea (%rax, %rbx, 1), %rcx
# Base + index, scale of 2.
lea (%eax, %ebx, 2), %cx
lea (%eax, %ebx, 2), %ecx
lea (%eax, %ebx, 2), %rcx
lea (%rax, %rbx, 2), %cx
lea (%rax, %rbx, 2), %ecx
lea (%rax, %rbx, 2), %rcx
# LEA inputs with a displacement of -16.
# Each addressing form is listed three times, once per destination width
# (%cx, %ecx, %rcx), and register-based forms appear with 32-bit
# (%eax/%ebx) and then 64-bit (%rax/%rbx) address registers.

# Displacement only.
lea -16(), %cx
lea -16(), %ecx
lea -16(), %rcx
# Displacement + base register.
lea -16(%eax), %cx
lea -16(%eax), %ecx
lea -16(%eax), %rcx
lea -16(%rax), %cx
lea -16(%rax), %ecx
lea -16(%rax), %rcx
# Displacement + index register, scale omitted.
lea -16(, %ebx), %cx
lea -16(, %ebx), %ecx
lea -16(, %ebx), %rcx
lea -16(, %rbx), %cx
lea -16(, %rbx), %ecx
lea -16(, %rbx), %rcx
# Displacement + index register, explicit scale of 1.
lea -16(, %ebx, 1), %cx
lea -16(, %ebx, 1), %ecx
lea -16(, %ebx, 1), %rcx
lea -16(, %rbx, 1), %cx
lea -16(, %rbx, 1), %ecx
lea -16(, %rbx, 1), %rcx
# Displacement + index register, scale of 2.
lea -16(, %ebx, 2), %cx
lea -16(, %ebx, 2), %ecx
lea -16(, %ebx, 2), %rcx
lea -16(, %rbx, 2), %cx
lea -16(, %rbx, 2), %ecx
lea -16(, %rbx, 2), %rcx
# Displacement + base + index, scale omitted.
lea -16(%eax, %ebx), %cx
lea -16(%eax, %ebx), %ecx
lea -16(%eax, %ebx), %rcx
lea -16(%rax, %rbx), %cx
lea -16(%rax, %rbx), %ecx
lea -16(%rax, %rbx), %rcx
# Displacement + base + index, explicit scale of 1.
lea -16(%eax, %ebx, 1), %cx
lea -16(%eax, %ebx, 1), %ecx
lea -16(%eax, %ebx, 1), %rcx
lea -16(%rax, %rbx, 1), %cx
lea -16(%rax, %rbx, 1), %ecx
lea -16(%rax, %rbx, 1), %rcx
# Displacement + base + index, scale of 2.
lea -16(%eax, %ebx, 2), %cx
lea -16(%eax, %ebx, 2), %ecx
lea -16(%eax, %ebx, 2), %rcx
lea -16(%rax, %rbx, 2), %cx
lea -16(%rax, %rbx, 2), %ecx
lea -16(%rax, %rbx, 2), %rcx
# LEA inputs with a displacement of 1024.
# Each addressing form is listed three times, once per destination width
# (%cx, %ecx, %rcx), and register-based forms appear with 32-bit
# (%eax/%ebx) and then 64-bit (%rax/%rbx) address registers.

# Displacement only.
lea 1024(), %cx
lea 1024(), %ecx
lea 1024(), %rcx
# Displacement + base register.
lea 1024(%eax), %cx
lea 1024(%eax), %ecx
lea 1024(%eax), %rcx
lea 1024(%rax), %cx
lea 1024(%rax), %ecx
lea 1024(%rax), %rcx
# Displacement + index register, scale omitted.
lea 1024(, %ebx), %cx
lea 1024(, %ebx), %ecx
lea 1024(, %ebx), %rcx
lea 1024(, %rbx), %cx
lea 1024(, %rbx), %ecx
lea 1024(, %rbx), %rcx
# Displacement + index register, explicit scale of 1.
lea 1024(, %ebx, 1), %cx
lea 1024(, %ebx, 1), %ecx
lea 1024(, %ebx, 1), %rcx
lea 1024(, %rbx, 1), %cx
lea 1024(, %rbx, 1), %ecx
lea 1024(, %rbx, 1), %rcx
# Displacement + index register, scale of 2.
lea 1024(, %ebx, 2), %cx
lea 1024(, %ebx, 2), %ecx
lea 1024(, %ebx, 2), %rcx
lea 1024(, %rbx, 2), %cx
lea 1024(, %rbx, 2), %ecx
lea 1024(, %rbx, 2), %rcx
# Displacement + base + index, scale omitted.
lea 1024(%eax, %ebx), %cx
lea 1024(%eax, %ebx), %ecx
lea 1024(%eax, %ebx), %rcx
lea 1024(%rax, %rbx), %cx
lea 1024(%rax, %rbx), %ecx
lea 1024(%rax, %rbx), %rcx
# Displacement + base + index, explicit scale of 1.
lea 1024(%eax, %ebx, 1), %cx
lea 1024(%eax, %ebx, 1), %ecx
lea 1024(%eax, %ebx, 1), %rcx
lea 1024(%rax, %rbx, 1), %cx
lea 1024(%rax, %rbx, 1), %ecx
lea 1024(%rax, %rbx, 1), %rcx
# Displacement + base + index, scale of 2.
lea 1024(%eax, %ebx, 2), %cx
lea 1024(%eax, %ebx, 2), %ecx
lea 1024(%eax, %ebx, 2), %rcx
lea 1024(%rax, %rbx, 2), %cx
lea 1024(%rax, %rbx, 2), %ecx
lea 1024(%rax, %rbx, 2), %rcx
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 leaw 0, %cx
# CHECK-NEXT: 1 1 0.50 leal 0, %ecx
# CHECK-NEXT: 1 1 0.50 leaq 0, %rcx
# CHECK-NEXT: 1 1 0.50 leaw (%eax), %cx
# CHECK-NEXT: 1 1 0.50 leal (%eax), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (%eax), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (%rax), %cx
# CHECK-NEXT: 1 1 0.50 leal (%rax), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (%rax), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal (,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal (,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal (,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal (,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (,%ebx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal (,%ebx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (,%ebx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (,%rbx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal (,%rbx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (,%rbx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (%eax,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal (%eax,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (%eax,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (%rax,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal (%rax,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (%rax,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (%eax,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal (%eax,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (%eax,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (%rax,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal (%rax,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (%rax,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (%eax,%ebx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal (%eax,%ebx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (%eax,%ebx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw (%rax,%rbx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal (%rax,%rbx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq (%rax,%rbx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16, %cx
# CHECK-NEXT: 1 1 0.50 leal -16, %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16, %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(%eax), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(%eax), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(%eax), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(%rax), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(%rax), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(%rax), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(,%ebx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(,%ebx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(,%ebx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(,%rbx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(,%rbx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(,%rbx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(%eax,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(%eax,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(%eax,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(%rax,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(%rax,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(%rax,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(%eax,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(%eax,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(%eax,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(%rax,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(%rax,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(%rax,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(%eax,%ebx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(%eax,%ebx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(%eax,%ebx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw -16(%rax,%rbx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal -16(%rax,%rbx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq -16(%rax,%rbx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024, %cx
# CHECK-NEXT: 1 1 0.50 leal 1024, %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024, %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(%eax), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(%eax), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(%eax), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(%rax), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(%rax), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(%rax), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(,%ebx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(,%ebx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(,%ebx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(,%rbx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(,%rbx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(,%rbx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(%eax,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(%eax,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(%eax,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(%rax,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(%rax,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(%rax,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(%eax,%ebx), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(%eax,%ebx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(%eax,%ebx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(%rax,%rbx), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(%rax,%rbx), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(%rax,%rbx), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(%eax,%ebx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(%eax,%ebx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(%eax,%ebx,2), %rcx
# CHECK-NEXT: 1 1 0.50 leaw 1024(%rax,%rbx,2), %cx
# CHECK-NEXT: 1 1 0.50 leal 1024(%rax,%rbx,2), %ecx
# CHECK-NEXT: 1 1 0.50 leaq 1024(%rax,%rbx,2), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 67.50 67.50 - - - -
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 0, %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 0, %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 0, %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%eax), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%eax), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%eax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%rax), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%rax), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%rax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%ebx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%ebx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%ebx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (,%rbx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (,%rbx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (,%rbx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%eax,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%eax,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%eax,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%rax,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%rax,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%rax,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%eax,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%eax,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%eax,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%rax,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%rax,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%rax,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%eax,%ebx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%eax,%ebx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%eax,%ebx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw (%rax,%rbx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal (%rax,%rbx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq (%rax,%rbx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16, %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16, %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16, %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%eax), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%eax), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%eax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%rax), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%rax), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%rax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%ebx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%ebx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%ebx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(,%rbx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(,%rbx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(,%rbx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%eax,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%eax,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%eax,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%rax,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%rax,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%rax,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%eax,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%eax,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%eax,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%rax,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%rax,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%rax,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%eax,%ebx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%eax,%ebx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%eax,%ebx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw -16(%rax,%rbx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal -16(%rax,%rbx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq -16(%rax,%rbx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024, %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024, %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024, %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%eax), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%eax), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%eax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%rax), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%rax), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%rax), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%ebx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%ebx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%ebx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(,%rbx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(,%rbx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(,%rbx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%eax,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%eax,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%eax,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%rax,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%rax,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%rax,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%eax,%ebx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%eax,%ebx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%eax,%ebx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%rax,%rbx), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%rax,%rbx), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%rax,%rbx), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%eax,%ebx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%eax,%ebx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%eax,%ebx,2), %rcx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaw 1024(%rax,%rbx,2), %cx
# CHECK-NEXT: - - 0.50 0.50 - - - - leal 1024(%rax,%rbx,2), %ecx
# CHECK-NEXT: - - 0.50 0.50 - - - - leaq 1024(%rax,%rbx,2), %rcx

View File

@ -0,0 +1,50 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
# LZCNT inputs at 16-, 32- and 64-bit operand sizes (w / l / q suffixes).
# Each size is listed twice: first with a register source, then with a
# memory source read through (%rax). The destination is always the
# matching-width view of %rcx.
lzcntw %cx, %cx
lzcntw (%rax), %cx
lzcntl %eax, %ecx
lzcntl (%rax), %ecx
lzcntq %rax, %rcx
lzcntq (%rax), %rcx
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 lzcntw %cx, %cx
# CHECK-NEXT: 2 8 1.00 * lzcntw (%rax), %cx
# CHECK-NEXT: 1 3 1.00 lzcntl %eax, %ecx
# CHECK-NEXT: 2 8 1.00 * lzcntl (%rax), %ecx
# CHECK-NEXT: 1 3 1.00 lzcntq %rax, %rcx
# CHECK-NEXT: 2 8 1.00 * lzcntq (%rax), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 6.00 - - 1.50 1.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - lzcntw %cx, %cx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 lzcntw (%rax), %cx
# CHECK-NEXT: - - - 1.00 - - - - lzcntl %eax, %ecx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 lzcntl (%rax), %ecx
# CHECK-NEXT: - - - 1.00 - - - - lzcntq %rax, %rcx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 lzcntq (%rax), %rcx

View File

@ -0,0 +1,393 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
emms
movd %eax, %mm2
movd (%rax), %mm2
movd %mm0, %ecx
movd %mm0, (%rax)
movq %rax, %mm2
movq (%rax), %mm2
movq %mm0, %rcx
movq %mm0, (%rax)
packsswb %mm0, %mm2
packsswb (%rax), %mm2
packssdw %mm0, %mm2
packssdw (%rax), %mm2
packuswb %mm0, %mm2
packuswb (%rax), %mm2
paddb %mm0, %mm2
paddb (%rax), %mm2
paddd %mm0, %mm2
paddd (%rax), %mm2
paddsb %mm0, %mm2
paddsb (%rax), %mm2
paddsw %mm0, %mm2
paddsw (%rax), %mm2
paddusb %mm0, %mm2
paddusb (%rax), %mm2
paddusw %mm0, %mm2
paddusw (%rax), %mm2
paddw %mm0, %mm2
paddw (%rax), %mm2
pand %mm0, %mm2
pand (%rax), %mm2
pandn %mm0, %mm2
pandn (%rax), %mm2
pcmpeqb %mm0, %mm2
pcmpeqb (%rax), %mm2
pcmpeqd %mm0, %mm2
pcmpeqd (%rax), %mm2
pcmpeqw %mm0, %mm2
pcmpeqw (%rax), %mm2
pcmpgtb %mm0, %mm2
pcmpgtb (%rax), %mm2
pcmpgtd %mm0, %mm2
pcmpgtd (%rax), %mm2
pcmpgtw %mm0, %mm2
pcmpgtw (%rax), %mm2
pmaddwd %mm0, %mm2
pmaddwd (%rax), %mm2
pmulhw %mm0, %mm2
pmulhw (%rax), %mm2
pmullw %mm0, %mm2
pmullw (%rax), %mm2
por %mm0, %mm2
por (%rax), %mm2
pslld $1, %mm2
pslld %mm0, %mm2
pslld (%rax), %mm2
psllq $1, %mm2
psllq %mm0, %mm2
psllq (%rax), %mm2
psllw $1, %mm2
psllw %mm0, %mm2
psllw (%rax), %mm2
psrad $1, %mm2
psrad %mm0, %mm2
psrad (%rax), %mm2
psraw $1, %mm2
psraw %mm0, %mm2
psraw (%rax), %mm2
psrld $1, %mm2
psrld %mm0, %mm2
psrld (%rax), %mm2
psrlq $1, %mm2
psrlq %mm0, %mm2
psrlq (%rax), %mm2
psrlw $1, %mm2
psrlw %mm0, %mm2
psrlw (%rax), %mm2
psubb %mm0, %mm2
psubb (%rax), %mm2
psubd %mm0, %mm2
psubd (%rax), %mm2
psubsb %mm0, %mm2
psubsb (%rax), %mm2
psubsw %mm0, %mm2
psubsw (%rax), %mm2
psubusb %mm0, %mm2
psubusb (%rax), %mm2
psubusw %mm0, %mm2
psubusw (%rax), %mm2
psubw %mm0, %mm2
psubw (%rax), %mm2
punpckhbw %mm0, %mm2
punpckhbw (%rax), %mm2
punpckhdq %mm0, %mm2
punpckhdq (%rax), %mm2
punpckhwd %mm0, %mm2
punpckhwd (%rax), %mm2
punpcklbw %mm0, %mm2
punpcklbw (%rax), %mm2
punpckldq %mm0, %mm2
punpckldq (%rax), %mm2
punpcklwd %mm0, %mm2
punpcklwd (%rax), %mm2
pxor %mm0, %mm2
pxor (%rax), %mm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 31 31 10.33 * * U emms
# CHECK-NEXT: 1 1 1.00 movd %eax, %mm2
# CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm2
# CHECK-NEXT: 1 2 1.00 movd %mm0, %ecx
# CHECK-NEXT: 1 1 1.00 * U movd %mm0, (%rax)
# CHECK-NEXT: 1 1 1.00 movq %rax, %mm2
# CHECK-NEXT: 1 5 0.50 * movq (%rax), %mm2
# CHECK-NEXT: 1 2 1.00 movq %mm0, %rcx
# CHECK-NEXT: 1 1 1.00 * movq %mm0, (%rax)
# CHECK-NEXT: 1 1 1.00 packsswb %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * packsswb (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 packssdw %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * packssdw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 packuswb %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * packuswb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 paddb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * paddb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 paddd %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * paddd (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 paddsb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * paddsb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 paddsw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * paddsw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 paddusb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * paddusb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 paddusw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * paddusw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 paddw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * paddw (%rax), %mm2
# CHECK-NEXT: 1 1 0.33 pand %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * pand (%rax), %mm2
# CHECK-NEXT: 1 1 0.33 pandn %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * pandn (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pcmpeqb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pcmpeqb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pcmpeqd %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pcmpeqd (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pcmpeqw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pcmpeqw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pcmpgtb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pcmpgtb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pcmpgtd %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pcmpgtd (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pcmpgtw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pcmpgtw (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 pmaddwd %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmaddwd (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 pmulhw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmulhw (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 pmullw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmullw (%rax), %mm2
# CHECK-NEXT: 1 1 0.33 por %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * por (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 pslld $1, %mm2
# CHECK-NEXT: 1 1 1.00 pslld %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * pslld (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 psllq $1, %mm2
# CHECK-NEXT: 1 1 1.00 psllq %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * psllq (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 psllw $1, %mm2
# CHECK-NEXT: 1 1 1.00 psllw %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * psllw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 psrad $1, %mm2
# CHECK-NEXT: 1 1 1.00 psrad %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * psrad (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 psraw $1, %mm2
# CHECK-NEXT: 1 1 1.00 psraw %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * psraw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 psrld $1, %mm2
# CHECK-NEXT: 1 1 1.00 psrld %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * psrld (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 psrlq $1, %mm2
# CHECK-NEXT: 1 1 1.00 psrlq %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * psrlq (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 psrlw $1, %mm2
# CHECK-NEXT: 1 1 1.00 psrlw %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * psrlw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 psubb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * psubb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 psubd %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * psubd (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 psubsb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * psubsb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 psubsw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * psubsw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 psubusb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * psubusb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 psubusw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * psubusw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 psubw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * psubw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 punpckhbw %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * punpckhbw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 punpckhdq %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * punpckhdq (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 punpckhwd %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * punpckhwd (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 punpcklbw %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * punpcklbw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 punpckldq %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * punpckldq (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 punpcklwd %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * punpcklwd (%rax), %mm2
# CHECK-NEXT: 1 1 0.33 pxor %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * pxor (%rax), %mm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 21.00 53.00 2.00 57.00 24.00 24.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 10.33 10.33 - 10.33 - - emms
# CHECK-NEXT: - - - - - 1.00 - - movd %eax, %mm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movd (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - movd %mm0, %ecx
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movd %mm0, (%rax)
# CHECK-NEXT: - - - - - 1.00 - - movq %rax, %mm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movq (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - movq %mm0, %rcx
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movq %mm0, (%rax)
# CHECK-NEXT: - - - - - 1.00 - - packsswb %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 packsswb (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - packssdw %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 packssdw (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - packuswb %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 packuswb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - paddb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - paddd %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddd (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - paddsb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddsb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - paddsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddsw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - paddusb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddusb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - paddusw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddusw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - paddw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 paddw (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pand %mm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pand (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pandn %mm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pandn (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pcmpeqb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpeqb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pcmpeqd %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpeqd (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pcmpeqw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpeqw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pcmpgtb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpgtb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pcmpgtd %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpgtd (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pcmpgtw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pcmpgtw (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - pmaddwd %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmaddwd (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - pmulhw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhw (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - pmullw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmullw (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - por %mm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 por (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - pslld $1, %mm2
# CHECK-NEXT: - - - - - 1.00 - - pslld %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pslld (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - psllq $1, %mm2
# CHECK-NEXT: - - - - - 1.00 - - psllq %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psllq (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - psllw $1, %mm2
# CHECK-NEXT: - - - - - 1.00 - - psllw %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psllw (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - psrad $1, %mm2
# CHECK-NEXT: - - - - - 1.00 - - psrad %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psrad (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - psraw $1, %mm2
# CHECK-NEXT: - - - - - 1.00 - - psraw %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psraw (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - psrld $1, %mm2
# CHECK-NEXT: - - - - - 1.00 - - psrld %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psrld (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - psrlq $1, %mm2
# CHECK-NEXT: - - - - - 1.00 - - psrlq %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psrlq (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - psrlw $1, %mm2
# CHECK-NEXT: - - - - - 1.00 - - psrlw %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 psrlw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - psubb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - psubd %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubd (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - psubsb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubsb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - psubsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubsw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - psubusb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubusb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - psubusw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubusw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - psubw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubw (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - punpckhbw %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpckhbw (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - punpckhdq %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpckhdq (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - punpckhwd %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpckhwd (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - punpcklbw %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpcklbw (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - punpckldq %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpckldq (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - punpcklwd %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 punpcklwd (%rax), %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pxor %mm0, %mm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pxor (%rax), %mm2

View File

@ -0,0 +1,50 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Instruction-table test for MOVBE. The llvm-mca invocation above selects
# -mcpu=x86-64 and asks for the instruction tables view only.
#
# Input layout: for each operand width (16-, 32- and 64-bit register) a store
# to (%rax) is followed by a load from (%rax). The mnemonics are written
# without a size suffix; the expected report below prints them with one
# (movbew / movbel / movbeq).
#
# The expectation comments after the input are autogenerated and depend on the
# exact order of these instructions, so regenerate them with the script named
# on the first line rather than editing them by hand.
movbe %cx, (%rax)
movbe (%rax), %cx
movbe %ecx, (%rax)
movbe (%rax), %ecx
movbe %rcx, (%rax)
movbe (%rax), %rcx
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 1.00 * movbew %cx, (%rax)
# CHECK-NEXT: 2 6 0.50 * movbew (%rax), %cx
# CHECK-NEXT: 1 1 1.00 * movbel %ecx, (%rax)
# CHECK-NEXT: 2 6 0.50 * movbel (%rax), %ecx
# CHECK-NEXT: 1 1 1.00 * movbeq %rcx, (%rax)
# CHECK-NEXT: 2 6 0.50 * movbeq (%rax), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 1.00 3.00 1.00 3.00 3.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movbew %cx, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 movbew (%rax), %cx
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movbel %ecx, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 movbel (%rax), %ecx
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movbeq %rcx, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 movbeq (%rax), %rcx

View File

@ -0,0 +1,36 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Instruction-table test for PCLMULQDQ. The llvm-mca invocation above selects
# -mcpu=x86-64 and asks for the instruction tables view only.
#
# Input layout: the same immediate ($11) is used for a register-source form
# and for a memory-source form reading (%rax), so the two expected rows differ
# only by the folded load.
#
# The expectation comments after the input are autogenerated; regenerate them
# with the script named on the first line rather than editing them by hand.
pclmulqdq $11, %xmm0, %xmm2
pclmulqdq $11, (%rax), %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 14 6.00 pclmulqdq $11, %xmm0, %xmm2
# CHECK-NEXT: 1 14 5.67 * pclmulqdq $11, (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 11.67 11.67 - 11.67 0.50 0.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 6.00 6.00 - 6.00 - - pclmulqdq $11, %xmm0, %xmm2
# CHECK-NEXT: - - 5.67 5.67 - 5.67 0.50 0.50 pclmulqdq $11, (%rax), %xmm2

View File

@ -0,0 +1,50 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Instruction-table test for POPCNT. The llvm-mca invocation above selects
# -mcpu=x86-64 and asks for the instruction tables view only.
#
# Input layout: explicit w/l/q suffixes pick the 16-, 32- and 64-bit variants;
# each width is listed first with a register source and then with a memory
# source read from (%rax).
#
# The expectation comments after the input are autogenerated and depend on the
# exact order of these instructions; regenerate them with the script named on
# the first line rather than editing them by hand.
popcntw %cx, %cx
popcntw (%rax), %cx
popcntl %eax, %ecx
popcntl (%rax), %ecx
popcntq %rax, %rcx
popcntq (%rax), %rcx
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 popcntw %cx, %cx
# CHECK-NEXT: 2 9 1.00 * popcntw (%rax), %cx
# CHECK-NEXT: 1 3 1.00 popcntl %eax, %ecx
# CHECK-NEXT: 2 9 1.00 * popcntl (%rax), %ecx
# CHECK-NEXT: 1 3 1.00 popcntq %rax, %rcx
# CHECK-NEXT: 2 9 1.00 * popcntq (%rax), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 6.00 - - 1.50 1.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - popcntw %cx, %cx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 popcntw (%rax), %cx
# CHECK-NEXT: - - - 1.00 - - - - popcntl %eax, %ecx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 popcntl (%rax), %ecx
# CHECK-NEXT: - - - 1.00 - - - - popcntq %rax, %rcx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 popcntq (%rax), %rcx

View File

@ -0,0 +1,36 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Instruction-table test for the PREFETCH and PREFETCHW hints. The llvm-mca
# invocation above selects -mcpu=x86-64 and asks for the instruction tables
# view only.
#
# Both instructions take the same memory operand, (%rax). In the expected
# report below both rows are flagged as MayLoad and MayStore.
#
# The expectation comments after the input are autogenerated; regenerate them
# with the script named on the first line rather than editing them by hand.
prefetch (%rax)
prefetchw (%rax)
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 5 0.50 * * prefetch (%rax)
# CHECK-NEXT: 1 5 0.50 * * prefetchw (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - - - - 1.00 1.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - - - - 0.50 0.50 prefetch (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 prefetchw (%rax)

View File

@ -0,0 +1,461 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Instruction-table test covering the SSE1-era instructions, including the
# MMX-register extensions (pavgb, pextrw, pshufw, ...) and the prefetch hints.
# The llvm-mca invocation above selects -mcpu=x86-64 and asks for the
# instruction tables view only.
#
# Input layout: instructions are listed alphabetically by mnemonic. Where an
# instruction has both forms, the register-source form comes first and the
# memory-source form, reading (%rax), follows it. Stores write to (%rax).
#
# Several lines name a 64-bit general-purpose register (movmskps, pextrw,
# pinsrw, pmovmskb); the expected report below prints those operands with the
# 32-bit register name (%ecx / %eax).
#
# The expectation comments after the input are autogenerated and depend on the
# exact order and count of these instructions (including the per-iteration
# totals), so regenerate them with the script named on the first line rather
# than editing them by hand.
addps %xmm0, %xmm2
addps (%rax), %xmm2
addss %xmm0, %xmm2
addss (%rax), %xmm2
andnps %xmm0, %xmm2
andnps (%rax), %xmm2
andps %xmm0, %xmm2
andps (%rax), %xmm2
cmpps $0, %xmm0, %xmm2
cmpps $0, (%rax), %xmm2
cmpss $0, %xmm0, %xmm2
cmpss $0, (%rax), %xmm2
comiss %xmm0, %xmm1
comiss (%rax), %xmm1
cvtpi2ps %mm0, %xmm2
cvtpi2ps (%rax), %xmm2
cvtps2pi %xmm0, %mm2
cvtps2pi (%rax), %mm2
cvtsi2ss %ecx, %xmm2
cvtsi2ss %rcx, %xmm2
# NOTE(review): the next two lines are identical, and the expected report
# lists both as cvtsi2ssl, so a 64-bit memory-source form does not appear to
# be covered. Presumably an explicit l/q suffix pair was intended -- confirm,
# and regenerate the expectations if the input is changed.
cvtsi2ss (%rax), %xmm2
cvtsi2ss (%rax), %xmm2
cvtss2si %xmm0, %ecx
cvtss2si %xmm0, %rcx
cvtss2si (%rax), %ecx
cvtss2si (%rax), %rcx
cvttps2pi %xmm0, %mm2
cvttps2pi (%rax), %mm2
cvttss2si %xmm0, %ecx
cvttss2si %xmm0, %rcx
cvttss2si (%rax), %ecx
cvttss2si (%rax), %rcx
divps %xmm0, %xmm2
divps (%rax), %xmm2
divss %xmm0, %xmm2
divss (%rax), %xmm2
ldmxcsr (%rax)
maskmovq %mm0, %mm1
maxps %xmm0, %xmm2
maxps (%rax), %xmm2
maxss %xmm0, %xmm2
maxss (%rax), %xmm2
minps %xmm0, %xmm2
minps (%rax), %xmm2
minss %xmm0, %xmm2
minss (%rax), %xmm2
movaps %xmm0, %xmm2
movaps %xmm0, (%rax)
movaps (%rax), %xmm2
movhlps %xmm0, %xmm2
movlhps %xmm0, %xmm2
movhps %xmm0, (%rax)
movhps (%rax), %xmm2
movlps %xmm0, (%rax)
movlps (%rax), %xmm2
movmskps %xmm0, %rcx
movntps %xmm0, (%rax)
movntq %mm0, (%rax)
movss %xmm0, %xmm2
movss %xmm0, (%rax)
movss (%rax), %xmm2
movups %xmm0, %xmm2
movups %xmm0, (%rax)
movups (%rax), %xmm2
mulps %xmm0, %xmm2
mulps (%rax), %xmm2
mulss %xmm0, %xmm2
mulss (%rax), %xmm2
orps %xmm0, %xmm2
orps (%rax), %xmm2
pavgb %mm0, %mm2
pavgb (%rax), %mm2
pavgw %mm0, %mm2
pavgw (%rax), %mm2
pextrw $1, %mm0, %rcx
pinsrw $1, %rax, %mm2
pinsrw $1, (%rax), %mm2
pmaxsw %mm0, %mm2
pmaxsw (%rax), %mm2
pmaxub %mm0, %mm2
pmaxub (%rax), %mm2
pminsw %mm0, %mm2
pminsw (%rax), %mm2
pminub %mm0, %mm2
pminub (%rax), %mm2
# NOTE(review): this line uses an %xmm source while the neighbouring
# packed-integer lines in this file all use %mm registers -- confirm that the
# %xmm form is the one meant to be covered here.
pmovmskb %xmm0, %rcx
pmulhuw %mm0, %mm2
pmulhuw (%rax), %mm2
prefetcht0 (%rax)
prefetcht1 (%rax)
prefetcht2 (%rax)
prefetchnta (%rax)
psadbw %mm0, %mm2
psadbw (%rax), %mm2
pshufw $1, %mm0, %mm2
pshufw $1, (%rax), %mm2
rcpps %xmm0, %xmm2
rcpps (%rax), %xmm2
rcpss %xmm0, %xmm2
rcpss (%rax), %xmm2
rsqrtps %xmm0, %xmm2
rsqrtps (%rax), %xmm2
rsqrtss %xmm0, %xmm2
rsqrtss (%rax), %xmm2
sfence
shufps $1, %xmm0, %xmm2
shufps $1, (%rax), %xmm2
sqrtps %xmm0, %xmm2
sqrtps (%rax), %xmm2
sqrtss %xmm0, %xmm2
sqrtss (%rax), %xmm2
stmxcsr (%rax)
subps %xmm0, %xmm2
subps (%rax), %xmm2
subss %xmm0, %xmm2
subss (%rax), %xmm2
ucomiss %xmm0, %xmm1
ucomiss (%rax), %xmm1
unpckhps %xmm0, %xmm2
unpckhps (%rax), %xmm2
unpcklps %xmm0, %xmm2
unpcklps (%rax), %xmm2
xorps %xmm0, %xmm2
xorps (%rax), %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 addps %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * addps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addss %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * addss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 andnps %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * andnps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 andps %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * andps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cmpps $0, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * cmpps $0, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cmpss $0, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * cmpss $0, (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 comiss %xmm0, %xmm1
# CHECK-NEXT: 3 8 1.00 * comiss (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2pi %xmm0, %mm2
# CHECK-NEXT: 2 9 1.00 * cvtps2pi (%rax), %mm2
# CHECK-NEXT: 3 5 2.00 cvtsi2ssl %ecx, %xmm2
# CHECK-NEXT: 3 5 2.00 cvtsi2ssq %rcx, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %ecx
# CHECK-NEXT: 2 5 1.00 cvtss2si %xmm0, %rcx
# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %ecx
# CHECK-NEXT: 3 9 1.00 * cvtss2si (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 cvttps2pi %xmm0, %mm2
# CHECK-NEXT: 2 9 1.00 * cvttps2pi (%rax), %mm2
# CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %ecx
# CHECK-NEXT: 2 5 1.00 cvttss2si %xmm0, %rcx
# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %ecx
# CHECK-NEXT: 3 9 1.00 * cvttss2si (%rax), %rcx
# CHECK-NEXT: 1 14 14.00 divps %xmm0, %xmm2
# CHECK-NEXT: 2 20 14.00 * divps (%rax), %xmm2
# CHECK-NEXT: 1 14 14.00 divss %xmm0, %xmm2
# CHECK-NEXT: 2 20 14.00 * divss (%rax), %xmm2
# CHECK-NEXT: 4 5 1.00 * * U ldmxcsr (%rax)
# CHECK-NEXT: 1 1 1.00 * * U maskmovq %mm0, %mm1
# CHECK-NEXT: 1 3 1.00 maxps %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * maxps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 maxss %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * maxss (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 minps %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * minps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 minss %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * minss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movaps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movaps %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movaps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movhlps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 movlhps %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movhps %xmm0, (%rax)
# CHECK-NEXT: 2 7 1.00 * movhps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movlps %xmm0, (%rax)
# CHECK-NEXT: 2 7 1.00 * movlps (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 movmskps %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movntps %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * * U movntq %mm0, (%rax)
# CHECK-NEXT: 1 1 1.00 movss %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movss %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movups %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movups %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movups (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 mulps %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * mulps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 mulss %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * mulss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 orps %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * orps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 pavgb %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pavgb (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pavgw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pavgw (%rax), %mm2
# CHECK-NEXT: 2 3 1.00 pextrw $1, %mm0, %ecx
# CHECK-NEXT: 2 2 1.00 pinsrw $1, %eax, %mm2
# CHECK-NEXT: 2 7 0.50 * pinsrw $1, (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pmaxsw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pmaxsw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pmaxub %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pmaxub (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pminsw %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pminsw (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 pminub %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * pminub (%rax), %mm2
# CHECK-NEXT: 1 2 1.00 pmovmskb %xmm0, %ecx
# CHECK-NEXT: 1 5 1.00 pmulhuw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmulhuw (%rax), %mm2
# CHECK-NEXT: 1 5 0.50 * * prefetcht0 (%rax)
# CHECK-NEXT: 1 5 0.50 * * prefetcht1 (%rax)
# CHECK-NEXT: 1 5 0.50 * * prefetcht2 (%rax)
# CHECK-NEXT: 1 5 0.50 * * prefetchnta (%rax)
# CHECK-NEXT: 1 5 1.00 psadbw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * psadbw (%rax), %mm2
# CHECK-NEXT: 1 1 1.00 pshufw $1, %mm0, %mm2
# CHECK-NEXT: 2 6 1.00 * pshufw $1, (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 rcpps %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * rcpps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 rcpss %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * rcpss (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 rsqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * rsqrtps (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 rsqrtss %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * rsqrtss (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U sfence
# CHECK-NEXT: 1 1 1.00 shufps $1, %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * shufps $1, (%rax), %xmm2
# CHECK-NEXT: 1 14 14.00 sqrtps %xmm0, %xmm2
# CHECK-NEXT: 2 20 14.00 * sqrtps (%rax), %xmm2
# CHECK-NEXT: 1 14 14.00 sqrtss %xmm0, %xmm2
# CHECK-NEXT: 2 20 14.00 * sqrtss (%rax), %xmm2
# CHECK-NEXT: 4 5 1.00 * * U stmxcsr (%rax)
# CHECK-NEXT: 1 3 1.00 subps %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * subps (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subss %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * subss (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 ucomiss %xmm0, %xmm1
# CHECK-NEXT: 3 8 1.00 * ucomiss (%rax), %xmm1
# CHECK-NEXT: 1 1 1.00 unpckhps %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * unpckhps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 unpcklps %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * unpcklps (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 xorps %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * xorps (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - 112.00 41.00 55.50 10.00 34.50 33.50 33.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - addps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - addss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addss (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - andnps %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 andnps (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - andps %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 andps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - cmpps $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cmpps $0, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - cmpss $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cmpss $0, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - comiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 comiss (%rax), %xmm1
# CHECK-NEXT: - - - 1.00 - - - - cvtpi2ps %mm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtpi2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - cvtps2pi %xmm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtps2pi (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - 2.00 - - cvtsi2ssl %ecx, %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 - - cvtsi2ssq %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtsi2ssl (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - cvtss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - cvtss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvtss2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvtss2si (%rax), %rcx
# CHECK-NEXT: - - - 1.00 - - - - cvttps2pi %xmm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvttps2pi (%rax), %mm2
# CHECK-NEXT: - - 1.00 1.00 - - - - cvttss2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - cvttss2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvttss2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvttss2si (%rax), %rcx
# CHECK-NEXT: - 14.00 1.00 - - - - - divps %xmm0, %xmm2
# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 divps (%rax), %xmm2
# CHECK-NEXT: - 14.00 1.00 - - - - - divss %xmm0, %xmm2
# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 divss (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 1.00 1.00 0.50 0.50 ldmxcsr (%rax)
# CHECK-NEXT: - - - - - 1.00 - - maskmovq %mm0, %mm1
# CHECK-NEXT: - - - 1.00 - - - - maxps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 maxps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - maxss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 maxss (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - minps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 minps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - minss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 minss (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movaps %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movaps %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 movaps (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movhlps %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movlhps %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movhps %xmm0, (%rax)
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 movhps (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movlps %xmm0, (%rax)
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 movlps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - movmskps %xmm0, %ecx
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntps %xmm0, (%rax)
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntq %mm0, (%rax)
# CHECK-NEXT: - - - - - 1.00 - - movss %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movss %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 movss (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movups %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movups %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 movups (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - mulps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 mulps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - mulss %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 mulss (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - orps %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 orps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - pavgb %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgb (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pavgw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pavgw (%rax), %mm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrw $1, %mm0, %ecx
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrw $1, %eax, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrw $1, (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pmaxsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pmaxsw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pmaxub %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pmaxub (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pminsw %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pminsw (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - pminub %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 pminub (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - pmovmskb %xmm0, %ecx
# CHECK-NEXT: - - 1.00 - - - - - pmulhuw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhuw (%rax), %mm2
# CHECK-NEXT: - - - - - - 0.50 0.50 prefetcht0 (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 prefetcht1 (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 prefetcht2 (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 prefetchnta (%rax)
# CHECK-NEXT: - - 1.00 - - - - - psadbw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 psadbw (%rax), %mm2
# CHECK-NEXT: - - - - - 1.00 - - pshufw $1, %mm0, %mm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 pshufw $1, (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - rcpps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 rcpps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - rcpss %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 rcpss (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - rsqrtps %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 rsqrtps (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - rsqrtss %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 rsqrtss (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 sfence
# CHECK-NEXT: - - - - - 1.00 - - shufps $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 shufps $1, (%rax), %xmm2
# CHECK-NEXT: - 14.00 1.00 - - - - - sqrtps %xmm0, %xmm2
# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 sqrtps (%rax), %xmm2
# CHECK-NEXT: - 14.00 1.00 - - - - - sqrtss %xmm0, %xmm2
# CHECK-NEXT: - 14.00 1.00 - - - 0.50 0.50 sqrtss (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - 1.00 1.00 0.50 0.50 stmxcsr (%rax)
# CHECK-NEXT: - - - 1.00 - - - - subps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 subps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - subss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 subss (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - ucomiss %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 ucomiss (%rax), %xmm1
# CHECK-NEXT: - - - - - 1.00 - - unpckhps %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 unpckhps (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - unpcklps %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 unpcklps (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - xorps %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 xorps (%rax), %xmm2

View File

@ -0,0 +1,949 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for llvm-mca's -instruction-tables view (see the run line above).
#
# The instructions are listed roughly alphabetically by mnemonic. Most appear
# once with register operands only and once with a memory operand addressed
# through (%rax). The autogenerated assertions that follow the instruction
# list contain one row per instruction, in this exact order, so adding,
# removing or reordering an instruction means the assertions must be
# regenerated with the script named on the first line.
#
# Operands are written in AT&T order (source first, destination last).
addpd %xmm0, %xmm2
addpd (%rax), %xmm2
addsd %xmm0, %xmm2
addsd (%rax), %xmm2
andnpd %xmm0, %xmm2
andnpd (%rax), %xmm2
andpd %xmm0, %xmm2
andpd (%rax), %xmm2
clflush (%rax)
cmppd $0, %xmm0, %xmm2
cmppd $0, (%rax), %xmm2
cmpsd $0, %xmm0, %xmm2
cmpsd $0, (%rax), %xmm2
comisd %xmm0, %xmm1
comisd (%rax), %xmm1
# Conversions (cvt*). Forms that involve an integer register are listed with
# both a 32-bit (%ecx) and a 64-bit (%rcx) register.
cvtdq2pd %xmm0, %xmm2
cvtdq2pd (%rax), %xmm2
cvtdq2ps %xmm0, %xmm2
cvtdq2ps (%rax), %xmm2
cvtpd2dq %xmm0, %xmm2
cvtpd2dq (%rax), %xmm2
cvtpd2pi %xmm0, %mm2
cvtpd2pi (%rax), %mm2
cvtpd2ps %xmm0, %xmm2
cvtpd2ps (%rax), %xmm2
cvtpi2pd %mm0, %xmm2
cvtpi2pd (%rax), %xmm2
cvtps2dq %xmm0, %xmm2
cvtps2dq (%rax), %xmm2
cvtps2pd %xmm0, %xmm2
cvtps2pd (%rax), %xmm2
cvtsd2si %xmm0, %ecx
cvtsd2si %xmm0, %rcx
cvtsd2si (%rax), %ecx
cvtsd2si (%rax), %rcx
cvtsd2ss %xmm0, %xmm2
cvtsd2ss (%rax), %xmm2
# The generated tables print the two register forms below as cvtsi2sdl and
# cvtsi2sdq respectively.
cvtsi2sd %ecx, %xmm2
cvtsi2sd %rcx, %xmm2
# NOTE(review): the next two lines are textually identical, and the generated
# tables print both of them as cvtsi2sdl. Presumably the second one was meant
# to exercise the 64-bit memory form (cvtsi2sdq) - confirm, and regenerate the
# assertions if it is changed.
cvtsi2sd (%rax), %xmm2
cvtsi2sd (%rax), %xmm2
cvtss2sd %xmm0, %xmm2
cvtss2sd (%rax), %xmm2
cvttpd2dq %xmm0, %xmm2
cvttpd2dq (%rax), %xmm2
cvttpd2pi %xmm0, %mm2
cvttpd2pi (%rax), %mm2
cvttps2dq %xmm0, %xmm2
cvttps2dq (%rax), %xmm2
cvttsd2si %xmm0, %ecx
cvttsd2si %xmm0, %rcx
cvttsd2si (%rax), %ecx
cvttsd2si (%rax), %rcx
divpd %xmm0, %xmm2
divpd (%rax), %xmm2
divsd %xmm0, %xmm2
divsd (%rax), %xmm2
lfence
maskmovdqu %xmm0, %xmm1
maxpd %xmm0, %xmm2
maxpd (%rax), %xmm2
maxsd %xmm0, %xmm2
maxsd (%rax), %xmm2
minpd %xmm0, %xmm2
minpd (%rax), %xmm2
minsd %xmm0, %xmm2
minsd (%rax), %xmm2
# Moves (mov*). Where applicable each one is listed as register-to-register,
# register-to-memory (store) and memory-to-register (load).
movapd %xmm0, %xmm2
movapd %xmm0, (%rax)
movapd (%rax), %xmm2
movd %eax, %xmm2
movd (%rax), %xmm2
movd %xmm0, %ecx
movd %xmm0, (%rax)
movdqa %xmm0, %xmm2
movdqa %xmm0, (%rax)
movdqa (%rax), %xmm2
movdqu %xmm0, %xmm2
movdqu %xmm0, (%rax)
movdqu (%rax), %xmm2
movdq2q %xmm0, %mm2
movhpd %xmm0, (%rax)
movhpd (%rax), %xmm2
movlpd %xmm0, (%rax)
movlpd (%rax), %xmm2
# Written with a 64-bit destination; the generated tables print it as %ecx.
movmskpd %xmm0, %rcx
movntil %eax, (%rax)
movntiq %rax, (%rax)
movntdq %xmm0, (%rax)
movntpd %xmm0, (%rax)
movq %xmm0, %xmm2
movq %rax, %xmm2
movq (%rax), %xmm2
movq %xmm0, %rcx
movq %xmm0, (%rax)
movq2dq %mm0, %xmm2
movsd %xmm0, %xmm2
movsd %xmm0, (%rax)
movsd (%rax), %xmm2
movupd %xmm0, %xmm2
movupd %xmm0, (%rax)
movupd (%rax), %xmm2
mulpd %xmm0, %xmm2
mulpd (%rax), %xmm2
mulsd %xmm0, %xmm2
mulsd (%rax), %xmm2
orpd %xmm0, %xmm2
orpd (%rax), %xmm2
# Packed-integer instructions (p*) on %xmm registers. paddq, pmuludq and psubq
# are additionally listed with %mm registers.
packssdw %xmm0, %xmm2
packssdw (%rax), %xmm2
packsswb %xmm0, %xmm2
packsswb (%rax), %xmm2
packuswb %xmm0, %xmm2
packuswb (%rax), %xmm2
paddb %xmm0, %xmm2
paddb (%rax), %xmm2
paddd %xmm0, %xmm2
paddd (%rax), %xmm2
paddq %mm0, %mm2
paddq (%rax), %mm2
paddq %xmm0, %xmm2
paddq (%rax), %xmm2
paddsb %xmm0, %xmm2
paddsb (%rax), %xmm2
paddsw %xmm0, %xmm2
paddsw (%rax), %xmm2
paddusb %xmm0, %xmm2
paddusb (%rax), %xmm2
paddusw %xmm0, %xmm2
paddusw (%rax), %xmm2
paddw %xmm0, %xmm2
paddw (%rax), %xmm2
pand %xmm0, %xmm2
pand (%rax), %xmm2
pandn %xmm0, %xmm2
pandn (%rax), %xmm2
pavgb %xmm0, %xmm2
pavgb (%rax), %xmm2
pavgw %xmm0, %xmm2
pavgw (%rax), %xmm2
pcmpeqb %xmm0, %xmm2
pcmpeqb (%rax), %xmm2
pcmpeqd %xmm0, %xmm2
pcmpeqd (%rax), %xmm2
pcmpeqw %xmm0, %xmm2
pcmpeqw (%rax), %xmm2
pcmpgtb %xmm0, %xmm2
pcmpgtb (%rax), %xmm2
pcmpgtd %xmm0, %xmm2
pcmpgtd (%rax), %xmm2
pcmpgtw %xmm0, %xmm2
pcmpgtw (%rax), %xmm2
# Written with a 64-bit destination; the generated tables print it as %ecx.
pextrw $1, %xmm0, %rcx
pmaddwd %xmm0, %xmm2
pmaddwd (%rax), %xmm2
pmaxsw %xmm0, %xmm2
pmaxsw (%rax), %xmm2
pmaxub %xmm0, %xmm2
pmaxub (%rax), %xmm2
pminsw %xmm0, %xmm2
pminsw (%rax), %xmm2
pminub %xmm0, %xmm2
pminub (%rax), %xmm2
# Written with a 64-bit destination; the generated tables print it as %ecx.
pmovmskb %xmm0, %rcx
pmulhuw %xmm0, %xmm2
pmulhuw (%rax), %xmm2
pmulhw %xmm0, %xmm2
pmulhw (%rax), %xmm2
pmullw %xmm0, %xmm2
pmullw (%rax), %xmm2
pmuludq %mm0, %mm2
pmuludq (%rax), %mm2
pmuludq %xmm0, %xmm2
pmuludq (%rax), %xmm2
por %xmm0, %xmm2
por (%rax), %xmm2
psadbw %xmm0, %xmm2
psadbw (%rax), %xmm2
pshufd $1, %xmm0, %xmm2
pshufd $1, (%rax), %xmm2
pshufhw $1, %xmm0, %xmm2
pshufhw $1, (%rax), %xmm2
pshuflw $1, %xmm0, %xmm2
pshuflw $1, (%rax), %xmm2
# Shifts. The d/q/w variants are listed with an immediate count, a register
# count and a memory count; pslldq and psrldq are listed with an immediate
# count only.
pslld $1, %xmm2
pslld %xmm0, %xmm2
pslld (%rax), %xmm2
pslldq $1, %xmm2
psllq $1, %xmm2
psllq %xmm0, %xmm2
psllq (%rax), %xmm2
psllw $1, %xmm2
psllw %xmm0, %xmm2
psllw (%rax), %xmm2
psrad $1, %xmm2
psrad %xmm0, %xmm2
psrad (%rax), %xmm2
psraw $1, %xmm2
psraw %xmm0, %xmm2
psraw (%rax), %xmm2
psrld $1, %xmm2
psrld %xmm0, %xmm2
psrld (%rax), %xmm2
psrldq $1, %xmm2
psrlq $1, %xmm2
psrlq %xmm0, %xmm2
psrlq (%rax), %xmm2
psrlw $1, %xmm2
psrlw %xmm0, %xmm2
psrlw (%rax), %xmm2
psubb %xmm0, %xmm2
psubb (%rax), %xmm2
psubd %xmm0, %xmm2
psubd (%rax), %xmm2
psubq %mm0, %mm2
psubq (%rax), %mm2
psubq %xmm0, %xmm2
psubq (%rax), %xmm2
psubsb %xmm0, %xmm2
psubsb (%rax), %xmm2
psubsw %xmm0, %xmm2
psubsw (%rax), %xmm2
psubusb %xmm0, %xmm2
psubusb (%rax), %xmm2
psubusw %xmm0, %xmm2
psubusw (%rax), %xmm2
psubw %xmm0, %xmm2
psubw (%rax), %xmm2
punpckhbw %xmm0, %xmm2
punpckhbw (%rax), %xmm2
punpckhdq %xmm0, %xmm2
punpckhdq (%rax), %xmm2
punpckhqdq %xmm0, %xmm2
punpckhqdq (%rax), %xmm2
punpckhwd %xmm0, %xmm2
punpckhwd (%rax), %xmm2
punpcklbw %xmm0, %xmm2
punpcklbw (%rax), %xmm2
punpckldq %xmm0, %xmm2
punpckldq (%rax), %xmm2
punpcklqdq %xmm0, %xmm2
punpcklqdq (%rax), %xmm2
punpcklwd %xmm0, %xmm2
punpcklwd (%rax), %xmm2
pxor %xmm0, %xmm2
pxor (%rax), %xmm2
shufpd $1, %xmm0, %xmm2
shufpd $1, (%rax), %xmm2
sqrtpd %xmm0, %xmm2
sqrtpd (%rax), %xmm2
sqrtsd %xmm0, %xmm2
sqrtsd (%rax), %xmm2
subpd %xmm0, %xmm2
subpd (%rax), %xmm2
subsd %xmm0, %xmm2
subsd (%rax), %xmm2
ucomisd %xmm0, %xmm1
ucomisd (%rax), %xmm1
unpckhpd %xmm0, %xmm2
unpckhpd (%rax), %xmm2
unpcklpd %xmm0, %xmm2
unpcklpd (%rax), %xmm2
xorpd %xmm0, %xmm2
xorpd (%rax), %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 addpd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * addpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * addsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 andnpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * andnpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 andpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * andpd (%rax), %xmm2
# CHECK-NEXT: 4 5 1.00 * * U clflush (%rax)
# CHECK-NEXT: 1 3 1.00 cmppd $0, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * cmppd $0, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cmpsd $0, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * cmpsd $0, (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 comisd %xmm0, %xmm1
# CHECK-NEXT: 3 8 1.00 * comisd (%rax), %xmm1
# CHECK-NEXT: 2 4 1.00 cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtpd2dq (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtpd2pi %xmm0, %mm2
# CHECK-NEXT: 3 10 1.00 * cvtpd2pi (%rax), %mm2
# CHECK-NEXT: 2 4 1.00 cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtps2dq (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * cvtps2pd (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %ecx
# CHECK-NEXT: 2 5 1.00 cvtsd2si %xmm0, %rcx
# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %ecx
# CHECK-NEXT: 3 9 1.00 * cvtsd2si (%rax), %rcx
# CHECK-NEXT: 2 4 1.00 cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvtsi2sdl %ecx, %xmm2
# CHECK-NEXT: 2 4 1.00 cvtsi2sdq %rcx, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 2 9 1.00 * cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 cvtss2sd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * cvtss2sd (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: 3 10 1.00 * cvttpd2dq (%rax), %xmm2
# CHECK-NEXT: 2 4 1.00 cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: 3 10 1.00 * cvttpd2pi (%rax), %mm2
# CHECK-NEXT: 1 3 1.00 cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * cvttps2dq (%rax), %xmm2
# CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %ecx
# CHECK-NEXT: 2 5 1.00 cvttsd2si %xmm0, %rcx
# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %ecx
# CHECK-NEXT: 3 9 1.00 * cvttsd2si (%rax), %rcx
# CHECK-NEXT: 1 22 22.00 divpd %xmm0, %xmm2
# CHECK-NEXT: 2 28 22.00 * divpd (%rax), %xmm2
# CHECK-NEXT: 1 22 22.00 divsd %xmm0, %xmm2
# CHECK-NEXT: 2 28 22.00 * divsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * * U lfence
# CHECK-NEXT: 1 1 1.00 * * U maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: 1 3 1.00 maxpd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * maxpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 maxsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * maxsd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 minpd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * minpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 minsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * minsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movapd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movapd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movd %eax, %xmm2
# CHECK-NEXT: 1 6 0.50 * movd (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 movd %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movd %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.33 movdqa %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movdqa (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 movdqu %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movdqu %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movdqu (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 movdq2q %xmm0, %mm2
# CHECK-NEXT: 1 1 1.00 * movhpd %xmm0, (%rax)
# CHECK-NEXT: 2 7 1.00 * movhpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 * movlpd %xmm0, (%rax)
# CHECK-NEXT: 2 7 1.00 * movlpd (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 movmskpd %xmm0, %ecx
# CHECK-NEXT: 1 1 1.00 * movntil %eax, (%rax)
# CHECK-NEXT: 1 1 1.00 * movntiq %rax, (%rax)
# CHECK-NEXT: 1 1 1.00 * movntdq %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * movntpd %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.33 movq %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 movq %rax, %xmm2
# CHECK-NEXT: 1 6 0.50 * movq (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 movq %xmm0, %rcx
# CHECK-NEXT: 1 1 1.00 * movq %xmm0, (%rax)
# CHECK-NEXT: 1 1 0.33 movq2dq %mm0, %xmm2
# CHECK-NEXT: 1 1 1.00 movsd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movsd %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movupd %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movupd %xmm0, (%rax)
# CHECK-NEXT: 1 6 0.50 * movupd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 mulpd %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * mulpd (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 mulsd %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * mulsd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 orpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * orpd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 packssdw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * packssdw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 packsswb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * packsswb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 packuswb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * packuswb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 paddb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 paddd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 paddq %mm0, %mm2
# CHECK-NEXT: 2 7 0.50 * paddq (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 paddq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 paddsb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddsb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 paddsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 paddusb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddusb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 paddusw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddusw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 paddw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * paddw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 pand %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pand (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 pandn %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pandn (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pavgb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pavgb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pavgw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pavgw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpeqb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpeqb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpeqd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpeqd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpeqw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpeqw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpgtb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpgtb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpgtd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpgtd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpgtw (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 pextrw $1, %xmm0, %ecx
# CHECK-NEXT: 1 5 1.00 pmaddwd %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmaddwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmaxsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxub %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmaxub (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pminsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminub %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pminub (%rax), %xmm2
# CHECK-NEXT: 1 2 1.00 pmovmskb %xmm0, %ecx
# CHECK-NEXT: 1 5 1.00 pmulhuw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmulhuw (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmulhw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmulhw (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmullw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmullw (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmuludq %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmuludq (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 pmuludq %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmuludq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 por %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * por (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 psadbw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * psadbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pshufd $1, %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pshufd $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pshufhw $1, %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pshufhw $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pshuflw $1, %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pshuflw $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 pslld $1, %xmm2
# CHECK-NEXT: 2 2 1.00 pslld %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * pslld (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pslldq $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psllq $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psllq %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * psllq (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psllw $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psllw %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * psllw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrad $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psrad %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * psrad (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psraw $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psraw %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * psraw (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrld $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psrld %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * psrld (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psrldq $1, %xmm2
# CHECK-NEXT: 1 1 1.00 psrlq $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psrlq %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * psrlq (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 psrlw $1, %xmm2
# CHECK-NEXT: 2 2 1.00 psrlw %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * psrlw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psubb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psubd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 psubq %mm0, %mm2
# CHECK-NEXT: 2 8 1.00 * psubq (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 psubq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psubq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubsb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psubsb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psubsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubusb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psubusb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubusw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psubusw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psubw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psubw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 punpckhbw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * punpckhbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 punpckhdq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * punpckhdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 punpckhqdq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * punpckhqdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 punpckhwd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * punpckhwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 punpcklbw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * punpcklbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 punpckldq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * punpckldq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 punpcklqdq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * punpcklqdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 punpcklwd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * punpcklwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.33 pxor %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pxor (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * shufpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 21 21.00 sqrtpd %xmm0, %xmm2
# CHECK-NEXT: 2 27 21.00 * sqrtpd (%rax), %xmm2
# CHECK-NEXT: 1 21 21.00 sqrtsd %xmm0, %xmm2
# CHECK-NEXT: 2 27 21.00 * sqrtsd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subpd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * subpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 subsd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * subsd (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 ucomisd %xmm0, %xmm1
# CHECK-NEXT: 3 8 1.00 * ucomisd (%rax), %xmm1
# CHECK-NEXT: 1 1 1.00 unpckhpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * unpckhpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 unpcklpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * unpcklpd (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 xorpd %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * xorpd (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - 172.00 75.83 117.33 16.00 98.83 66.00 66.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - addpd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - addsd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addsd (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - andnpd %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 andnpd (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - andpd %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 andpd (%rax), %xmm2
# CHECK-NEXT: - - 0.50 0.50 1.00 1.00 0.50 0.50 clflush (%rax)
# CHECK-NEXT: - - - 1.00 - - - - cmppd $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cmppd $0, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - cmpsd $0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cmpsd $0, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - comisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 comisd (%rax), %xmm1
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtdq2pd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtdq2pd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - cvtdq2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtdq2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtpd2dq (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtpd2pi %xmm0, %mm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtpd2pi (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtpd2ps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtpd2ps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtpi2pd %mm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtpi2pd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - cvtps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtps2dq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 - - cvtps2pd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtps2pd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - cvtsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - cvtsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvtsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvtsd2si (%rax), %rcx
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsd2ss %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvtsd2ss (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sdl %ecx, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvtsi2sdq %rcx, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvtsi2sdl (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - cvtss2sd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 cvtss2sd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2dq (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - cvttpd2pi %xmm0, %mm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 cvttpd2pi (%rax), %mm2
# CHECK-NEXT: - - - 1.00 - - - - cvttps2dq %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 cvttps2dq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - cvttsd2si %xmm0, %ecx
# CHECK-NEXT: - - 1.00 1.00 - - - - cvttsd2si %xmm0, %rcx
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvttsd2si (%rax), %ecx
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 cvttsd2si (%rax), %rcx
# CHECK-NEXT: - 22.00 1.00 - - - - - divpd %xmm0, %xmm2
# CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 divpd (%rax), %xmm2
# CHECK-NEXT: - 22.00 1.00 - - - - - divsd %xmm0, %xmm2
# CHECK-NEXT: - 22.00 1.00 - - - 0.50 0.50 divsd (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 lfence
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 maskmovdqu %xmm0, %xmm1
# CHECK-NEXT: - - - 1.00 - - - - maxpd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 maxpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - maxsd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 maxsd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - minpd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 minpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - minsd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 minsd (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movapd %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movapd %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 movapd (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movd %eax, %xmm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - movd %xmm0, %ecx
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movd %xmm0, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movdqa %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movdqa %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 movdqa (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movdqu %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movdqu %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 movdqu (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 1.33 - - movdq2q %xmm0, %mm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movhpd %xmm0, (%rax)
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 movhpd (%rax), %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movlpd %xmm0, (%rax)
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 movlpd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - movmskpd %xmm0, %ecx
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntil %eax, (%rax)
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntiq %rax, (%rax)
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntdq %xmm0, (%rax)
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntpd %xmm0, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movq %rax, %xmm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - movq %xmm0, %rcx
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movq %xmm0, (%rax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq2dq %mm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movsd %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movsd %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 movsd (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movupd %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movupd %xmm0, (%rax)
# CHECK-NEXT: - - - - - - 0.50 0.50 movupd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - mulpd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 mulpd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - mulsd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 mulsd (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - orpd %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 orpd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - packssdw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 packssdw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - packsswb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 packsswb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - packuswb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 packuswb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddq %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddq (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddsb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddsb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddsw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddusb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddusb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddusw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddusw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - paddw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 paddw (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pand %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pand (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pandn %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pandn (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pavgb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pavgb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pavgw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pavgw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpeqb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpeqb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpeqd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpeqd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpeqw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpeqw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpgtb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpgtb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpgtd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpgtd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpgtw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpgtw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrw $1, %xmm0, %ecx
# CHECK-NEXT: - - 1.00 - - - - - pmaddwd %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmaddwd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxsw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxub %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxub (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pminsw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pminub %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminub (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pmovmskb %xmm0, %ecx
# CHECK-NEXT: - - 1.00 - - - - - pmulhuw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhuw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pmulhw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pmullw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmullw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pmuludq %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmuludq (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - pmuludq %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmuludq (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - por %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 por (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - psadbw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 psadbw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pshufd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshufd $1, (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pshufhw $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshufhw $1, (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pshuflw $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshuflw $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pslld $1, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pslld %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 pslld (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pslldq $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - psllq $1, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psllq %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psllq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - psllw $1, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psllw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psllw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - psrad $1, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psrad %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psrad (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - psraw $1, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psraw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psraw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - psrld $1, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psrld %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psrld (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psrldq $1, %xmm2
# CHECK-NEXT: - - 1.00 - - - - - psrlq $1, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psrlq %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psrlq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - psrlw $1, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - psrlw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 0.50 0.50 psrlw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psubb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psubd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - psubq %mm0, %mm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 psubq (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psubq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psubsb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubsb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psubsw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psubusb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubusb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psubusw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubusw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psubw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psubw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckhbw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckhbw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckhdq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckhdq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckhqdq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckhqdq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckhwd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckhwd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - punpcklbw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpcklbw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - punpckldq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpckldq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - punpcklqdq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpcklqdq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - punpcklwd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 punpcklwd (%rax), %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - pxor %xmm0, %xmm2
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 pxor (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - shufpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 shufpd $1, (%rax), %xmm2
# CHECK-NEXT: - 21.00 1.00 - - - - - sqrtpd %xmm0, %xmm2
# CHECK-NEXT: - 21.00 1.00 - - - 0.50 0.50 sqrtpd (%rax), %xmm2
# CHECK-NEXT: - 21.00 1.00 - - - - - sqrtsd %xmm0, %xmm2
# CHECK-NEXT: - 21.00 1.00 - - - 0.50 0.50 sqrtsd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - subpd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 subpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - subsd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 subsd (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - - - - ucomisd %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 ucomisd (%rax), %xmm1
# CHECK-NEXT: - - - - - 1.00 - - unpckhpd %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 unpckhpd (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - unpcklpd %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 unpcklpd (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - xorpd %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 xorpd (%rax), %xmm2

View File

@ -0,0 +1,96 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report requested by the RUN line.
# Covers the SSE3 additions (addsub*, hadd*/hsub*, lddqu, mov*dup).
# Every mnemonic is listed once with a register source (%xmm0) and once with
# a memory source ((%rax)); lddqu is a load and so has only the memory form.
# The assertions that follow are autogenerated: regenerate them with
# utils/update_mca_test_checks.py after changing the RUN line or this list.
addsubpd %xmm0, %xmm2
addsubpd (%rax), %xmm2
addsubps %xmm0, %xmm2
addsubps (%rax), %xmm2
haddpd %xmm0, %xmm2
haddpd (%rax), %xmm2
haddps %xmm0, %xmm2
haddps (%rax), %xmm2
hsubpd %xmm0, %xmm2
hsubpd (%rax), %xmm2
hsubps %xmm0, %xmm2
hsubps (%rax), %xmm2
lddqu (%rax), %xmm2
movddup %xmm0, %xmm2
movddup (%rax), %xmm2
movshdup %xmm0, %xmm2
movshdup (%rax), %xmm2
movsldup %xmm0, %xmm2
movsldup (%rax), %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 addsubpd %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * addsubpd (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 addsubps %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * addsubps (%rax), %xmm2
# CHECK-NEXT: 3 5 2.00 haddpd %xmm0, %xmm2
# CHECK-NEXT: 4 11 2.00 * haddpd (%rax), %xmm2
# CHECK-NEXT: 3 5 2.00 haddps %xmm0, %xmm2
# CHECK-NEXT: 4 11 2.00 * haddps (%rax), %xmm2
# CHECK-NEXT: 3 5 2.00 hsubpd %xmm0, %xmm2
# CHECK-NEXT: 4 11 2.00 * hsubpd (%rax), %xmm2
# CHECK-NEXT: 3 5 2.00 hsubps %xmm0, %xmm2
# CHECK-NEXT: 4 11 2.00 * hsubps (%rax), %xmm2
# CHECK-NEXT: 1 6 0.50 * lddqu (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movddup %xmm0, %xmm2
# CHECK-NEXT: 1 6 0.50 * movddup (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movshdup %xmm0, %xmm2
# CHECK-NEXT: 1 6 0.50 * movshdup (%rax), %xmm2
# CHECK-NEXT: 1 1 1.00 movsldup %xmm0, %xmm2
# CHECK-NEXT: 1 6 0.50 * movsldup (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 12.00 - 19.00 5.00 5.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - addsubpd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addsubpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - addsubps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 addsubps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 - - haddpd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 haddpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 - - haddps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 haddps (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 - - hsubpd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 hsubpd (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 - - hsubps %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 2.00 0.50 0.50 hsubps (%rax), %xmm2
# CHECK-NEXT: - - - - - - 0.50 0.50 lddqu (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movddup %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movddup (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movshdup %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movshdup (%rax), %xmm2
# CHECK-NEXT: - - - - - 1.00 - - movsldup %xmm0, %xmm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movsldup (%rax), %xmm2

View File

@ -0,0 +1,366 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report requested by the RUN line.
# Most mnemonics are listed twice: once with a register source and once with
# a memory operand ((%rax)). Exceptions are called out inline.
# The assertions that follow are autogenerated: regenerate them with
# utils/update_mca_test_checks.py after changing the RUN line or this list.
blendpd $11, %xmm0, %xmm2
blendpd $11, (%rax), %xmm2
blendps $11, %xmm0, %xmm2
blendps $11, (%rax), %xmm2
# The blendv forms are written with two operands; the autogenerated
# assertions expect them printed with an explicit leading %xmm0 operand.
blendvpd %xmm0, %xmm2
blendvpd (%rax), %xmm2
blendvps %xmm0, %xmm2
blendvps (%rax), %xmm2
dppd $22, %xmm0, %xmm2
dppd $22, (%rax), %xmm2
dpps $22, %xmm0, %xmm2
dpps $22, (%rax), %xmm2
# extractps is written with %rcx here; the autogenerated assertions expect
# the register form to be printed with %ecx. The second form stores to memory.
extractps $1, %xmm0, %rcx
extractps $1, %xmm0, (%rax)
insertps $1, %xmm0, %xmm2
insertps $1, (%rax), %xmm2
# Load-only instruction: no register-source variant is listed.
movntdqa (%rax), %xmm2
mpsadbw $1, %xmm0, %xmm2
mpsadbw $1, (%rax), %xmm2
packusdw %xmm0, %xmm2
packusdw (%rax), %xmm2
# As with blendvpd/blendvps, the expected output shows an explicit %xmm0.
pblendvb %xmm0, %xmm2
pblendvb (%rax), %xmm2
pblendw $11, %xmm0, %xmm2
pblendw $11, (%rax), %xmm2
pcmpeqq %xmm0, %xmm2
pcmpeqq (%rax), %xmm2
# pextr*: a general-purpose-register destination followed by a memory
# destination (store). Only the store form of pextrw is listed in this file.
pextrb $1, %xmm0, %ecx
pextrb $1, %xmm0, (%rax)
pextrd $1, %xmm0, %ecx
pextrd $1, %xmm0, (%rax)
pextrq $1, %xmm0, %rcx
pextrq $1, %xmm0, (%rax)
pextrw $1, %xmm0, (%rax)
phminposuw %xmm0, %xmm2
phminposuw (%rax), %xmm2
# pinsr*: a general-purpose-register source followed by a memory source.
pinsrb $1, %eax, %xmm1
pinsrb $1, (%rax), %xmm1
pinsrd $1, %eax, %xmm1
pinsrd $1, (%rax), %xmm1
pinsrq $1, %rax, %xmm1
pinsrq $1, (%rax), %xmm1
pmaxsb %xmm0, %xmm2
pmaxsb (%rax), %xmm2
pmaxsd %xmm0, %xmm2
pmaxsd (%rax), %xmm2
pmaxud %xmm0, %xmm2
pmaxud (%rax), %xmm2
pmaxuw %xmm0, %xmm2
pmaxuw (%rax), %xmm2
pminsb %xmm0, %xmm2
pminsb (%rax), %xmm2
pminsd %xmm0, %xmm2
pminsd (%rax), %xmm2
pminud %xmm0, %xmm2
pminud (%rax), %xmm2
pminuw %xmm0, %xmm2
pminuw (%rax), %xmm2
pmovsxbd %xmm0, %xmm2
pmovsxbd (%rax), %xmm2
pmovsxbq %xmm0, %xmm2
pmovsxbq (%rax), %xmm2
pmovsxbw %xmm0, %xmm2
pmovsxbw (%rax), %xmm2
pmovsxdq %xmm0, %xmm2
pmovsxdq (%rax), %xmm2
pmovsxwd %xmm0, %xmm2
pmovsxwd (%rax), %xmm2
pmovsxwq %xmm0, %xmm2
pmovsxwq (%rax), %xmm2
pmovzxbd %xmm0, %xmm2
pmovzxbd (%rax), %xmm2
pmovzxbq %xmm0, %xmm2
pmovzxbq (%rax), %xmm2
pmovzxbw %xmm0, %xmm2
pmovzxbw (%rax), %xmm2
pmovzxdq %xmm0, %xmm2
pmovzxdq (%rax), %xmm2
pmovzxwd %xmm0, %xmm2
pmovzxwd (%rax), %xmm2
pmovzxwq %xmm0, %xmm2
pmovzxwq (%rax), %xmm2
pmuldq %xmm0, %xmm2
pmuldq (%rax), %xmm2
pmulld %xmm0, %xmm2
pmulld (%rax), %xmm2
ptest %xmm0, %xmm1
ptest (%rax), %xmm1
roundpd $1, %xmm0, %xmm2
roundpd $1, (%rax), %xmm2
roundps $1, %xmm0, %xmm2
roundps $1, (%rax), %xmm2
roundsd $1, %xmm0, %xmm2
roundsd $1, (%rax), %xmm2
roundss $1, %xmm0, %xmm2
roundss $1, (%rax), %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 blendpd $11, %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * blendpd $11, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 blendps $11, %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * blendps $11, (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 blendvpd %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2
# CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2
# CHECK-NEXT: 4 12 2.00 dpps $22, %xmm0, %xmm2
# CHECK-NEXT: 5 18 2.00 * dpps $22, (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 extractps $1, %xmm0, %ecx
# CHECK-NEXT: 3 5 1.00 * extractps $1, %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2
# CHECK-NEXT: 2 7 1.00 * insertps $1, (%rax), %xmm2
# CHECK-NEXT: 1 6 0.50 * movntdqa (%rax), %xmm2
# CHECK-NEXT: 3 7 1.00 mpsadbw $1, %xmm0, %xmm2
# CHECK-NEXT: 4 13 1.00 * mpsadbw $1, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 packusdw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * packusdw (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 pblendvb %xmm0, %xmm0, %xmm2
# CHECK-NEXT: 3 8 1.00 * pblendvb %xmm0, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pblendw $11, %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pblendw $11, (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pcmpeqq (%rax), %xmm2
# CHECK-NEXT: 2 3 1.00 pextrb $1, %xmm0, %ecx
# CHECK-NEXT: 3 5 1.00 * pextrb $1, %xmm0, (%rax)
# CHECK-NEXT: 2 3 1.00 pextrd $1, %xmm0, %ecx
# CHECK-NEXT: 4 5 1.00 * pextrd $1, %xmm0, (%rax)
# CHECK-NEXT: 2 3 1.00 pextrq $1, %xmm0, %rcx
# CHECK-NEXT: 4 5 1.00 * pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: 3 5 1.00 * pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: 1 5 1.00 phminposuw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * phminposuw (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 pinsrb $1, %eax, %xmm1
# CHECK-NEXT: 2 7 0.50 * pinsrb $1, (%rax), %xmm1
# CHECK-NEXT: 2 2 1.00 pinsrd $1, %eax, %xmm1
# CHECK-NEXT: 2 7 0.50 * pinsrd $1, (%rax), %xmm1
# CHECK-NEXT: 2 2 1.00 pinsrq $1, %rax, %xmm1
# CHECK-NEXT: 2 7 0.50 * pinsrq $1, (%rax), %xmm1
# CHECK-NEXT: 1 1 0.50 pmaxsb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmaxsb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxsd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmaxsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxud %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmaxud (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmaxuw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmaxuw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminsb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pminsb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminsd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pminsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminud %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pminud (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pminuw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pminuw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxbd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovsxbd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxbq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovsxbq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxbw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovsxbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxdq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovsxdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxwd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovsxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovsxwq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovsxwq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxbd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovzxbd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxbq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovzxbq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxbw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovzxbw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxdq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovzxdq (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxwd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovzxwd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pmovzxwq (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmuldq %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmuldq (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmulld %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmulld (%rax), %xmm2
# CHECK-NEXT: 2 2 1.00 ptest %xmm0, %xmm1
# CHECK-NEXT: 3 8 1.00 * ptest (%rax), %xmm1
# CHECK-NEXT: 1 3 1.00 roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * roundpd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundps $1, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * roundps $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * roundsd $1, (%rax), %xmm2
# CHECK-NEXT: 1 3 1.00 roundss $1, %xmm0, %xmm2
# CHECK-NEXT: 2 9 1.00 * roundss $1, (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 26.00 47.50 5.00 52.50 24.50 24.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.50 - - 0.50 - - blendpd $11, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 blendpd $11, (%rax), %xmm2
# CHECK-NEXT: - - 0.50 - - 0.50 - - blendps $11, %xmm0, %xmm2
# CHECK-NEXT: - - 0.50 - - 0.50 0.50 0.50 blendps $11, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 - - blendvpd %xmm0, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 blendvpd %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 - - blendvps %xmm0, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 blendvps %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - dppd $22, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 dppd $22, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 2.00 - 1.00 - - dpps $22, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 2.00 - 1.00 0.50 0.50 dpps $22, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 - - extractps $1, %xmm0, %ecx
# CHECK-NEXT: - - - - 1.00 1.00 0.50 0.50 extractps $1, %xmm0, (%rax)
# CHECK-NEXT: - - - - - 1.00 - - insertps $1, %xmm0, %xmm2
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 insertps $1, (%rax), %xmm2
# CHECK-NEXT: - - - - - - 0.50 0.50 movntdqa (%rax), %xmm2
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - mpsadbw $1, %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 1.00 - 1.00 0.50 0.50 mpsadbw $1, (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - packusdw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 packusdw (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 - - pblendvb %xmm0, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - 1.00 0.50 0.50 pblendvb %xmm0, (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pblendw $11, %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pblendw $11, (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pcmpeqq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pcmpeqq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrb $1, %xmm0, %ecx
# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 pextrb $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrd $1, %xmm0, %ecx
# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrd $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 0.50 - 0.50 - - pextrq $1, %xmm0, %rcx
# CHECK-NEXT: - - 1.00 0.50 1.00 0.50 0.50 0.50 pextrq $1, %xmm0, (%rax)
# CHECK-NEXT: - - - 0.50 1.00 0.50 0.50 0.50 pextrw $1, %xmm0, (%rax)
# CHECK-NEXT: - - 1.00 - - - - - phminposuw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 phminposuw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrb $1, %eax, %xmm1
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrb $1, (%rax), %xmm1
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrd $1, %eax, %xmm1
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrd $1, (%rax), %xmm1
# CHECK-NEXT: - - - 0.50 - 1.50 - - pinsrq $1, %rax, %xmm1
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pinsrq $1, (%rax), %xmm1
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxsb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxsb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxsd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxsd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxud %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxud (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmaxuw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmaxuw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pminsb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminsb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pminsd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminsd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pminud %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminud (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pminuw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pminuw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxbd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxbd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxbq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxbq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxbw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxbw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxdq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxdq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxwd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxwd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovsxwq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovsxwq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxbd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxbd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxbq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxbq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxbw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxbw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxdq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxdq (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxwd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxwd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pmovzxwq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pmovzxwq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pmuldq %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmuldq (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pmulld %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulld (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - 1.00 - - ptest %xmm0, %xmm1
# CHECK-NEXT: - - 1.00 - - 1.00 0.50 0.50 ptest (%rax), %xmm1
# CHECK-NEXT: - - - 1.00 - - - - roundpd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 roundpd $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - roundps $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 roundps $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - roundsd $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 roundsd $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.00 - - - - roundss $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 roundss $1, (%rax), %xmm2

View File

@ -0,0 +1,99 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report requested by the RUN line.
# Each mnemonic is listed with a register source and then a memory source.
# The assertions that follow are autogenerated: regenerate them with
# utils/update_mca_test_checks.py after changing the RUN line or this list.
#
# crc32 with a 32-bit destination (%ecx): byte, dword and word sources.
crc32b %al, %ecx
crc32b (%rax), %ecx
crc32l %eax, %ecx
crc32l (%rax), %ecx
crc32w %ax, %ecx
crc32w (%rax), %ecx
# crc32 with a 64-bit destination (%rcx): byte and qword sources.
crc32b %al, %rcx
crc32b (%rax), %rcx
crc32q %rax, %rcx
crc32q (%rax), %rcx
# Packed string compares (explicit-length pcmpestr*, implicit-length pcmpistr*).
pcmpestri $1, %xmm0, %xmm2
pcmpestri $1, (%rax), %xmm2
pcmpestrm $1, %xmm0, %xmm2
pcmpestrm $1, (%rax), %xmm2
pcmpistri $1, %xmm0, %xmm2
pcmpistri $1, (%rax), %xmm2
pcmpistrm $1, %xmm0, %xmm2
pcmpistrm $1, (%rax), %xmm2
pcmpgtq %xmm0, %xmm2
pcmpgtq (%rax), %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 crc32b %al, %ecx
# CHECK-NEXT: 2 8 1.00 * crc32b (%rax), %ecx
# CHECK-NEXT: 1 3 1.00 crc32l %eax, %ecx
# CHECK-NEXT: 2 8 1.00 * crc32l (%rax), %ecx
# CHECK-NEXT: 1 3 1.00 crc32w %ax, %ecx
# CHECK-NEXT: 2 8 1.00 * crc32w (%rax), %ecx
# CHECK-NEXT: 1 3 1.00 crc32b %al, %rcx
# CHECK-NEXT: 2 8 1.00 * crc32b (%rax), %rcx
# CHECK-NEXT: 1 3 1.00 crc32q %rax, %rcx
# CHECK-NEXT: 2 8 1.00 * crc32q (%rax), %rcx
# CHECK-NEXT: 1 4 2.67 pcmpestri $1, %xmm0, %xmm2
# CHECK-NEXT: 1 4 2.33 * pcmpestri $1, (%rax), %xmm2
# CHECK-NEXT: 1 11 2.67 pcmpestrm $1, %xmm0, %xmm2
# CHECK-NEXT: 1 11 2.33 * pcmpestrm $1, (%rax), %xmm2
# CHECK-NEXT: 3 11 3.00 pcmpistri $1, %xmm0, %xmm2
# CHECK-NEXT: 4 17 3.00 * pcmpistri $1, (%rax), %xmm2
# CHECK-NEXT: 3 11 3.00 pcmpistrm $1, %xmm0, %xmm2
# CHECK-NEXT: 4 17 3.00 * pcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pcmpgtq %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pcmpgtq (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 24.00 20.00 - 10.00 5.00 5.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - crc32b %al, %ecx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32b (%rax), %ecx
# CHECK-NEXT: - - - 1.00 - - - - crc32l %eax, %ecx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32l (%rax), %ecx
# CHECK-NEXT: - - - 1.00 - - - - crc32w %ax, %ecx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32w (%rax), %ecx
# CHECK-NEXT: - - - 1.00 - - - - crc32b %al, %rcx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32b (%rax), %rcx
# CHECK-NEXT: - - - 1.00 - - - - crc32q %rax, %rcx
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 crc32q (%rax), %rcx
# CHECK-NEXT: - - 2.67 2.67 - 2.67 - - pcmpestri $1, %xmm0, %xmm2
# CHECK-NEXT: - - 2.33 2.33 - 2.33 0.50 0.50 pcmpestri $1, (%rax), %xmm2
# CHECK-NEXT: - - 2.67 2.67 - 2.67 - - pcmpestrm $1, %xmm0, %xmm2
# CHECK-NEXT: - - 2.33 2.33 - 2.33 0.50 0.50 pcmpestrm $1, (%rax), %xmm2
# CHECK-NEXT: - - 3.00 - - - - - pcmpistri $1, %xmm0, %xmm2
# CHECK-NEXT: - - 3.00 - - - 0.50 0.50 pcmpistri $1, (%rax), %xmm2
# CHECK-NEXT: - - 3.00 - - - - - pcmpistrm $1, %xmm0, %xmm2
# CHECK-NEXT: - - 3.00 - - - 0.50 0.50 pcmpistrm $1, (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pcmpgtq %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pcmpgtq (%rax), %xmm2

View File

@ -0,0 +1,50 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report requested by the RUN line.
# The assertions that follow are autogenerated: regenerate them with
# utils/update_mca_test_checks.py after changing the RUN line or this list.
#
# extrq/insertq: register-operand form first, then the immediate-operand form.
extrq %xmm0, %xmm2
extrq $22, $2, %xmm2
insertq %xmm0, %xmm2
insertq $22, $22, %xmm0, %xmm2
# Scalar stores to memory; the expected table marks both as MayStore.
movntsd %xmm0, (%rax)
movntss %xmm0, (%rax)
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 extrq %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 extrq $22, $2, %xmm2
# CHECK-NEXT: 1 1 0.50 insertq %xmm0, %xmm2
# CHECK-NEXT: 1 1 0.50 insertq $22, $22, %xmm0, %xmm2
# CHECK-NEXT: 1 1 1.00 * movntsd %xmm0, (%rax)
# CHECK-NEXT: 1 1 1.00 * movntss %xmm0, (%rax)
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - - 2.00 2.00 2.00 1.00 1.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 0.50 - 0.50 - - extrq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - extrq $22, $2, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - insertq %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - insertq $22, $22, %xmm0, %xmm2
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntsd %xmm0, (%rax)
# CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movntss %xmm0, (%rax)

View File

@ -0,0 +1,253 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report requested by the RUN line.
# Every mnemonic is listed in four forms, always in the same order:
#   1. MMX register source   (%mm0  -> %mm2)
#   2. MMX memory source     ((%rax) -> %mm2)
#   3. XMM register source   (%xmm0 -> %xmm2)
#   4. XMM memory source     ((%rax) -> %xmm2)
# The assertions that follow are autogenerated: regenerate them with
# utils/update_mca_test_checks.py after changing the RUN line or this list.
pabsb %mm0, %mm2
pabsb (%rax), %mm2
pabsb %xmm0, %xmm2
pabsb (%rax), %xmm2
pabsd %mm0, %mm2
pabsd (%rax), %mm2
pabsd %xmm0, %xmm2
pabsd (%rax), %xmm2
pabsw %mm0, %mm2
pabsw (%rax), %mm2
pabsw %xmm0, %xmm2
pabsw (%rax), %xmm2
palignr $1, %mm0, %mm2
palignr $1, (%rax), %mm2
palignr $1, %xmm0, %xmm2
palignr $1, (%rax), %xmm2
phaddd %mm0, %mm2
phaddd (%rax), %mm2
phaddd %xmm0, %xmm2
phaddd (%rax), %xmm2
phaddsw %mm0, %mm2
phaddsw (%rax), %mm2
phaddsw %xmm0, %xmm2
phaddsw (%rax), %xmm2
phaddw %mm0, %mm2
phaddw (%rax), %mm2
phaddw %xmm0, %xmm2
phaddw (%rax), %xmm2
phsubd %mm0, %mm2
phsubd (%rax), %mm2
phsubd %xmm0, %xmm2
phsubd (%rax), %xmm2
phsubsw %mm0, %mm2
phsubsw (%rax), %mm2
phsubsw %xmm0, %xmm2
phsubsw (%rax), %xmm2
phsubw %mm0, %mm2
phsubw (%rax), %mm2
phsubw %xmm0, %xmm2
phsubw (%rax), %xmm2
pmaddubsw %mm0, %mm2
pmaddubsw (%rax), %mm2
pmaddubsw %xmm0, %xmm2
pmaddubsw (%rax), %xmm2
pmulhrsw %mm0, %mm2
pmulhrsw (%rax), %mm2
pmulhrsw %xmm0, %xmm2
pmulhrsw (%rax), %xmm2
pshufb %mm0, %mm2
pshufb (%rax), %mm2
pshufb %xmm0, %xmm2
pshufb (%rax), %xmm2
psignb %mm0, %mm2
psignb (%rax), %mm2
psignb %xmm0, %xmm2
psignb (%rax), %xmm2
psignd %mm0, %mm2
psignd (%rax), %mm2
psignd %xmm0, %xmm2
psignd (%rax), %xmm2
psignw %mm0, %mm2
psignw (%rax), %mm2
psignw %xmm0, %xmm2
psignw (%rax), %xmm2
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 1 0.50 pabsb %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * pabsb (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 pabsb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pabsb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pabsd %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * pabsd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 pabsd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pabsd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pabsw %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * pabsw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 pabsw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pabsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 palignr $1, %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * palignr $1, (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 palignr $1, %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * palignr $1, (%rax), %xmm2
# CHECK-NEXT: 3 3 1.50 phaddd %mm0, %mm2
# CHECK-NEXT: 4 8 1.50 * phaddd (%rax), %mm2
# CHECK-NEXT: 3 3 1.50 phaddd %xmm0, %xmm2
# CHECK-NEXT: 4 9 1.50 * phaddd (%rax), %xmm2
# CHECK-NEXT: 3 3 1.50 phaddsw %mm0, %mm2
# CHECK-NEXT: 4 8 1.50 * phaddsw (%rax), %mm2
# CHECK-NEXT: 3 3 1.50 phaddsw %xmm0, %xmm2
# CHECK-NEXT: 4 9 1.50 * phaddsw (%rax), %xmm2
# CHECK-NEXT: 3 3 1.50 phaddw %mm0, %mm2
# CHECK-NEXT: 4 8 1.50 * phaddw (%rax), %mm2
# CHECK-NEXT: 3 3 1.50 phaddw %xmm0, %xmm2
# CHECK-NEXT: 4 9 1.50 * phaddw (%rax), %xmm2
# CHECK-NEXT: 3 3 1.50 phsubd %mm0, %mm2
# CHECK-NEXT: 4 8 1.50 * phsubd (%rax), %mm2
# CHECK-NEXT: 3 3 1.50 phsubd %xmm0, %xmm2
# CHECK-NEXT: 4 9 1.50 * phsubd (%rax), %xmm2
# CHECK-NEXT: 3 3 1.50 phsubsw %mm0, %mm2
# CHECK-NEXT: 4 8 1.50 * phsubsw (%rax), %mm2
# CHECK-NEXT: 3 3 1.50 phsubsw %xmm0, %xmm2
# CHECK-NEXT: 4 9 1.50 * phsubsw (%rax), %xmm2
# CHECK-NEXT: 3 3 1.50 phsubw %mm0, %mm2
# CHECK-NEXT: 4 8 1.50 * phsubw (%rax), %mm2
# CHECK-NEXT: 3 3 1.50 phsubw %xmm0, %xmm2
# CHECK-NEXT: 4 9 1.50 * phsubw (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmaddubsw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmaddubsw (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 pmaddubsw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmaddubsw (%rax), %xmm2
# CHECK-NEXT: 1 5 1.00 pmulhrsw %mm0, %mm2
# CHECK-NEXT: 2 10 1.00 * pmulhrsw (%rax), %mm2
# CHECK-NEXT: 1 5 1.00 pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: 2 11 1.00 * pmulhrsw (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 pshufb %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * pshufb (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 pshufb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * pshufb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psignb %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * psignb (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 psignb %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psignb (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psignd %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * psignd (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 psignd %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psignd (%rax), %xmm2
# CHECK-NEXT: 1 1 0.50 psignw %mm0, %mm2
# CHECK-NEXT: 2 6 0.50 * psignw (%rax), %mm2
# CHECK-NEXT: 1 1 0.50 psignw %xmm0, %xmm2
# CHECK-NEXT: 2 7 0.50 * psignw (%rax), %xmm2
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 8.00 52.00 - 52.00 16.00 16.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsb %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsb (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsd %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsd (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsw %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsw (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pabsw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pabsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - palignr $1, %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 palignr $1, (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - palignr $1, %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 palignr $1, (%rax), %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddd %mm0, %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddd (%rax), %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddd (%rax), %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddsw %mm0, %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddsw (%rax), %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddsw %xmm0, %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddsw (%rax), %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddw %mm0, %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddw (%rax), %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phaddw %xmm0, %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phaddw (%rax), %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubd %mm0, %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubd (%rax), %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubd %xmm0, %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubd (%rax), %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubsw %mm0, %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubsw (%rax), %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubsw %xmm0, %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubsw (%rax), %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubw %mm0, %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubw (%rax), %mm2
# CHECK-NEXT: - - - 1.50 - 1.50 - - phsubw %xmm0, %xmm2
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 phsubw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pmaddubsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmaddubsw (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - pmaddubsw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmaddubsw (%rax), %xmm2
# CHECK-NEXT: - - 1.00 - - - - - pmulhrsw %mm0, %mm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhrsw (%rax), %mm2
# CHECK-NEXT: - - 1.00 - - - - - pmulhrsw %xmm0, %xmm2
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 pmulhrsw (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pshufb %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshufb (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - pshufb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 pshufb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psignb %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignb (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psignb %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignb (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psignd %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignd (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psignd %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignd (%rax), %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psignw %mm0, %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignw (%rax), %mm2
# CHECK-NEXT: - - - 0.50 - 0.50 - - psignw %xmm0, %xmm2
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 psignw (%rax), %xmm2

View File

@ -0,0 +1,169 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report: AMD TBM bit-manipulation
# instructions (AT&T syntax). Every mnemonic is listed in four forms, in this
# order: 32-bit register source, 32-bit memory source, 64-bit register source,
# 64-bit memory source. The expected tables further down have one row per
# instruction in the same order, so do not reorder or edit these lines without
# regenerating the assertions with the script named in the NOTE line.
#
# NOTE(review): the command line above selects -mcpu=x86-64 and the expected
# resource names are the SB* ones; this looks like a deliberate baseline taken
# before a dedicated CPU model is wired in -- confirm before changing -mcpu.

# bextr is exercised in its immediate-control form. The control word is
# (length << 8) | start, so $8192 (0x2000) asks for 32 bits starting at bit 0
# and $16384 (0x4000) for 64 bits starting at bit 0.
bextr $8192, %ebx, %ecx
bextr $8192, (%rbx), %ecx
bextr $16384, %rbx, %rcx
bextr $16384, (%rbx), %rcx
# Two-operand TBM instructions: source (register or memory), then destination.
blcfill %eax, %ecx
blcfill (%rax), %ecx
blcfill %rax, %rcx
blcfill (%rax), %rcx
blci %eax, %ecx
blci (%rax), %ecx
blci %rax, %rcx
blci (%rax), %rcx
blcic %eax, %ecx
blcic (%rax), %ecx
blcic %rax, %rcx
blcic (%rax), %rcx
blcmsk %eax, %ecx
blcmsk (%rax), %ecx
blcmsk %rax, %rcx
blcmsk (%rax), %rcx
blcs %eax, %ecx
blcs (%rax), %ecx
blcs %rax, %rcx
blcs (%rax), %rcx
blsfill %eax, %ecx
blsfill (%rax), %ecx
blsfill %rax, %rcx
blsfill (%rax), %rcx
blsic %eax, %ecx
blsic (%rax), %ecx
blsic %rax, %rcx
blsic (%rax), %rcx
t1mskc %eax, %ecx
t1mskc (%rax), %ecx
t1mskc %rax, %rcx
t1mskc (%rax), %rcx
tzmsk %eax, %ecx
tzmsk (%rax), %ecx
tzmsk %rax, %rcx
tzmsk (%rax), %rcx
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 2 1.00 bextrl $8192, %ebx, %ecx
# CHECK-NEXT: 3 7 1.00 * bextrl $8192, (%rbx), %ecx
# CHECK-NEXT: 2 2 1.00 bextrq $16384, %rbx, %rcx
# CHECK-NEXT: 3 7 1.00 * bextrq $16384, (%rbx), %rcx
# CHECK-NEXT: 1 1 0.33 blcfilll %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blcfilll (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blcfillq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blcfillq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 blcil %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blcil (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blciq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blciq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 blcicl %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blcicl (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blcicq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blcicq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 blcmskl %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blcmskl (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blcmskq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blcmskq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 blcsl %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blcsl (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blcsq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blcsq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 blsfilll %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blsfilll (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blsfillq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blsfillq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 blsicl %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * blsicl (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 blsicq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * blsicq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 t1mskcl %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * t1mskcl (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 t1mskcq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * t1mskcq (%rax), %rcx
# CHECK-NEXT: 1 1 0.33 tzmskl %eax, %ecx
# CHECK-NEXT: 2 6 0.50 * tzmskl (%rax), %ecx
# CHECK-NEXT: 1 1 0.33 tzmskq %rax, %rcx
# CHECK-NEXT: 2 6 0.50 * tzmskq (%rax), %rcx
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 14.00 16.00 - 14.00 10.00 10.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bextrl $8192, %ebx, %ecx
# CHECK-NEXT: - - 0.50 1.00 - 0.50 0.50 0.50 bextrl $8192, (%rbx), %ecx
# CHECK-NEXT: - - 0.50 1.00 - 0.50 - - bextrq $16384, %rbx, %rcx
# CHECK-NEXT: - - 0.50 1.00 - 0.50 0.50 0.50 bextrq $16384, (%rbx), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcfilll %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcfilll (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcfillq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcfillq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcil %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcil (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blciq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blciq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcicl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcicl (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcicq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcicq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcmskl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcmskl (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcmskq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcmskq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcsl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcsl (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blcsq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blcsq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsfilll %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsfilll (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsfillq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsfillq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsicl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsicl (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - blsicq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 blsicq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - t1mskcl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 t1mskcl (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - t1mskcq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 t1mskcq (%rax), %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - tzmskl %eax, %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 tzmskl (%rax), %ecx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - tzmskq %rax, %rcx
# CHECK-NEXT: - - 0.33 0.33 - 0.33 0.50 0.50 tzmskq (%rax), %rcx

View File

@ -0,0 +1,78 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=i686-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report: legacy instructions
# assembled for a 32-bit target (note the i686 triple in the command line
# above). Apart from leave, none of these can be encoded in 64-bit mode, which
# is why they live in a separate file with a 32-bit triple.
# The expected tables further down have one row per instruction in the same
# order; regenerate them with the script named in the NOTE line after any edit.

# ASCII/BCD adjust instructions. aad and aam appear twice: once with the
# implicit default base and once with an explicit immediate base ($7).
aaa
aad
aad $7
aam
aam $7
aas
# bound at 16-bit and 32-bit operand size; the memory operand uses a 32-bit
# address register.
bound %bx, (%eax)
bound %ebx, (%eax)
daa
das
into
leave
salc
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 100 0.33 aaa
# CHECK-NEXT: 1 100 0.33 aad
# CHECK-NEXT: 1 100 0.33 aad $7
# CHECK-NEXT: 1 100 0.33 aam
# CHECK-NEXT: 1 100 0.33 aam $7
# CHECK-NEXT: 1 100 0.33 aas
# CHECK-NEXT: 1 100 0.33 U bound %bx, (%eax)
# CHECK-NEXT: 1 100 0.33 U bound %ebx, (%eax)
# CHECK-NEXT: 1 100 0.33 daa
# CHECK-NEXT: 1 100 0.33 das
# CHECK-NEXT: 1 100 0.33 U into
# CHECK-NEXT: 3 7 0.67 * leave
# CHECK-NEXT: 1 1 0.33 U salc
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 4.67 4.67 - 4.67 0.50 0.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aaa
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aad
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aad $7
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aam
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aam $7
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - aas
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - bound %bx, (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - bound %ebx, (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - daa
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - das
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - into
# CHECK-NEXT: - - 0.67 0.67 - 0.67 0.50 0.50 leave
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - salc

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,521 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report: x87 FPU instructions in
# AT&T syntax, covering register-stack forms (%st(N)) and memory forms. The
# s / l / ll / t mnemonic suffixes select the memory operand size.
# All memory operands use 32-bit address registers (%eax, %ecx, %edx) even
# though the triple above is x86_64.
# The expected tables further down have one row per reported instruction in
# the same order as this list; regenerate them with the script named in the
# NOTE line after any edit here.
f2xm1
fabs
fadd %st(0), %st(1)
fadd %st(2)
fadds (%ecx)
faddl (%ecx)
faddp %st(1)
faddp %st(2)
fiadds (%ecx)
fiaddl (%ecx)
fbld (%ecx)
fbstp (%eax)
fchs
fnclex
fcmovb %st(1), %st(0)
fcmovbe %st(1), %st(0)
fcmove %st(1), %st(0)
fcmovnb %st(1), %st(0)
fcmovnbe %st(1), %st(0)
fcmovne %st(1), %st(0)
fcmovnu %st(1), %st(0)
fcmovu %st(1), %st(0)
fcom %st(1)
fcom %st(3)
fcoms (%ecx)
fcoml (%eax)
fcomp %st(1)
fcomp %st(3)
fcomps (%ecx)
fcompl (%eax)
fcompp
fcomi %st(3)
fcompi %st(3)
fcos
fdecstp
fdiv %st(0), %st(1)
fdiv %st(2)
fdivs (%ecx)
fdivl (%eax)
fdivp %st(1)
fdivp %st(2)
fidivs (%ecx)
fidivl (%eax)
fdivr %st(0), %st(1)
fdivr %st(2)
fdivrs (%ecx)
fdivrl (%eax)
fdivrp %st(1)
fdivrp %st(2)
fidivrs (%ecx)
fidivrl (%eax)
ffree %st(0)
ficoms (%ecx)
ficoml (%eax)
ficomps (%ecx)
ficompl (%eax)
filds (%edx)
fildl (%ecx)
fildll (%eax)
fincstp
fninit
fists (%edx)
fistl (%ecx)
fistps (%edx)
fistpl (%ecx)
fistpll (%eax)
fisttps (%edx)
fisttpl (%ecx)
fisttpll (%eax)
fld %st(0)
flds (%edx)
fldl (%ecx)
fldt (%eax)
fldcw (%eax)
fldenv (%eax)
fld1
fldl2e
fldl2t
fldlg2
fldln2
fldpi
fldz
fmul %st(0), %st(1)
fmul %st(2)
fmuls (%ecx)
fmull (%eax)
fmulp %st(1)
fmulp %st(2)
fimuls (%ecx)
fimull (%eax)
fnop
fpatan
fprem
fprem1
fptan
frndint
frstor (%eax)
fnsave (%eax)
fscale
fsin
fsincos
fsqrt
fst %st(0)
fsts (%edx)
fstl (%ecx)
fstp %st(0)
# NOTE(review): fstpl is listed twice below (with %edx and with %ecx) and there
# is no fstps line, unlike the fsts/fstl pair above. The first one was possibly
# meant to be fstps -- confirm, and regenerate the expectations if it changes.
fstpl (%edx)
fstpl (%ecx)
fstpt (%eax)
fnstcw (%eax)
fnstenv (%eax)
fnstsw (%eax)
# NOTE(review): frstor (%eax) already appears earlier in this list (right
# before fnsave); this second occurrence looks redundant -- confirm intent.
frstor (%eax)
# fsave produces two rows in the expected output: "wait" followed by "fnsave".
fsave (%eax)
fsub %st(0), %st(1)
fsub %st(2)
fsubs (%ecx)
fsubl (%eax)
fsubp %st(1)
fsubp %st(2)
fisubs (%ecx)
fisubl (%eax)
fsubr %st(0), %st(1)
fsubr %st(2)
fsubrs (%ecx)
fsubrl (%eax)
fsubrp %st(1)
fsubrp %st(2)
fisubrs (%ecx)
fisubrl (%eax)
ftst
fucom %st(1)
fucom %st(3)
fucomp %st(1)
fucomp %st(3)
fucompp
fucomi %st(3)
fucompi %st(3)
# fwait is printed as "wait" in the expected output.
fwait
fxam
fxch %st(1)
fxch %st(3)
fxrstor (%eax)
fxsave (%eax)
fxtract
fyl2x
fyl2xp1
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 100 0.33 U f2xm1
# CHECK-NEXT: 1 1 1.00 U fabs
# CHECK-NEXT: 1 3 1.00 U fadd %st(0), %st(1)
# CHECK-NEXT: 1 3 1.00 U fadd %st(2)
# CHECK-NEXT: 2 10 1.00 * U fadds (%ecx)
# CHECK-NEXT: 2 10 1.00 * U faddl (%ecx)
# CHECK-NEXT: 1 3 1.00 U faddp %st(1)
# CHECK-NEXT: 1 3 1.00 U faddp %st(2)
# CHECK-NEXT: 3 13 2.00 * U fiadds (%ecx)
# CHECK-NEXT: 3 13 2.00 * U fiaddl (%ecx)
# CHECK-NEXT: 1 100 0.33 U fbld (%ecx)
# CHECK-NEXT: 1 100 0.33 U fbstp (%eax)
# CHECK-NEXT: 1 1 1.00 U fchs
# CHECK-NEXT: 1 100 0.33 U fnclex
# CHECK-NEXT: 3 3 2.00 U fcmovb %st(1), %st(0)
# CHECK-NEXT: 3 3 2.00 U fcmovbe %st(1), %st(0)
# CHECK-NEXT: 3 3 2.00 U fcmove %st(1), %st(0)
# CHECK-NEXT: 3 3 2.00 U fcmovnb %st(1), %st(0)
# CHECK-NEXT: 3 3 2.00 U fcmovnbe %st(1), %st(0)
# CHECK-NEXT: 3 3 2.00 U fcmovne %st(1), %st(0)
# CHECK-NEXT: 3 3 2.00 U fcmovnu %st(1), %st(0)
# CHECK-NEXT: 3 3 2.00 U fcmovu %st(1), %st(0)
# CHECK-NEXT: 1 1 1.00 U fcom %st(1)
# CHECK-NEXT: 1 1 1.00 U fcom %st(3)
# CHECK-NEXT: 2 8 1.00 U fcoms (%ecx)
# CHECK-NEXT: 2 8 1.00 U fcoml (%eax)
# CHECK-NEXT: 1 1 1.00 U fcomp %st(1)
# CHECK-NEXT: 1 1 1.00 U fcomp %st(3)
# CHECK-NEXT: 2 8 1.00 U fcomps (%ecx)
# CHECK-NEXT: 2 8 1.00 U fcompl (%eax)
# CHECK-NEXT: 1 100 0.33 U fcompp
# CHECK-NEXT: 3 3 1.00 U fcomi %st(3)
# CHECK-NEXT: 3 3 1.00 U fcompi %st(3)
# CHECK-NEXT: 1 100 0.33 U fcos
# CHECK-NEXT: 1 1 1.00 U fdecstp
# CHECK-NEXT: 1 14 14.00 U fdiv %st(0), %st(1)
# CHECK-NEXT: 1 14 14.00 U fdiv %st(2)
# CHECK-NEXT: 2 31 1.00 * U fdivs (%ecx)
# CHECK-NEXT: 2 31 1.00 * U fdivl (%eax)
# CHECK-NEXT: 1 14 14.00 U fdivp %st(1)
# CHECK-NEXT: 1 14 14.00 U fdivp %st(2)
# CHECK-NEXT: 3 34 1.00 * U fidivs (%ecx)
# CHECK-NEXT: 3 34 1.00 * U fidivl (%eax)
# CHECK-NEXT: 1 14 14.00 U fdivr %st(0), %st(1)
# CHECK-NEXT: 1 14 14.00 U fdivr %st(2)
# CHECK-NEXT: 2 31 1.00 * U fdivrs (%ecx)
# CHECK-NEXT: 2 31 1.00 * U fdivrl (%eax)
# CHECK-NEXT: 1 14 14.00 U fdivrp %st(1)
# CHECK-NEXT: 1 14 14.00 U fdivrp %st(2)
# CHECK-NEXT: 3 34 1.00 * U fidivrs (%ecx)
# CHECK-NEXT: 3 34 1.00 * U fidivrl (%eax)
# CHECK-NEXT: 1 1 1.00 U ffree %st(0)
# CHECK-NEXT: 3 11 2.00 U ficoms (%ecx)
# CHECK-NEXT: 3 11 2.00 U ficoml (%eax)
# CHECK-NEXT: 3 11 2.00 U ficomps (%ecx)
# CHECK-NEXT: 3 11 2.00 U ficompl (%eax)
# CHECK-NEXT: 2 10 1.00 * U filds (%edx)
# CHECK-NEXT: 2 10 1.00 * U fildl (%ecx)
# CHECK-NEXT: 2 10 1.00 * U fildll (%eax)
# CHECK-NEXT: 1 1 1.00 U fincstp
# CHECK-NEXT: 4 5 1.33 U fninit
# CHECK-NEXT: 4 9 1.00 * U fists (%edx)
# CHECK-NEXT: 4 9 1.00 * U fistl (%ecx)
# CHECK-NEXT: 4 9 1.00 * U fistps (%edx)
# CHECK-NEXT: 4 9 1.00 * U fistpl (%ecx)
# CHECK-NEXT: 4 9 1.00 * U fistpll (%eax)
# CHECK-NEXT: 3 5 1.00 * U fisttps (%edx)
# CHECK-NEXT: 3 5 1.00 * U fisttpl (%ecx)
# CHECK-NEXT: 3 5 1.00 * U fisttpll (%eax)
# CHECK-NEXT: 1 1 1.00 U fld %st(0)
# CHECK-NEXT: 3 9 1.00 * U flds (%edx)
# CHECK-NEXT: 3 9 1.00 * U fldl (%ecx)
# CHECK-NEXT: 3 9 1.00 * U fldt (%eax)
# CHECK-NEXT: 5 8 2.00 * U fldcw (%eax)
# CHECK-NEXT: 1 100 0.33 U fldenv (%eax)
# CHECK-NEXT: 2 1 1.00 U fld1
# CHECK-NEXT: 2 1 1.00 U fldl2e
# CHECK-NEXT: 2 1 1.00 U fldl2t
# CHECK-NEXT: 2 1 1.00 U fldlg2
# CHECK-NEXT: 2 1 1.00 U fldln2
# CHECK-NEXT: 2 1 1.00 U fldpi
# CHECK-NEXT: 1 1 1.00 U fldz
# CHECK-NEXT: 1 5 1.00 U fmul %st(0), %st(1)
# CHECK-NEXT: 1 5 1.00 U fmul %st(2)
# CHECK-NEXT: 2 12 1.00 * U fmuls (%ecx)
# CHECK-NEXT: 2 12 1.00 * U fmull (%eax)
# CHECK-NEXT: 1 5 1.00 U fmulp %st(1)
# CHECK-NEXT: 1 5 1.00 U fmulp %st(2)
# CHECK-NEXT: 3 15 1.00 * U fimuls (%ecx)
# CHECK-NEXT: 3 15 1.00 * U fimull (%eax)
# CHECK-NEXT: 1 1 1.00 U fnop
# CHECK-NEXT: 1 100 0.33 U fpatan
# CHECK-NEXT: 1 100 0.33 U fprem
# CHECK-NEXT: 1 100 0.33 U fprem1
# CHECK-NEXT: 1 100 0.33 U fptan
# CHECK-NEXT: 1 100 0.33 U frndint
# CHECK-NEXT: 1 100 0.33 U frstor (%eax)
# CHECK-NEXT: 1 100 0.33 U fnsave (%eax)
# CHECK-NEXT: 1 100 0.33 U fscale
# CHECK-NEXT: 1 100 0.33 U fsin
# CHECK-NEXT: 1 100 0.33 U fsincos
# CHECK-NEXT: 1 24 24.00 U fsqrt
# CHECK-NEXT: 1 1 1.00 U fst %st(0)
# CHECK-NEXT: 3 6 1.00 * U fsts (%edx)
# CHECK-NEXT: 3 6 1.00 * U fstl (%ecx)
# CHECK-NEXT: 1 1 1.00 U fstp %st(0)
# CHECK-NEXT: 3 6 1.00 * U fstpl (%edx)
# CHECK-NEXT: 3 6 1.00 * U fstpl (%ecx)
# CHECK-NEXT: 3 6 1.00 * U fstpt (%eax)
# CHECK-NEXT: 4 7 1.00 * U fnstcw (%eax)
# CHECK-NEXT: 1 100 0.33 U fnstenv (%eax)
# CHECK-NEXT: 4 7 1.00 U fnstsw (%eax)
# CHECK-NEXT: 1 100 0.33 U frstor (%eax)
# CHECK-NEXT: 1 100 0.33 U wait
# CHECK-NEXT: 1 100 0.33 U fnsave (%eax)
# CHECK-NEXT: 1 3 1.00 U fsub %st(0), %st(1)
# CHECK-NEXT: 1 3 1.00 U fsub %st(2)
# CHECK-NEXT: 2 10 1.00 * U fsubs (%ecx)
# CHECK-NEXT: 2 10 1.00 * U fsubl (%eax)
# CHECK-NEXT: 1 3 1.00 U fsubp %st(1)
# CHECK-NEXT: 1 3 1.00 U fsubp %st(2)
# CHECK-NEXT: 3 13 2.00 * U fisubs (%ecx)
# CHECK-NEXT: 3 13 2.00 * U fisubl (%eax)
# CHECK-NEXT: 1 3 1.00 U fsubr %st(0), %st(1)
# CHECK-NEXT: 1 3 1.00 U fsubr %st(2)
# CHECK-NEXT: 2 10 1.00 * U fsubrs (%ecx)
# CHECK-NEXT: 2 10 1.00 * U fsubrl (%eax)
# CHECK-NEXT: 1 3 1.00 U fsubrp %st(1)
# CHECK-NEXT: 1 3 1.00 U fsubrp %st(2)
# CHECK-NEXT: 3 13 2.00 * U fisubrs (%ecx)
# CHECK-NEXT: 3 13 2.00 * U fisubrl (%eax)
# CHECK-NEXT: 1 3 1.00 U ftst
# CHECK-NEXT: 1 1 1.00 U fucom %st(1)
# CHECK-NEXT: 1 1 1.00 U fucom %st(3)
# CHECK-NEXT: 1 1 1.00 U fucomp %st(1)
# CHECK-NEXT: 1 1 1.00 U fucomp %st(3)
# CHECK-NEXT: 1 3 1.00 U fucompp
# CHECK-NEXT: 3 3 1.00 U fucomi %st(3)
# CHECK-NEXT: 3 3 1.00 U fucompi %st(3)
# CHECK-NEXT: 1 100 0.33 U wait
# CHECK-NEXT: 1 100 0.33 U fxam
# CHECK-NEXT: 1 1 0.33 U fxch %st(1)
# CHECK-NEXT: 1 1 0.33 U fxch %st(3)
# CHECK-NEXT: 5 5 2.00 * * U fxrstor (%eax)
# CHECK-NEXT: 1 100 0.33 * * U fxsave (%eax)
# CHECK-NEXT: 1 100 0.33 U fxtract
# CHECK-NEXT: 1 100 0.33 U fyl2x
# CHECK-NEXT: 1 100 0.33 U fyl2xp1
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - 136.00 52.67 90.67 17.00 54.67 34.00 34.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - f2xm1
# CHECK-NEXT: - - - - - 1.00 - - fabs
# CHECK-NEXT: - - - 1.00 - - - - fadd %st(0), %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fadd %st(2)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fadds (%ecx)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 faddl (%ecx)
# CHECK-NEXT: - - - 1.00 - - - - faddp %st(1)
# CHECK-NEXT: - - - 1.00 - - - - faddp %st(2)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fiadds (%ecx)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fiaddl (%ecx)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fbld (%ecx)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fbstp (%eax)
# CHECK-NEXT: - - - - - 1.00 - - fchs
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnclex
# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovb %st(1), %st(0)
# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovbe %st(1), %st(0)
# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmove %st(1), %st(0)
# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnb %st(1), %st(0)
# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnbe %st(1), %st(0)
# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovne %st(1), %st(0)
# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovnu %st(1), %st(0)
# CHECK-NEXT: - - 0.50 - - 2.50 - - fcmovu %st(1), %st(0)
# CHECK-NEXT: - - - 1.00 - - - - fcom %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fcom %st(3)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcoms (%ecx)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcoml (%eax)
# CHECK-NEXT: - - - 1.00 - - - - fcomp %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fcomp %st(3)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcomps (%ecx)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fcompl (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcompp
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcomi %st(3)
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fcompi %st(3)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fcos
# CHECK-NEXT: - - - - - 1.00 - - fdecstp
# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(0), %st(1)
# CHECK-NEXT: - 14.00 1.00 - - - - - fdiv %st(2)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivs (%ecx)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivl (%eax)
# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(1)
# CHECK-NEXT: - 14.00 1.00 - - - - - fdivp %st(2)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivs (%ecx)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivl (%eax)
# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(0), %st(1)
# CHECK-NEXT: - 14.00 1.00 - - - - - fdivr %st(2)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrs (%ecx)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fdivrl (%eax)
# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(1)
# CHECK-NEXT: - 14.00 1.00 - - - - - fdivrp %st(2)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrs (%ecx)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fidivrl (%eax)
# CHECK-NEXT: - - - - - 1.00 - - ffree %st(0)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 ficoms (%ecx)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 ficoml (%eax)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 ficomps (%ecx)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 ficompl (%eax)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 filds (%edx)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fildl (%ecx)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fildll (%eax)
# CHECK-NEXT: - - - - - 1.00 - - fincstp
# CHECK-NEXT: - - 1.00 1.00 - 2.00 - - fninit
# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fists (%edx)
# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fistl (%ecx)
# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fistps (%edx)
# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fistpl (%ecx)
# CHECK-NEXT: - - - 1.00 1.00 - 1.00 1.00 fistpll (%eax)
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 fisttps (%edx)
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 fisttpl (%ecx)
# CHECK-NEXT: - - - 1.00 1.00 - 0.50 0.50 fisttpll (%eax)
# CHECK-NEXT: - - - - - 1.00 - - fld %st(0)
# CHECK-NEXT: - - 0.50 0.50 - 1.00 0.50 0.50 flds (%edx)
# CHECK-NEXT: - - 0.50 0.50 - 1.00 0.50 0.50 fldl (%ecx)
# CHECK-NEXT: - - 0.50 0.50 - 1.00 0.50 0.50 fldt (%eax)
# CHECK-NEXT: - - - - 1.00 2.00 1.00 1.00 fldcw (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fldenv (%eax)
# CHECK-NEXT: - - 1.00 - - 1.00 - - fld1
# CHECK-NEXT: - - 1.00 1.00 - - - - fldl2e
# CHECK-NEXT: - - 1.00 1.00 - - - - fldl2t
# CHECK-NEXT: - - 1.00 1.00 - - - - fldlg2
# CHECK-NEXT: - - 1.00 1.00 - - - - fldln2
# CHECK-NEXT: - - 1.00 1.00 - - - - fldpi
# CHECK-NEXT: - - - - - 1.00 - - fldz
# CHECK-NEXT: - - 1.00 - - - - - fmul %st(0), %st(1)
# CHECK-NEXT: - - 1.00 - - - - - fmul %st(2)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fmuls (%ecx)
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 fmull (%eax)
# CHECK-NEXT: - - 1.00 - - - - - fmulp %st(1)
# CHECK-NEXT: - - 1.00 - - - - - fmulp %st(2)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fimuls (%ecx)
# CHECK-NEXT: - - 1.00 1.00 - - 0.50 0.50 fimull (%eax)
# CHECK-NEXT: - - - - - 1.00 - - fnop
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fpatan
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fprem
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fprem1
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fptan
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - frndint
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - frstor (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnsave (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fscale
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fsin
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fsincos
# CHECK-NEXT: - 24.00 1.00 - - - - - fsqrt
# CHECK-NEXT: - - - - - 1.00 - - fst %st(0)
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fsts (%edx)
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstl (%ecx)
# CHECK-NEXT: - - - - - 1.00 - - fstp %st(0)
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstpl (%edx)
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstpl (%ecx)
# CHECK-NEXT: - - - - 1.00 - 1.00 1.00 fstpt (%eax)
# CHECK-NEXT: - - - - 1.00 1.00 1.00 1.00 fnstcw (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnstenv (%eax)
# CHECK-NEXT: - - 1.00 - 1.00 - 1.00 1.00 fnstsw (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - frstor (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - wait
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fnsave (%eax)
# CHECK-NEXT: - - - 1.00 - - - - fsub %st(0), %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fsub %st(2)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubs (%ecx)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubl (%eax)
# CHECK-NEXT: - - - 1.00 - - - - fsubp %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fsubp %st(2)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubs (%ecx)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubl (%eax)
# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(0), %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fsubr %st(2)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubrs (%ecx)
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 fsubrl (%eax)
# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fsubrp %st(2)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubrs (%ecx)
# CHECK-NEXT: - - - 2.00 - - 0.50 0.50 fisubrl (%eax)
# CHECK-NEXT: - - - 1.00 - - - - ftst
# CHECK-NEXT: - - - 1.00 - - - - fucom %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fucom %st(3)
# CHECK-NEXT: - - - 1.00 - - - - fucomp %st(1)
# CHECK-NEXT: - - - 1.00 - - - - fucomp %st(3)
# CHECK-NEXT: - - - 1.00 - - - - fucompp
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucomi %st(3)
# CHECK-NEXT: - - 1.00 1.00 - 1.00 - - fucompi %st(3)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - wait
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxam
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxch %st(1)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxch %st(3)
# CHECK-NEXT: - - 0.50 0.50 1.00 2.00 0.50 0.50 fxrstor (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxsave (%eax)
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fxtract
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fyl2x
# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - fyl2xp1

View File

@ -0,0 +1,534 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
#
# Input for the llvm-mca instruction-tables report: AMD XOP vector
# instructions in AT&T syntax (sources first, destination last). Each
# instruction is listed in its register form followed by its memory form(s);
# where the memory operand may sit in more than one source position, every
# position that is tested gets its own line.
# The expected tables further down have one row per instruction in the same
# order; regenerate them with the script named in the NOTE line after any edit.

# vfrcz*: packed forms at xmm and ymm width, scalar forms at xmm width only.
vfrczpd %xmm0, %xmm3
vfrczpd (%rax), %xmm3
vfrczpd %ymm0, %ymm3
vfrczpd (%rax), %ymm3
vfrczps %xmm0, %xmm3
vfrczps (%rax), %xmm3
vfrczps %ymm0, %ymm3
vfrczps (%rax), %ymm3
vfrczsd %xmm0, %xmm3
vfrczsd (%rax), %xmm3
vfrczss %xmm0, %xmm3
vfrczss (%rax), %xmm3
# vpcmov: xmm and ymm width, memory operand in the first or second position.
vpcmov %xmm0, %xmm1, %xmm2, %xmm3
vpcmov (%rax), %xmm0, %xmm1, %xmm3
vpcmov %xmm0, (%rax), %xmm1, %xmm3
vpcmov %ymm0, %ymm1, %ymm2, %ymm3
vpcmov (%rax), %ymm0, %ymm1, %ymm3
vpcmov %ymm0, (%rax), %ymm1, %ymm3
# vpcom*: all lines use immediate $0 as the leading operand.
vpcomb $0, %xmm0, %xmm1, %xmm3
vpcomb $0, (%rax), %xmm0, %xmm3
vpcomd $0, %xmm0, %xmm1, %xmm3
vpcomd $0, (%rax), %xmm0, %xmm3
vpcomq $0, %xmm0, %xmm1, %xmm3
vpcomq $0, (%rax), %xmm0, %xmm3
vpcomub $0, %xmm0, %xmm1, %xmm3
vpcomub $0, (%rax), %xmm0, %xmm3
vpcomud $0, %xmm0, %xmm1, %xmm3
vpcomud $0, (%rax), %xmm0, %xmm3
vpcomuq $0, %xmm0, %xmm1, %xmm3
vpcomuq $0, (%rax), %xmm0, %xmm3
vpcomuw $0, %xmm0, %xmm1, %xmm3
vpcomuw $0, (%rax), %xmm0, %xmm3
vpcomw $0, %xmm0, %xmm1, %xmm3
vpcomw $0, (%rax), %xmm0, %xmm3
# vpermil2pd/ps: immediate $0, xmm and ymm width, memory operand in either of
# the first two register positions.
vpermil2pd $0, %xmm0, %xmm1, %xmm2, %xmm3
vpermil2pd $0, (%rax), %xmm0, %xmm1, %xmm3
vpermil2pd $0, %xmm0, (%rax), %xmm1, %xmm3
vpermil2pd $0, %ymm0, %ymm1, %ymm2, %ymm3
vpermil2pd $0, (%rax), %ymm0, %ymm1, %ymm3
vpermil2pd $0, %ymm0, (%rax), %ymm1, %ymm3
vpermil2ps $0, %xmm0, %xmm1, %xmm2, %xmm3
vpermil2ps $0, (%rax), %xmm0, %xmm1, %xmm3
vpermil2ps $0, %xmm0, (%rax), %xmm1, %xmm3
vpermil2ps $0, %ymm0, %ymm1, %ymm2, %ymm3
vpermil2ps $0, (%rax), %ymm0, %ymm1, %ymm3
vpermil2ps $0, %ymm0, (%rax), %ymm1, %ymm3
# Horizontal add/subtract: one source, register or memory.
vphaddbd %xmm0, %xmm3
vphaddbd (%rax), %xmm3
vphaddbq %xmm0, %xmm3
vphaddbq (%rax), %xmm3
vphaddbw %xmm0, %xmm3
vphaddbw (%rax), %xmm3
vphadddq %xmm0, %xmm3
vphadddq (%rax), %xmm3
vphaddubd %xmm0, %xmm3
vphaddubd (%rax), %xmm3
vphaddubq %xmm0, %xmm3
vphaddubq (%rax), %xmm3
vphaddubw %xmm0, %xmm3
vphaddubw (%rax), %xmm3
vphaddudq %xmm0, %xmm3
vphaddudq (%rax), %xmm3
vphadduwd %xmm0, %xmm3
vphadduwd (%rax), %xmm3
vphadduwq %xmm0, %xmm3
vphadduwq (%rax), %xmm3
vphaddwd %xmm0, %xmm3
vphaddwd (%rax), %xmm3
vphaddwq %xmm0, %xmm3
vphaddwq (%rax), %xmm3
vphsubbw %xmm0, %xmm3
vphsubbw (%rax), %xmm3
vphsubdq %xmm0, %xmm3
vphsubdq (%rax), %xmm3
vphsubwd %xmm0, %xmm3
vphsubwd (%rax), %xmm3
# Multiply-accumulate: the memory form places (%rax) in the second position only.
vpmacsdd %xmm0, %xmm1, %xmm2, %xmm3
vpmacsdd %xmm0, (%rax), %xmm1, %xmm3
vpmacsdqh %xmm0, %xmm1, %xmm2, %xmm3
vpmacsdqh %xmm0, (%rax), %xmm1, %xmm3
vpmacsdql %xmm0, %xmm1, %xmm2, %xmm3
vpmacsdql %xmm0, (%rax), %xmm1, %xmm3
vpmacssdd %xmm0, %xmm1, %xmm2, %xmm3
vpmacssdd %xmm0, (%rax), %xmm1, %xmm3
vpmacssdqh %xmm0, %xmm1, %xmm2, %xmm3
vpmacssdqh %xmm0, (%rax), %xmm1, %xmm3
vpmacssdql %xmm0, %xmm1, %xmm2, %xmm3
vpmacssdql %xmm0, (%rax), %xmm1, %xmm3
vpmacsswd %xmm0, %xmm1, %xmm2, %xmm3
vpmacsswd %xmm0, (%rax), %xmm1, %xmm3
vpmacssww %xmm0, %xmm1, %xmm2, %xmm3
vpmacssww %xmm0, (%rax), %xmm1, %xmm3
vpmacswd %xmm0, %xmm1, %xmm2, %xmm3
vpmacswd %xmm0, (%rax), %xmm1, %xmm3
vpmacsww %xmm0, %xmm1, %xmm2, %xmm3
vpmacsww %xmm0, (%rax), %xmm1, %xmm3
vpmadcsswd %xmm0, %xmm1, %xmm2, %xmm3
vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
vpperm %xmm0, %xmm1, %xmm2, %xmm3
vpperm (%rax), %xmm0, %xmm1, %xmm3
vpperm %xmm0, (%rax), %xmm1, %xmm3
# Rotates: register count (memory in either source position) plus the
# immediate-count form with a register or memory source.
vprotb %xmm0, %xmm1, %xmm3
vprotb (%rax), %xmm0, %xmm3
vprotb %xmm0, (%rax), %xmm3
vprotb $0, %xmm0, %xmm3
vprotb $0, (%rax), %xmm3
vprotd %xmm0, %xmm1, %xmm3
vprotd (%rax), %xmm0, %xmm3
vprotd %xmm0, (%rax), %xmm3
vprotd $0, %xmm0, %xmm3
vprotd $0, (%rax), %xmm3
vprotq %xmm0, %xmm1, %xmm3
vprotq (%rax), %xmm0, %xmm3
vprotq %xmm0, (%rax), %xmm3
vprotq $0, %xmm0, %xmm3
vprotq $0, (%rax), %xmm3
vprotw %xmm0, %xmm1, %xmm3
vprotw (%rax), %xmm0, %xmm3
vprotw %xmm0, (%rax), %xmm3
vprotw $0, %xmm0, %xmm3
vprotw $0, (%rax), %xmm3
# Shifts (vpsha*, vpshl*): register form plus memory in either source position.
vpshab %xmm0, %xmm1, %xmm3
vpshab (%rax), %xmm0, %xmm3
vpshab %xmm0, (%rax), %xmm3
vpshad %xmm0, %xmm1, %xmm3
vpshad (%rax), %xmm0, %xmm3
vpshad %xmm0, (%rax), %xmm3
vpshaq %xmm0, %xmm1, %xmm3
vpshaq (%rax), %xmm0, %xmm3
vpshaq %xmm0, (%rax), %xmm3
vpshaw %xmm0, %xmm1, %xmm3
vpshaw (%rax), %xmm0, %xmm3
vpshaw %xmm0, (%rax), %xmm3
vpshlb %xmm0, %xmm1, %xmm3
vpshlb (%rax), %xmm0, %xmm3
vpshlb %xmm0, (%rax), %xmm3
vpshld %xmm0, %xmm1, %xmm3
vpshld (%rax), %xmm0, %xmm3
vpshld %xmm0, (%rax), %xmm3
vpshlq %xmm0, %xmm1, %xmm3
vpshlq (%rax), %xmm0, %xmm3
vpshlq %xmm0, (%rax), %xmm3
vpshlw %xmm0, %xmm1, %xmm3
vpshlw (%rax), %xmm0, %xmm3
vpshlw %xmm0, (%rax), %xmm3
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 3 1.00 vfrczpd %xmm0, %xmm3
# CHECK-NEXT: 2 9 1.00 * vfrczpd (%rax), %xmm3
# CHECK-NEXT: 1 3 1.00 vfrczpd %ymm0, %ymm3
# CHECK-NEXT: 2 10 1.00 * vfrczpd (%rax), %ymm3
# CHECK-NEXT: 1 3 1.00 vfrczps %xmm0, %xmm3
# CHECK-NEXT: 2 9 1.00 * vfrczps (%rax), %xmm3
# CHECK-NEXT: 1 3 1.00 vfrczps %ymm0, %ymm3
# CHECK-NEXT: 2 10 1.00 * vfrczps (%rax), %ymm3
# CHECK-NEXT: 1 3 1.00 vfrczsd %xmm0, %xmm3
# CHECK-NEXT: 2 9 1.00 * vfrczsd (%rax), %xmm3
# CHECK-NEXT: 1 3 1.00 vfrczss %xmm0, %xmm3
# CHECK-NEXT: 2 9 1.00 * vfrczss (%rax), %xmm3
# CHECK-NEXT: 1 1 0.50 vpcmov %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcmov (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcmov %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 1 1.00 vpcmov %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 8 1.00 * vpcmov (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 2 8 1.00 * vpcmov %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: 1 1 0.50 vpcomb $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcomb $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcomd $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcomd $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcomq $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcomq $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcomub $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcomub $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcomud $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcomud $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcomuq $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcomuq $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcomuw $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcomuw $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 vpcomw $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpcomw $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: 1 1 1.00 vpermil2pd $0, %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpermil2pd $0, (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpermil2pd $0, %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 1 1.00 vpermil2pd $0, %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 8 1.00 * vpermil2pd $0, (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 2 8 1.00 * vpermil2pd $0, %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: 1 1 1.00 vpermil2ps $0, %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpermil2ps $0, (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpermil2ps $0, %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 1 1.00 vpermil2ps $0, %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: 2 8 1.00 * vpermil2ps $0, (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: 2 8 1.00 * vpermil2ps $0, %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: 3 3 1.50 vphaddbd %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddbd (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphaddbq %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddbq (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphaddbw %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddbw (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphadddq %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphadddq (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphaddubd %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddubd (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphaddubq %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddubq (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphaddubw %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddubw (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphaddudq %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddudq (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphadduwd %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphadduwd (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphadduwq %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphadduwq (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphaddwd %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddwd (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphaddwq %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphaddwq (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphsubbw %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphsubbw (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphsubdq %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphsubdq (%rax), %xmm3
# CHECK-NEXT: 3 3 1.50 vphsubwd %xmm0, %xmm3
# CHECK-NEXT: 4 9 1.50 * vphsubwd (%rax), %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacsdd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacsdd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacsdqh %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacsdqh %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacsdql %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacsdql %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacssdd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacssdd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacssdqh %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacssdqh %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacssdql %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacssdql %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacsswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacsswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacssww %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacssww %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmacsww %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmacsww %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmadcsswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 5 1.00 vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 11 1.00 * vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 1 0.50 vpperm %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpperm (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 0.50 * vpperm %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: 1 1 1.00 vprotb %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotb (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotb %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vprotb $0, %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotb $0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vprotd %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotd (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotd %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vprotd $0, %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotd $0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vprotq %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotq (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotq %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vprotq $0, %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotq $0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vprotw %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotw (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotw %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vprotw $0, %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vprotw $0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vpshab %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshab (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshab %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vpshad %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshad (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshad %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vpshaq %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshaq (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshaq %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vpshaw %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshaw (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshaw %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vpshlb %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshlb (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshlb %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vpshld %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshld (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshld %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vpshlq %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshlq (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshlq %xmm0, (%rax), %xmm3
# CHECK-NEXT: 1 1 1.00 vpshlw %xmm0, %xmm1, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshlw (%rax), %xmm0, %xmm3
# CHECK-NEXT: 2 7 1.00 * vpshlw %xmm0, (%rax), %xmm3
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 68.00 68.00 - 71.00 41.50 41.50
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - - 1.00 - - - - vfrczpd %xmm0, %xmm3
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczpd (%rax), %xmm3
# CHECK-NEXT: - - - 1.00 - - - - vfrczpd %ymm0, %ymm3
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczpd (%rax), %ymm3
# CHECK-NEXT: - - - 1.00 - - - - vfrczps %xmm0, %xmm3
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczps (%rax), %xmm3
# CHECK-NEXT: - - - 1.00 - - - - vfrczps %ymm0, %ymm3
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczps (%rax), %ymm3
# CHECK-NEXT: - - - 1.00 - - - - vfrczsd %xmm0, %xmm3
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczsd (%rax), %xmm3
# CHECK-NEXT: - - - 1.00 - - - - vfrczss %xmm0, %xmm3
# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vfrczss (%rax), %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcmov %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmov (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcmov %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - 1.00 - - vpcmov %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpcmov (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpcmov %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomb $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomb $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomd $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomd $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomq $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomq $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomub $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomub $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomud $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomud $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomuq $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomuq $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomuw $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomuw $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpcomw $0, %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpcomw $0, (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - - - - 1.00 - - vpermil2pd $0, %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2pd $0, (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2pd $0, %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - 1.00 - - vpermil2pd $0, %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2pd $0, (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2pd $0, %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - - - 1.00 - - vpermil2ps $0, %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2ps $0, (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2ps $0, %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - - - 1.00 - - vpermil2ps $0, %ymm0, %ymm1, %ymm2, %ymm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2ps $0, (%rax), %ymm0, %ymm1, %ymm3
# CHECK-NEXT: - - - - - 1.00 0.50 0.50 vpermil2ps $0, %ymm0, (%rax), %ymm1, %ymm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddbd %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddbd (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddbq %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddbq (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddbw %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddbw (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphadddq %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphadddq (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddubd %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddubd (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddubq %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddubq (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddubw %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddubw (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddudq %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddudq (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphadduwd %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphadduwd (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphadduwq %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphadduwq (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddwd %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddwd (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphaddwq %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphaddwq (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubbw %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubbw (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubdq %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubdq (%rax), %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 - - vphsubwd %xmm0, %xmm3
# CHECK-NEXT: - - - 1.50 - 1.50 0.50 0.50 vphsubwd (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacsdd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsdd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacsdqh %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsdqh %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacsdql %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsdql %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacssdd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacssdd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacssdqh %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacssdqh %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacssdql %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacssdql %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacsswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacssww %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacssww %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmacsww %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmacsww %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmadcsswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmadcsswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpmadcswd %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpmadcswd %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 - - vpperm %xmm0, %xmm1, %xmm2, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpperm (%rax), %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - - 0.50 - 0.50 0.50 0.50 vpperm %xmm0, (%rax), %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vprotb %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotb (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotb %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vprotb $0, %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotb $0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vprotd %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotd (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotd %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vprotd $0, %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotd $0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vprotq %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotq (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotq %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vprotq $0, %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotq $0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vprotw %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotw (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotw %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vprotw $0, %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vprotw $0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpshab %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshab (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshab %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpshad %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshad (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshad %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpshaq %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshaq (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshaq %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpshaw %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshaw (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshaw %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpshlb %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlb (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlb %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpshld %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshld (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshld %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpshlq %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlq (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlq %xmm0, (%rax), %xmm3
# CHECK-NEXT: - - 1.00 - - - - - vpshlw %xmm0, %xmm1, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlw (%rax), %xmm0, %xmm3
# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vpshlw %xmm0, (%rax), %xmm3

View File

@ -0,0 +1,60 @@
# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -scheduler-stats < %s | FileCheck %s
#
# Purpose: simulate a single iteration of the two-instruction sequence below
# and verify llvm-mca's summary, per-instruction info, scheduler statistics
# (enabled by -scheduler-stats) and resource-pressure reports.
# NOTE(review): the invocation uses -mcpu=x86-64 and the expected resource
# names below are SB*-prefixed; presumably this is a baseline that a later
# change retargets to another CPU model -- confirm before editing by hand.
# The expectations are autogenerated (see the first line of this file), so
# regenerate them with that script instead of adjusting values manually.
#
# Input sequence: a packed single-precision multiply with a memory operand at
# (%rsi), followed by a 64-bit add that overwrites %rsi.
vmulps (%rsi), %xmm0, %xmm0
add %rsi, %rsi
# CHECK: Iterations: 1
# CHECK-NEXT: Instructions: 2
# CHECK-NEXT: Total Cycles: 14
# CHECK-NEXT: Total uOps: 3
# CHECK: Dispatch Width: 4
# CHECK-NEXT: uOps Per Cycle: 0.21
# CHECK-NEXT: IPC: 0.14
# CHECK-NEXT: Block RThroughput: 1.0
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 2 11 1.00 * vmulps (%rsi), %xmm0, %xmm0
# CHECK-NEXT: 1 1 0.33 addq %rsi, %rsi
#
# Expected issue histogram: both instructions issue in the same cycle, and no
# instruction issues in the remaining 13 of the 14 simulated cycles.
# CHECK: Schedulers - number of cycles where we saw N instructions issued:
# CHECK-NEXT: [# issued], [# cycles]
# CHECK-NEXT: 0, 13 (92.9%)
# CHECK-NEXT: 2, 1 (7.1%)
# CHECK: Scheduler's queue usage:
# CHECK-NEXT: [1] Resource name.
# CHECK-NEXT: [2] Average number of used buffer entries.
# CHECK-NEXT: [3] Maximum number of used buffer entries.
# CHECK-NEXT: [4] Total number of buffer entries.
# CHECK: [1] [2] [3] [4]
# CHECK-NEXT: SBPortAny 0 2 54
# CHECK: Resources:
# CHECK-NEXT: [0] - SBDivider
# CHECK-NEXT: [1] - SBFPDivider
# CHECK-NEXT: [2] - SBPort0
# CHECK-NEXT: [3] - SBPort1
# CHECK-NEXT: [4] - SBPort4
# CHECK-NEXT: [5] - SBPort5
# CHECK-NEXT: [6.0] - SBPort23
# CHECK-NEXT: [6.1] - SBPort23
#
# Expected pressure: the multiply occupies SBPort0 plus one SBPort23 unit for
# its load; the add occupies SBPort5.
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
# CHECK-NEXT: - - 1.00 - - 1.00 - 1.00
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
# CHECK-NEXT: - - 1.00 - - - - 1.00 vmulps (%rsi), %xmm0, %xmm0
# CHECK-NEXT: - - - - - 1.00 - - addq %rsi, %rsi

Some files were not shown because too many files have changed in this diff Show More