1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

Regenerate FMA tests

This commit is contained in:
Simon Pilgrim 2020-02-08 14:31:06 +00:00
parent 4628374ad1
commit f84c84ef81
5 changed files with 421 additions and 154 deletions

View File

@ -1,20 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=bdver2 -mattr=-fma -mtriple=i686-apple-darwin < %s | FileCheck %s
; RUN: llc -mcpu=bdver2 -mattr=-fma,-fma4 -mtriple=i686-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
; fmafunc: llvm.fmuladd on a <3 x float> vector.
;
; Expected lowering, per RUN configuration:
;   * CHECK       (-fma only):      one 4-operand vfmaddps.
;   * CHECK-NOFMA (-fma and -fma4): a separate vmulps followed by vaddps.
;
; Every instruction is pinned with a -NEXT check from the label to the return,
; so no other instruction (extra vmulps/vaddps, or a calll) can appear in
; between. That makes separate -NOT checks unnecessary.
define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
; CHECK-LABEL: fmafunc:
; CHECK: ## %bb.0:
; CHECK-NEXT: vfmaddps %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retl
;
; CHECK-NOFMA-LABEL: fmafunc:
; CHECK-NOFMA: ## %bb.0:
; CHECK-NOFMA-NEXT: vmulps %xmm1, %xmm0, %xmm0
; CHECK-NOFMA-NEXT: vaddps %xmm2, %xmm0, %xmm0
; CHECK-NOFMA-NEXT: retl
  %ret = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
  ret <3 x float> %ret
}

View File

@ -21,7 +21,7 @@ define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_aba_ss:
; FMA: # %bb.0:
; FMA-NEXT: vmovaps (%rcx), %xmm0
; FMA-NEXT: vfmadd132ss (%rdx), %xmm0, %xmm0
; FMA-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -31,7 +31,7 @@ define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmadd_bba_ss:
; FMA: # %bb.0:
; FMA-NEXT: vmovaps (%rdx), %xmm0
; FMA-NEXT: vfmadd213ss (%rcx), %xmm0, %xmm0
; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -115,7 +115,7 @@ define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA-LABEL: test_x86_fmadd_aba_sd:
; FMA: # %bb.0:
; FMA-NEXT: vmovapd (%rcx), %xmm0
; FMA-NEXT: vfmadd132sd (%rdx), %xmm0, %xmm0
; FMA-NEXT: vfmadd132sd {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; FMA-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -125,7 +125,7 @@ define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA-LABEL: test_x86_fmadd_bba_sd:
; FMA: # %bb.0:
; FMA-NEXT: vmovapd (%rdx), %xmm0
; FMA-NEXT: vfmadd213sd (%rcx), %xmm0, %xmm0
; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm0 * xmm0) + mem
; FMA-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -210,7 +210,7 @@ define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_aba_ss:
; FMA: # %bb.0:
; FMA-NEXT: vmovaps (%rcx), %xmm0
; FMA-NEXT: vfnmadd132ss (%rdx), %xmm0, %xmm0
; FMA-NEXT: vfnmadd132ss {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -220,7 +220,7 @@ define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmadd_bba_ss:
; FMA: # %bb.0:
; FMA-NEXT: vmovaps (%rdx), %xmm0
; FMA-NEXT: vfnmadd213ss (%rcx), %xmm0, %xmm0
; FMA-NEXT: vfnmadd213ss {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -304,7 +304,7 @@ define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA-LABEL: test_x86_fnmadd_aba_sd:
; FMA: # %bb.0:
; FMA-NEXT: vmovapd (%rcx), %xmm0
; FMA-NEXT: vfnmadd132sd (%rdx), %xmm0, %xmm0
; FMA-NEXT: vfnmadd132sd {{.*#+}} xmm0 = -(xmm0 * mem) + xmm0
; FMA-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -314,7 +314,7 @@ define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA-LABEL: test_x86_fnmadd_bba_sd:
; FMA: # %bb.0:
; FMA-NEXT: vmovapd (%rdx), %xmm0
; FMA-NEXT: vfnmadd213sd (%rcx), %xmm0, %xmm0
; FMA-NEXT: vfnmadd213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) + mem
; FMA-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -398,7 +398,7 @@ define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_aba_ss:
; FMA: # %bb.0:
; FMA-NEXT: vmovaps (%rcx), %xmm0
; FMA-NEXT: vfmsub132ss (%rdx), %xmm0, %xmm0
; FMA-NEXT: vfmsub132ss {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -408,7 +408,7 @@ define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fmsub_bba_ss:
; FMA: # %bb.0:
; FMA-NEXT: vmovaps (%rdx), %xmm0
; FMA-NEXT: vfmsub213ss (%rcx), %xmm0, %xmm0
; FMA-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -492,7 +492,7 @@ define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA-LABEL: test_x86_fmsub_aba_sd:
; FMA: # %bb.0:
; FMA-NEXT: vmovapd (%rcx), %xmm0
; FMA-NEXT: vfmsub132sd (%rdx), %xmm0, %xmm0
; FMA-NEXT: vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm0
; FMA-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -502,7 +502,7 @@ define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA-LABEL: test_x86_fmsub_bba_sd:
; FMA: # %bb.0:
; FMA-NEXT: vmovapd (%rdx), %xmm0
; FMA-NEXT: vfmsub213sd (%rcx), %xmm0, %xmm0
; FMA-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm0 * xmm0) - mem
; FMA-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -587,7 +587,7 @@ define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_aba_ss:
; FMA: # %bb.0:
; FMA-NEXT: vmovaps (%rcx), %xmm0
; FMA-NEXT: vfnmsub132ss (%rdx), %xmm0, %xmm0
; FMA-NEXT: vfnmsub132ss {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -597,7 +597,7 @@ define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
; FMA-LABEL: test_x86_fnmsub_bba_ss:
; FMA: # %bb.0:
; FMA-NEXT: vmovaps (%rdx), %xmm0
; FMA-NEXT: vfnmsub213ss (%rcx), %xmm0, %xmm0
; FMA-NEXT: vfnmsub213ss {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT: retq
%res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
ret <4 x float> %res
@ -681,7 +681,7 @@ define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA-LABEL: test_x86_fnmsub_aba_sd:
; FMA: # %bb.0:
; FMA-NEXT: vmovapd (%rcx), %xmm0
; FMA-NEXT: vfnmsub132sd (%rdx), %xmm0, %xmm0
; FMA-NEXT: vfnmsub132sd {{.*#+}} xmm0 = -(xmm0 * mem) - xmm0
; FMA-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res
@ -691,7 +691,7 @@ define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0
; FMA-LABEL: test_x86_fnmsub_bba_sd:
; FMA: # %bb.0:
; FMA-NEXT: vmovapd (%rdx), %xmm0
; FMA-NEXT: vfnmsub213sd (%rcx), %xmm0, %xmm0
; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm0 * xmm0) - mem
; FMA-NEXT: retq
%res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
ret <2 x double> %res

View File

@ -1,10 +1,22 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s
; CHECK-LABEL: fmaddsubpd_loop_128:
; CHECK: vfmaddsub231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fmaddsubpd_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB0_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmaddsub231pd {{.*#+}} xmm2 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB0_2
; CHECK-NEXT: .LBB0_3: # %for.end
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -26,11 +38,22 @@ for.end:
ret <2 x double> %c.addr.0
}
; CHECK-LABEL: fmsubaddpd_loop_128:
; CHECK: vfmsubadd231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fmsubaddpd_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB1_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmsubadd231pd {{.*#+}} xmm2 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB1_2
; CHECK-NEXT: .LBB1_3: # %for.end
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -52,11 +75,22 @@ for.end:
ret <2 x double> %c.addr.0
}
; CHECK-LABEL: fmaddpd_loop_128:
; CHECK: vfmadd231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fmaddpd_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB2_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB2_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmadd231pd {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB2_2
; CHECK-NEXT: .LBB2_3: # %for.end
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -78,11 +112,22 @@ for.end:
ret <2 x double> %c.addr.0
}
; CHECK-LABEL: fmsubpd_loop_128:
; CHECK: vfmsub231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fmsubpd_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB3_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB3_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmsub231pd {{.*#+}} xmm2 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB3_2
; CHECK-NEXT: .LBB3_3: # %for.end
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -104,11 +149,22 @@ for.end:
ret <2 x double> %c.addr.0
}
; CHECK-LABEL: fnmaddpd_loop_128:
; CHECK: vfnmadd231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fnmaddpd_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB4_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB4_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfnmadd231pd {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB4_2
; CHECK-NEXT: .LBB4_3: # %for.end
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -130,11 +186,22 @@ for.end:
ret <2 x double> %c.addr.0
}
; CHECK-LABEL: fnmsubpd_loop_128:
; CHECK: vfnmsub231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: fnmsubpd_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB5_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB5_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfnmsub231pd {{.*#+}} xmm2 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB5_2
; CHECK-NEXT: .LBB5_3: # %for.end
; CHECK-NEXT: vmovapd %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -163,12 +230,22 @@ declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x do
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
; CHECK-LABEL: fmaddsubps_loop_128:
; CHECK: vfmaddsub231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmaddsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fmaddsubps_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB6_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB6_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmaddsub231ps {{.*#+}} xmm2 = (xmm0 * xmm1) +/- xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB6_2
; CHECK-NEXT: .LBB6_3: # %for.end
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -190,11 +267,22 @@ for.end:
ret <4 x float> %c.addr.0
}
; CHECK-LABEL: fmsubaddps_loop_128:
; CHECK: vfmsubadd231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmsubaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fmsubaddps_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB7_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB7_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmsubadd231ps {{.*#+}} xmm2 = (xmm0 * xmm1) -/+ xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB7_2
; CHECK-NEXT: .LBB7_3: # %for.end
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -216,11 +304,22 @@ for.end:
ret <4 x float> %c.addr.0
}
; CHECK-LABEL: fmaddps_loop_128:
; CHECK: vfmadd231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fmaddps_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB8_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB8_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmadd231ps {{.*#+}} xmm2 = (xmm0 * xmm1) + xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB8_2
; CHECK-NEXT: .LBB8_3: # %for.end
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -242,11 +341,22 @@ for.end:
ret <4 x float> %c.addr.0
}
; CHECK-LABEL: fmsubps_loop_128:
; CHECK: vfmsub231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fmsubps_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB9_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB9_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmsub231ps {{.*#+}} xmm2 = (xmm0 * xmm1) - xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB9_2
; CHECK-NEXT: .LBB9_3: # %for.end
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -268,11 +378,22 @@ for.end:
ret <4 x float> %c.addr.0
}
; CHECK-LABEL: fnmaddps_loop_128:
; CHECK: vfnmadd231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fnmaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fnmaddps_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB10_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB10_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfnmadd231ps {{.*#+}} xmm2 = -(xmm0 * xmm1) + xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB10_2
; CHECK-NEXT: .LBB10_3: # %for.end
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -294,11 +415,22 @@ for.end:
ret <4 x float> %c.addr.0
}
; CHECK-LABEL: fnmsubps_loop_128:
; CHECK: vfnmsub231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fnmsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: fnmsubps_loop_128:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB11_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB11_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfnmsub231ps {{.*#+}} xmm2 = -(xmm0 * xmm1) - xmm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB11_2
; CHECK-NEXT: .LBB11_3: # %for.end
; CHECK-NEXT: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -327,11 +459,22 @@ declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
; CHECK-LABEL: fmaddsubpd_loop_256:
; CHECK: vfmaddsub231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: fmaddsubpd_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB12_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB12_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmaddsub231pd {{.*#+}} ymm2 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB12_2
; CHECK-NEXT: .LBB12_3: # %for.end
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -353,11 +496,22 @@ for.end:
ret <4 x double> %c.addr.0
}
; CHECK-LABEL: fmsubaddpd_loop_256:
; CHECK: vfmsubadd231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: fmsubaddpd_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB13_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB13_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmsubadd231pd {{.*#+}} ymm2 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB13_2
; CHECK-NEXT: .LBB13_3: # %for.end
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -379,11 +533,22 @@ for.end:
ret <4 x double> %c.addr.0
}
; CHECK-LABEL: fmaddpd_loop_256:
; CHECK: vfmadd231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: fmaddpd_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB14_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB14_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmadd231pd {{.*#+}} ymm2 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB14_2
; CHECK-NEXT: .LBB14_3: # %for.end
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -405,11 +570,22 @@ for.end:
ret <4 x double> %c.addr.0
}
; CHECK-LABEL: fmsubpd_loop_256:
; CHECK: vfmsub231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: fmsubpd_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB15_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB15_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmsub231pd {{.*#+}} ymm2 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB15_2
; CHECK-NEXT: .LBB15_3: # %for.end
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -431,11 +607,22 @@ for.end:
ret <4 x double> %c.addr.0
}
; CHECK-LABEL: fnmaddpd_loop_256:
; CHECK: vfnmadd231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: fnmaddpd_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB16_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB16_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfnmadd231pd {{.*#+}} ymm2 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB16_2
; CHECK-NEXT: .LBB16_3: # %for.end
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -457,11 +644,22 @@ for.end:
ret <4 x double> %c.addr.0
}
; CHECK-LABEL: fnmsubpd_loop_256:
; CHECK: vfnmsub231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: fnmsubpd_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB17_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB17_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfnmsub231pd {{.*#+}} ymm2 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB17_2
; CHECK-NEXT: .LBB17_3: # %for.end
; CHECK-NEXT: vmovapd %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -490,12 +688,22 @@ declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
; CHECK-LABEL: fmaddsubps_loop_256:
; CHECK: vfmaddsub231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fmaddsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: fmaddsubps_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB18_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB18_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmaddsub231ps {{.*#+}} ymm2 = (ymm0 * ymm1) +/- ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB18_2
; CHECK-NEXT: .LBB18_3: # %for.end
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -517,11 +725,22 @@ for.end:
ret <8 x float> %c.addr.0
}
; CHECK-LABEL: fmsubaddps_loop_256:
; CHECK: vfmsubadd231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fmsubaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: fmsubaddps_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB19_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB19_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmsubadd231ps {{.*#+}} ymm2 = (ymm0 * ymm1) -/+ ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB19_2
; CHECK-NEXT: .LBB19_3: # %for.end
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -543,11 +762,22 @@ for.end:
ret <8 x float> %c.addr.0
}
; CHECK-LABEL: fmaddps_loop_256:
; CHECK: vfmadd231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fmaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: fmaddps_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB20_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB20_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmadd231ps {{.*#+}} ymm2 = (ymm0 * ymm1) + ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB20_2
; CHECK-NEXT: .LBB20_3: # %for.end
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -569,11 +799,22 @@ for.end:
ret <8 x float> %c.addr.0
}
; CHECK-LABEL: fmsubps_loop_256:
; CHECK: vfmsub231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fmsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: fmsubps_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB21_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB21_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfmsub231ps {{.*#+}} ymm2 = (ymm0 * ymm1) - ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB21_2
; CHECK-NEXT: .LBB21_3: # %for.end
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -595,11 +836,22 @@ for.end:
ret <8 x float> %c.addr.0
}
; CHECK-LABEL: fnmaddps_loop_256:
; CHECK: vfnmadd231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fnmaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: fnmaddps_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB22_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB22_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfnmadd231ps {{.*#+}} ymm2 = -(ymm0 * ymm1) + ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB22_2
; CHECK-NEXT: .LBB22_3: # %for.end
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond
@ -621,11 +873,22 @@ for.end:
ret <8 x float> %c.addr.0
}
; CHECK-LABEL: fnmsubps_loop_256:
; CHECK: vfnmsub231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fnmsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: fnmsubps_loop_256:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jge .LBB23_3
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB23_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vfnmsub231ps {{.*#+}} ymm2 = -(ymm0 * ymm1) - ymm2
; CHECK-NEXT: incl %eax
; CHECK-NEXT: cmpl %edi, %eax
; CHECK-NEXT: jl .LBB23_2
; CHECK-NEXT: .LBB23_3: # %for.end
; CHECK-NEXT: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
entry:
br label %for.cond

View File

@ -1,14 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_256 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck -check-prefix=FMA3 -check-prefix=FMA3_512 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck -check-prefix=FMA4 %s
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma | FileCheck %s -check-prefixes=FMA3,FMA3_256
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma,+avx512f | FileCheck %s -check-prefixes=FMA3,FMA3_512
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4 | FileCheck %s -check-prefixes=FMA4
; This test checks the fusing of MUL + ADDSUB to FMADDSUB.
define <2 x double> @mul_addsub_pd128(<2 x double> %A, <2 x double> %B, <2 x double> %C) #0 {
; FMA3-LABEL: mul_addsub_pd128:
; FMA3: # %bb.0: # %entry
; FMA3-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
; FMA3-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
; FMA3-NEXT: retq
;
; FMA4-LABEL: mul_addsub_pd128:
@ -26,7 +26,7 @@ entry:
define <4 x float> @mul_addsub_ps128(<4 x float> %A, <4 x float> %B, <4 x float> %C) #0 {
; FMA3-LABEL: mul_addsub_ps128:
; FMA3: # %bb.0: # %entry
; FMA3-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
; FMA3-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
; FMA3-NEXT: retq
;
; FMA4-LABEL: mul_addsub_ps128:
@ -44,7 +44,7 @@ entry:
define <4 x double> @mul_addsub_pd256(<4 x double> %A, <4 x double> %B, <4 x double> %C) #0 {
; FMA3-LABEL: mul_addsub_pd256:
; FMA3: # %bb.0: # %entry
; FMA3-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
; FMA3-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
; FMA3-NEXT: retq
;
; FMA4-LABEL: mul_addsub_pd256:
@ -62,7 +62,7 @@ entry:
define <8 x float> @mul_addsub_ps256(<8 x float> %A, <8 x float> %B, <8 x float> %C) #0 {
; FMA3-LABEL: mul_addsub_ps256:
; FMA3: # %bb.0: # %entry
; FMA3-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
; FMA3-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
; FMA3-NEXT: retq
;
; FMA4-LABEL: mul_addsub_ps256:
@ -80,13 +80,13 @@ entry:
define <8 x double> @mul_addsub_pd512(<8 x double> %A, <8 x double> %B, <8 x double> %C) #0 {
; FMA3_256-LABEL: mul_addsub_pd512:
; FMA3_256: # %bb.0: # %entry
; FMA3_256-NEXT: vfmaddsub213pd %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT: vfmaddsub213pd %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) +/- ymm4
; FMA3_256-NEXT: vfmaddsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) +/- ymm5
; FMA3_256-NEXT: retq
;
; FMA3_512-LABEL: mul_addsub_pd512:
; FMA3_512: # %bb.0: # %entry
; FMA3_512-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT: vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; FMA3_512-NEXT: retq
;
; FMA4-LABEL: mul_addsub_pd512:
@ -105,13 +105,13 @@ entry:
define <16 x float> @mul_addsub_ps512(<16 x float> %A, <16 x float> %B, <16 x float> %C) #0 {
; FMA3_256-LABEL: mul_addsub_ps512:
; FMA3_256: # %bb.0: # %entry
; FMA3_256-NEXT: vfmaddsub213ps %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT: vfmaddsub213ps %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) +/- ymm4
; FMA3_256-NEXT: vfmaddsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) +/- ymm5
; FMA3_256-NEXT: retq
;
; FMA3_512-LABEL: mul_addsub_ps512:
; FMA3_512: # %bb.0: # %entry
; FMA3_512-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; FMA3_512-NEXT: retq
;
; FMA4-LABEL: mul_addsub_ps512:
@ -130,7 +130,7 @@ entry:
define <4 x float> @buildvector_mul_addsub_ps128(<4 x float> %C, <4 x float> %D, <4 x float> %B) #0 {
; FMA3-LABEL: buildvector_mul_addsub_ps128:
; FMA3: # %bb.0: # %bb
; FMA3-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0
; FMA3-NEXT: vfmaddsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
; FMA3-NEXT: retq
;
; FMA4-LABEL: buildvector_mul_addsub_ps128:
@ -161,7 +161,7 @@ bb:
define <2 x double> @buildvector_mul_addsub_pd128(<2 x double> %C, <2 x double> %D, <2 x double> %B) #0 {
; FMA3-LABEL: buildvector_mul_addsub_pd128:
; FMA3: # %bb.0: # %bb
; FMA3-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0
; FMA3-NEXT: vfmaddsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) +/- xmm2
; FMA3-NEXT: retq
;
; FMA4-LABEL: buildvector_mul_addsub_pd128:
@ -184,7 +184,7 @@ bb:
define <8 x float> @buildvector_mul_addsub_ps256(<8 x float> %C, <8 x float> %D, <8 x float> %B) #0 {
; FMA3-LABEL: buildvector_mul_addsub_ps256:
; FMA3: # %bb.0: # %bb
; FMA3-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0
; FMA3-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
; FMA3-NEXT: retq
;
; FMA4-LABEL: buildvector_mul_addsub_ps256:
@ -231,7 +231,7 @@ bb:
define <4 x double> @buildvector_mul_addsub_pd256(<4 x double> %C, <4 x double> %D, <4 x double> %B) #0 {
; FMA3-LABEL: buildvector_mul_addsub_pd256:
; FMA3: # %bb.0: # %bb
; FMA3-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0
; FMA3-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) +/- ymm2
; FMA3-NEXT: retq
;
; FMA4-LABEL: buildvector_mul_addsub_pd256:
@ -262,13 +262,13 @@ bb:
define <16 x float> @buildvector_mul_addsub_ps512(<16 x float> %C, <16 x float> %D, <16 x float> %B) #0 {
; FMA3_256-LABEL: buildvector_mul_addsub_ps512:
; FMA3_256: # %bb.0: # %bb
; FMA3_256-NEXT: vfmaddsub213ps %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT: vfmaddsub213ps %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT: vfmaddsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) +/- ymm4
; FMA3_256-NEXT: vfmaddsub213ps {{.*#+}} ymm1 = (ymm3 * ymm1) +/- ymm5
; FMA3_256-NEXT: retq
;
; FMA3_512-LABEL: buildvector_mul_addsub_ps512:
; FMA3_512: # %bb.0: # %bb
; FMA3_512-NEXT: vfmaddsub213ps %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT: vfmaddsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; FMA3_512-NEXT: retq
;
; FMA4-LABEL: buildvector_mul_addsub_ps512:
@ -348,13 +348,13 @@ bb:
define <8 x double> @buildvector_mul_addsub_pd512(<8 x double> %C, <8 x double> %D, <8 x double> %B) #0 {
; FMA3_256-LABEL: buildvector_mul_addsub_pd512:
; FMA3_256: # %bb.0: # %bb
; FMA3_256-NEXT: vfmaddsub213pd %ymm4, %ymm2, %ymm0
; FMA3_256-NEXT: vfmaddsub213pd %ymm5, %ymm3, %ymm1
; FMA3_256-NEXT: vfmaddsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) +/- ymm4
; FMA3_256-NEXT: vfmaddsub213pd {{.*#+}} ymm1 = (ymm3 * ymm1) +/- ymm5
; FMA3_256-NEXT: retq
;
; FMA3_512-LABEL: buildvector_mul_addsub_pd512:
; FMA3_512: # %bb.0: # %bb
; FMA3_512-NEXT: vfmaddsub213pd %zmm2, %zmm1, %zmm0
; FMA3_512-NEXT: vfmaddsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) +/- zmm2
; FMA3_512-NEXT: retq
;
; FMA4-LABEL: buildvector_mul_addsub_pd512:

View File

@ -1,10 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-unknown -mattr=+avx512f < %s | FileCheck %s
define float @test_x86_fma_intersection_fmf(float %a, float %b) {
define float @test_x86_fma_intersection_fmf(float %a, float %b) nounwind {
; CHECK-LABEL: test_x86_fma_intersection_fmf:
; CHECK: # %bb.0:
; CHECK: vfmadd132ss {{[0-9]+}}(%esp), %xmm0, %xmm0
; CHECK: retl
; CHECK: # %bb.0:
; CHECK-NEXT: pushl %eax
; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: vfmadd132ss {{.*#+}} xmm0 = (xmm0 * mem) + xmm0
; CHECK-NEXT: vmovss %xmm0, (%esp)
; CHECK-NEXT: flds (%esp)
; CHECK-NEXT: popl %eax
; CHECK-NEXT: retl
%tmp8 = fmul fast float %a, %b
%tmp9 = fadd fast float %tmp8, %b
%tmp10 = insertelement <4 x float> undef, float %tmp9, i32 1