1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00

[X86] Correct the FMA3 regular expressions in the znver1 scheduler model.

llvm-svn: 320458
This commit is contained in:
Craig Topper 2017-12-12 07:06:32 +00:00
parent 9fd137a3a9
commit 5ba4f37a1b
2 changed files with 36 additions and 36 deletions

View File

@ -1626,8 +1626,8 @@ def ZnWriteFMADDr : SchedWriteRes<[ZnFPU03]> {
}
def : InstRW<[ZnWriteFMADDr],
(instregex
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?",
"VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r",
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)(213|132|231)P(S|D)(Y)?r",
"VF(N?)M(ADD|SUB)(132|231|213)S(S|D)r",
"VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?",
"VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>;
@ -1638,8 +1638,8 @@ def ZnWriteFMADDm : SchedWriteRes<[ZnAGU, ZnFPU03]> {
}
def : InstRW<[ZnWriteFMADDm],
(instregex
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?",
"VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m",
"VF(N?)M(ADD|SUB|ADDSUB|SUBADD)(213|132|231)P(S|D)(Y)?m",
"VF(N?)M(ADD|SUB)(132|231|213)S(S|D)m",
"VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?",
"VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>;

View File

@ -55,7 +55,7 @@ define <2 x double> @test_vfmadd213pd(<2 x double> %a0, <2 x double> %a1, <2 x d
; ZNVER1-LABEL: test_vfmadd213pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -103,7 +103,7 @@ define <4 x double> @test_vfmadd213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4
; ZNVER1-LABEL: test_vfmadd213pd_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
%2 = load <4 x double>, <4 x double> *%a3
@ -151,7 +151,7 @@ define <4 x float> @test_vfmadd213ps(<4 x float> %a0, <4 x float> %a1, <4 x floa
; ZNVER1-LABEL: test_vfmadd213ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = load <4 x float>, <4 x float> *%a3
@ -199,7 +199,7 @@ define <8 x float> @test_vfmadd213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x
; ZNVER1-LABEL: test_vfmadd213ps_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
%2 = load <8 x float>, <8 x float> *%a3
@ -247,7 +247,7 @@ define <2 x double> @test_vfmadd213sd(<2 x double> %a0, <2 x double> %a1, <2 x d
; ZNVER1-LABEL: test_vfmadd213sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213sd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -295,7 +295,7 @@ define <4 x float> @test_vfmadd213ss(<4 x float> %a0, <4 x float> %a1, <4 x floa
; ZNVER1-LABEL: test_vfmadd213ss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmadd213ss (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = load <4 x float>, <4 x float> *%a3
@ -355,7 +355,7 @@ define <2 x double> @test_vfmaddsubpd(<2 x double> %a0, <2 x double> %a1, <2 x d
; ZNVER1-LABEL: test_vfmaddsubpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmaddsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -403,7 +403,7 @@ define <4 x double> @test_vfmaddsubpd_ymm(<4 x double> %a0, <4 x double> %a1, <4
; ZNVER1-LABEL: test_vfmaddsubpd_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmaddsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a4)
%2 = load <4 x double>, <4 x double> *%a3
@ -451,7 +451,7 @@ define <4 x float> @test_vfmaddsubps(<4 x float> %a0, <4 x float> %a1, <4 x floa
; ZNVER1-LABEL: test_vfmaddsubps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmaddsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a4)
%2 = load <4 x float>, <4 x float> *%a3
@ -499,7 +499,7 @@ define <8 x float> @test_vfmaddsubps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x
; ZNVER1-LABEL: test_vfmaddsubps_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmaddsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmaddsub213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a8)
%2 = load <8 x float>, <8 x float> *%a3
@ -559,7 +559,7 @@ define <2 x double> @test_vfmsubaddpd(<2 x double> %a0, <2 x double> %a1, <2 x d
; ZNVER1-LABEL: test_vfmsubaddpd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsubadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -607,7 +607,7 @@ define <4 x double> @test_vfmsubaddpd_ymm(<4 x double> %a0, <4 x double> %a1, <4
; ZNVER1-LABEL: test_vfmsubaddpd_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsubadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a4)
%2 = load <4 x double>, <4 x double> *%a3
@ -655,7 +655,7 @@ define <4 x float> @test_vfmsubaddps(<4 x float> %a0, <4 x float> %a1, <4 x floa
; ZNVER1-LABEL: test_vfmsubaddps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsubadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a4)
%2 = load <4 x float>, <4 x float> *%a3
@ -703,7 +703,7 @@ define <8 x float> @test_vfmsubaddps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x
; ZNVER1-LABEL: test_vfmsubaddps_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsubadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsubadd213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a8)
%2 = load <8 x float>, <8 x float> *%a3
@ -763,7 +763,7 @@ define <2 x double> @test_vfmsub213pd(<2 x double> %a0, <2 x double> %a1, <2 x d
; ZNVER1-LABEL: test_vfmsub213pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -811,7 +811,7 @@ define <4 x double> @test_vfmsub213pd_ymm(<4 x double> %a0, <4 x double> %a1, <4
; ZNVER1-LABEL: test_vfmsub213pd_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
%2 = load <4 x double>, <4 x double> *%a3
@ -859,7 +859,7 @@ define <4 x float> @test_vfmsub213ps(<4 x float> %a0, <4 x float> %a1, <4 x floa
; ZNVER1-LABEL: test_vfmsub213ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = load <4 x float>, <4 x float> *%a3
@ -907,7 +907,7 @@ define <8 x float> @test_vfmsub213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x
; ZNVER1-LABEL: test_vfmsub213ps_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
%2 = load <8 x float>, <8 x float> *%a3
@ -955,7 +955,7 @@ define <2 x double> @test_vfmsub213sd(<2 x double> %a0, <2 x double> %a1, <2 x d
; ZNVER1-LABEL: test_vfmsub213sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213sd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -1003,7 +1003,7 @@ define <4 x float> @test_vfmsub213ss(<4 x float> %a0, <4 x float> %a1, <4 x floa
; ZNVER1-LABEL: test_vfmsub213ss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfmsub213ss (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = load <4 x float>, <4 x float> *%a3
@ -1063,7 +1063,7 @@ define <2 x double> @test_vfnmadd213pd(<2 x double> %a0, <2 x double> %a1, <2 x
; ZNVER1-LABEL: test_vfnmadd213pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmadd213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -1111,7 +1111,7 @@ define <4 x double> @test_vfnmadd213pd_ymm(<4 x double> %a0, <4 x double> %a1, <
; ZNVER1-LABEL: test_vfnmadd213pd_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmadd213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
%2 = load <4 x double>, <4 x double> *%a3
@ -1159,7 +1159,7 @@ define <4 x float> @test_vfnmadd213ps(<4 x float> %a0, <4 x float> %a1, <4 x flo
; ZNVER1-LABEL: test_vfnmadd213ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = load <4 x float>, <4 x float> *%a3
@ -1207,7 +1207,7 @@ define <8 x float> @test_vfnmadd213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x
; ZNVER1-LABEL: test_vfnmadd213ps_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
%2 = load <8 x float>, <8 x float> *%a3
@ -1255,7 +1255,7 @@ define <2 x double> @test_vfnmadd213sd(<2 x double> %a0, <2 x double> %a1, <2 x
; ZNVER1-LABEL: test_vfnmadd213sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213sd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -1303,7 +1303,7 @@ define <4 x float> @test_vfnmadd213ss(<4 x float> %a0, <4 x float> %a1, <4 x flo
; ZNVER1-LABEL: test_vfnmadd213ss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmadd213ss (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = load <4 x float>, <4 x float> *%a3
@ -1363,7 +1363,7 @@ define <2 x double> @test_vfnmsub213pd(<2 x double> %a0, <2 x double> %a1, <2 x
; ZNVER1-LABEL: test_vfnmsub213pd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmsub213pd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213pd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -1411,7 +1411,7 @@ define <4 x double> @test_vfnmsub213pd_ymm(<4 x double> %a0, <4 x double> %a1, <
; ZNVER1-LABEL: test_vfnmsub213pd_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213pd (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
%2 = load <4 x double>, <4 x double> *%a3
@ -1459,7 +1459,7 @@ define <4 x float> @test_vfnmsub213ps(<4 x float> %a0, <4 x float> %a1, <4 x flo
; ZNVER1-LABEL: test_vfnmsub213ps:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ps (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = load <4 x float>, <4 x float> *%a3
@ -1507,7 +1507,7 @@ define <8 x float> @test_vfnmsub213ps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x
; ZNVER1-LABEL: test_vfnmsub213ps_ymm:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmsub213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ps (%rdi), %ymm1, %ymm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
%2 = load <8 x float>, <8 x float> *%a3
@ -1555,7 +1555,7 @@ define <2 x double> @test_vfnmsub213sd(<2 x double> %a0, <2 x double> %a1, <2 x
; ZNVER1-LABEL: test_vfnmsub213sd:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213sd (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
%2 = load <2 x double>, <2 x double> *%a3
@ -1603,7 +1603,7 @@ define <4 x float> @test_vfnmsub213ss(<4 x float> %a0, <4 x float> %a1, <4 x flo
; ZNVER1-LABEL: test_vfnmsub213ss:
; ZNVER1: # %bb.0:
; ZNVER1-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [5:0.50]
; ZNVER1-NEXT: vfnmsub213ss (%rdi), %xmm1, %xmm0 # sched: [12:0.50]
; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
%2 = load <4 x float>, <4 x float> *%a3