diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 9e554db918c..fda45907eef 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -242,6 +242,7 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, // This is because src1 is tied to dest, and the scalar intrinsics // require the pass-through values to come from the first source // operand, not the second. + // TODO: Use AVX512 instructions when possible. let Predicates = [HasFMA] in { def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3), (!cast<Instruction>(NAME#"213SSr_Int") @@ -250,6 +251,22 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231, def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3), (!cast<Instruction>(NAME#"213SDr_Int") VR128:$src1, VR128:$src2, VR128:$src3)>; + + def : Pat<(IntF32 VR128:$src1, VR128:$src2, sse_load_f32:$src3), + (!cast<Instruction>(NAME#"213SSm_Int") + VR128:$src1, VR128:$src2, sse_load_f32:$src3)>; + + def : Pat<(IntF64 VR128:$src1, VR128:$src2, sse_load_f64:$src3), + (!cast<Instruction>(NAME#"213SDm_Int") + VR128:$src1, VR128:$src2, sse_load_f64:$src3)>; + + def : Pat<(IntF32 VR128:$src1, sse_load_f32:$src3, VR128:$src2), + (!cast<Instruction>(NAME#"132SSm_Int") + VR128:$src1, VR128:$src2, sse_load_f32:$src3)>; + + def : Pat<(IntF64 VR128:$src1, sse_load_f64:$src3, VR128:$src2), + (!cast<Instruction>(NAME#"132SDm_Int") + VR128:$src1, VR128:$src2, sse_load_f64:$src3)>; } } diff --git a/test/CodeGen/X86/fma-intrinsics-x86.ll b/test/CodeGen/X86/fma-intrinsics-x86.ll index fff3a49bea2..d30f1c9899e 100644 --- a/test/CodeGen/X86/fma-intrinsics-x86.ll +++ b/test/CodeGen/X86/fma-intrinsics-x86.ll @@ -10,9 +10,9 @@ define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-LABEL: test_x86_fma_vfmadd_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rdx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), 
%xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm1, %xmm0 ; @@ -27,9 +27,9 @@ define <4 x float> @test_x86_fma_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, ; CHECK-LABEL: test_x86_fma_vfmadd_bac_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfmadd132ss (%rcx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfmadd213ss (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmadd213ss %xmm2, %xmm0, %xmm1 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 @@ -45,9 +45,9 @@ define <2 x double> @test_x86_fma_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, ; CHECK-LABEL: test_x86_fma_vfmadd_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rdx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 ; @@ -62,9 +62,9 @@ define <2 x double> @test_x86_fma_vfmadd_bac_sd(<2 x double> %a0, <2 x double> % ; CHECK-LABEL: test_x86_fma_vfmadd_bac_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfmadd132sd (%rcx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfmadd213sd (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: 
vfmadd213sd %xmm2, %xmm0, %xmm1 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 @@ -154,9 +154,9 @@ define <4 x float> @test_x86_fma_vfmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-LABEL: test_x86_fma_vfmsub_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rdx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm1, %xmm0 ; @@ -171,9 +171,9 @@ define <4 x float> @test_x86_fma_vfmsub_bac_ss(<4 x float> %a0, <4 x float> %a1, ; CHECK-LABEL: test_x86_fma_vfmsub_bac_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfmsub132ss (%rcx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfmsub213ss (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmsub213ss %xmm2, %xmm0, %xmm1 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 @@ -190,9 +190,9 @@ define <2 x double> @test_x86_fma_vfmsub_sd(<2 x double> %a0, <2 x double> %a1, ; CHECK-LABEL: test_x86_fma_vfmsub_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rdx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm1, %xmm0 ; @@ -207,9 +207,9 @@ define <2 x double> @test_x86_fma_vfmsub_bac_sd(<2 x double> %a0, <2 x double> % ; 
CHECK-LABEL: test_x86_fma_vfmsub_bac_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfmsub132sd (%rcx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfmsub213sd (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfmsub213sd %xmm2, %xmm0, %xmm1 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 @@ -299,9 +299,9 @@ define <4 x float> @test_x86_fma_vfnmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-LABEL: test_x86_fma_vfnmadd_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rdx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm1, %xmm0 ; @@ -316,9 +316,9 @@ define <4 x float> @test_x86_fma_vfnmadd_bac_ss(<4 x float> %a0, <4 x float> %a1 ; CHECK-LABEL: test_x86_fma_vfnmadd_bac_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfnmadd132ss (%rcx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfnmadd213ss (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm1 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 @@ -335,9 +335,9 @@ define <2 x double> @test_x86_fma_vfnmadd_sd(<2 x double> %a0, <2 x double> %a1, ; CHECK-LABEL: test_x86_fma_vfnmadd_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), 
%xmm1}} -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rdx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm1, %xmm0 ; @@ -352,9 +352,9 @@ define <2 x double> @test_x86_fma_vfnmadd_bac_sd(<2 x double> %a0, <2 x double> ; CHECK-LABEL: test_x86_fma_vfnmadd_bac_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfnmadd132sd (%rcx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfnmadd213sd (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmadd213sd %xmm2, %xmm0, %xmm1 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0 @@ -444,9 +444,9 @@ define <4 x float> @test_x86_fma_vfnmsub_ss(<4 x float> %a0, <4 x float> %a1, <4 ; CHECK-LABEL: test_x86_fma_vfnmsub_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rdx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm1, %xmm0 ; @@ -461,9 +461,9 @@ define <4 x float> @test_x86_fma_vfnmsub_bac_ss(<4 x float> %a0, <4 x float> %a1 ; CHECK-LABEL: test_x86_fma_vfnmsub_bac_ss: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfnmsub132ss (%rcx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: 
vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovaps {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfnmsub213ss (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmsub213ss %xmm2, %xmm0, %xmm1 ; CHECK-FMA-NEXT: vmovaps %xmm1, %xmm0 @@ -480,9 +480,9 @@ define <2 x double> @test_x86_fma_vfnmsub_sd(<2 x double> %a0, <2 x double> %a1, ; CHECK-LABEL: test_x86_fma_vfnmsub_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rdx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rcx\), %xmm0|\(%rdx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm1, %xmm0 ; @@ -497,9 +497,9 @@ define <2 x double> @test_x86_fma_vfnmsub_bac_sd(<2 x double> %a0, <2 x double> ; CHECK-LABEL: test_x86_fma_vfnmsub_bac_sd: ; CHECK-NEXT: # BB#0: ; -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%r8\), %xmm1}} -; CHECK-FMA-WIN-NEXT: vfnmsub132sd (%rcx), %xmm1, %xmm0 +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vmovapd {{\(%rdx\), %xmm0|\(%rcx\), %xmm1}} +; CHECK-FMA-WIN-NEXT: vfnmsub213sd (%r8), %xmm1, %xmm0 ; ; CHECK-FMA-NEXT: vfnmsub213sd %xmm2, %xmm0, %xmm1 ; CHECK-FMA-NEXT: vmovapd %xmm1, %xmm0