
[X86] Add strict fma support

Summary: Add strict FMA support: mark ISD::STRICT_FMA as Legal wherever ISD::FMA already is, introduce an X86any_Fmadd PatFrags that matches both the strict and the non-strict node, switch the FMA3/FMA4/AVX-512 vfmadd patterns over to it, and extend the constrained-FP llc tests accordingly.

Reviewers: craig.topper, RKSimon, LiuChen3

Subscribers: hiraditya, llvm-commits, LuoYuanke

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71604
Author: Wang, Pengfei
Date:   2019-12-17 21:27:46 +08:00
parent fff5769629
commit b39ba5468d
8 changed files with 387 additions and 34 deletions
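For readers new to the constrained-FP work: the llvm.experimental.constrained.* intrinsics model FP operations whose rounding mode and exception behavior must be preserved, and they reach the backend as chained STRICT_* SelectionDAG nodes (here ISD::STRICT_FMA) that must not be silently rewritten into their non-strict forms. A minimal IR sketch in the shape of the tests added below (function and value names are illustrative):

; The strictfp attribute plus the constrained intrinsic replace a plain
; call to @llvm.fma.f32; "round.dynamic" means the rounding mode is read
; from the environment, "fpexcept.strict" means exception flags are
; observable side effects.
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)

define float @strict_fma_f32(float %a, float %b, float %c) nounwind strictfp {
  %res = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c,
                                                           metadata !"round.dynamic",
                                                           metadata !"fpexcept.strict") #0
  ret float %res
}

attributes #0 = { strictfp }

With this patch such a call selects to a single vfmadd213ss when FMA3 is available; without FMA support it still lowers to a libcall to fmaf, as the SSE check lines in the updated tests show.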


@@ -1240,8 +1240,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasAnyFMA()) {
for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
-                MVT::v2f64, MVT::v4f64 })
+                MVT::v2f64, MVT::v4f64 }) {
setOperationAction(ISD::FMA, VT, Legal);
+setOperationAction(ISD::STRICT_FMA, VT, Legal);
+}
}
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
@@ -1434,6 +1436,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
+setOperationAction(ISD::STRICT_FMA, VT, Legal);
setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
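These two hunks are the enabling change: everywhere plain ISD::FMA is already Legal (f32/f64 and the 128/256-bit vector types under hasAnyFMA, plus the wider vector types in the second hunk), ISD::STRICT_FMA is now Legal too, so legalization hands the chained node straight to instruction selection instead of expanding it. A sketch of the payoff for a 256-bit vector, mirroring the f13 test added below (names illustrative):

; With -mattr=+fma this should select one vfmadd213ps ymm; without FMA
; hardware the strict node is instead broken into per-element fmaf
; libcalls, as the SSE runs of the 128-bit test below show for v4f32.
declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata)

define <8 x float> @strict_fma_v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c) #0 {
  %res = call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c,
                                                                   metadata !"round.dynamic",
                                                                   metadata !"fpexcept.strict") #0
  ret <8 x float> %res
}

attributes #0 = { strictfp }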


@@ -6475,7 +6475,7 @@ multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX_W;
}
-defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
+defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB213 : avx512_fma3p_213_f<0xAA, "vfmsub213", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB213 : avx512_fma3p_213_f<0xA6, "vfmaddsub213", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD213 : avx512_fma3p_213_f<0xA7, "vfmsubadd213", X86Fmsubadd, X86FmsubaddRnd>;
@@ -6553,7 +6553,7 @@ multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX_W;
}
-defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
+defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB231 : avx512_fma3p_231_f<0xBA, "vfmsub231", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB231 : avx512_fma3p_231_f<0xB6, "vfmaddsub231", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86FmsubaddRnd>;
@@ -6633,7 +6633,7 @@ multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
VEX_W;
}
-defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
+defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB132 : avx512_fma3p_132_f<0x9A, "vfmsub132", X86Fmsub, X86FmsubRnd>;
defm VFMADDSUB132 : avx512_fma3p_132_f<0x96, "vfmaddsub132", X86Fmaddsub, X86FmaddsubRnd>;
defm VFMSUBADD132 : avx512_fma3p_132_f<0x97, "vfmsubadd132", X86Fmsubadd, X86FmsubaddRnd>;
@@ -6730,7 +6730,7 @@ multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
}
}
-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86any_Fmadd, X86FmaddRnd>;
defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
@@ -6937,7 +6937,7 @@ multiclass avx512_scalar_fma_patterns<SDNode Op, SDNode RndOp, string Prefix,
}
}
-defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SS",
+defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SS",
X86Movss, v4f32x_info, fp32imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SS",
X86Movss, v4f32x_info, fp32imm0>;
@@ -6946,7 +6946,7 @@ defm : avx512_scalar_fma_patterns<X86Fnmadd, X86FnmaddRnd, "VFNMADD", "SS",
defm : avx512_scalar_fma_patterns<X86Fnmsub, X86FnmsubRnd, "VFNMSUB", "SS",
X86Movss, v4f32x_info, fp32imm0>;
-defm : avx512_scalar_fma_patterns<X86Fmadd, X86FmaddRnd, "VFMADD", "SD",
+defm : avx512_scalar_fma_patterns<X86any_Fmadd, X86FmaddRnd, "VFMADD", "SD",
X86Movsd, v2f64x_info, fp64imm0>;
defm : avx512_scalar_fma_patterns<X86Fmsub, X86FmsubRnd, "VFMSUB", "SD",
X86Movsd, v2f64x_info, fp64imm0>;
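Note the scope of the substitution in this file: only the FMADD patterns move to X86any_Fmadd, while the explicit-rounding variants (X86FmaddRnd) and the FMSUB/FNMADD/FNMSUB families keep their non-strict nodes for now. With avx512vl the same multiclasses also cover the 128- and 256-bit forms, which is why the new vector tests can share one set of AVX check lines between the +fma and +avx512f,+avx512vl runs. A sketch mirroring f14 in the updated 128-bit vector test:

; Should select vfmadd213pd %xmm under either -mattr=+fma or
; -mattr=+avx512f -mattr=+avx512vl.
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)

define <2 x double> @strict_fma_v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c,
                                                                    metadata !"round.dynamic",
                                                                    metadata !"fpexcept.strict") #0
  ret <2 x double> %res
}

attributes #0 = { strictfp }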


@@ -123,7 +123,7 @@ multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
-loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32,
+loadv4f32, loadv8f32, X86any_Fmadd, v4f32, v8f32,
SchedWriteFMA>;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32,
@@ -138,7 +138,7 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
-loadv2f64, loadv4f64, X86Fmadd, v2f64,
+loadv2f64, loadv4f64, X86any_Fmadd, v2f64,
v4f64, SchedWriteFMA>, VEX_W;
defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
loadv2f64, loadv4f64, X86Fmsub, v2f64,
@@ -319,7 +319,7 @@ multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
VR128, sdmem, sched>, VEX_W;
}
-defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86Fmadd,
+defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", X86any_Fmadd,
SchedWriteFMA.Scl>, VEX_LIG;
defm VFMSUB : fma3s<0x9B, 0xAB, 0xBB, "vfmsub", X86Fmsub,
SchedWriteFMA.Scl>, VEX_LIG;
@@ -372,12 +372,12 @@ multiclass scalar_fma_patterns<SDNode Op, string Prefix, string Suffix,
}
}
-defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
+defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SS", X86Movss, v4f32, f32, FR32, loadf32>;
-defm : scalar_fma_patterns<X86Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
+defm : scalar_fma_patterns<X86any_Fmadd, "VFMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fmsub, "VFMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmadd, "VFNMADD", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
defm : scalar_fma_patterns<X86Fnmsub, "VFNMSUB", "SD", X86Movsd, v2f64, f64, FR64, loadf64>;
@@ -538,7 +538,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
let ExeDomain = SSEPackedSingle in {
// Scalar Instructions
-defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32,
+defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86any_Fmadd, loadf32,
SchedWriteFMA.Scl>,
fma4s_int<0x6A, "vfmaddss", ssmem, v4f32,
SchedWriteFMA.Scl>;
@@ -555,7 +555,7 @@ let ExeDomain = SSEPackedSingle in {
fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32,
SchedWriteFMA.Scl>;
// Packed Instructions
-defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
+defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86any_Fmadd, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32,
loadv4f32, loadv8f32, SchedWriteFMA>;
@@ -571,7 +571,7 @@ let ExeDomain = SSEPackedSingle in {
let ExeDomain = SSEPackedDouble in {
// Scalar Instructions
-defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64,
+defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86any_Fmadd, loadf64,
SchedWriteFMA.Scl>,
fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
@@ -588,7 +588,7 @@ let ExeDomain = SSEPackedDouble in {
fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64,
SchedWriteFMA.Scl>;
// Packed Instructions
-defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
+defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86any_Fmadd, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64,
loadv2f64, loadv4f64, SchedWriteFMA>;
@@ -629,12 +629,12 @@ multiclass scalar_fma4_patterns<SDNode Op, string Name,
}
}
-defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
+defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSS4", v4f32, f32, FR32, loadf32>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSS4", v4f32, f32, FR32, loadf32>;
-defm : scalar_fma4_patterns<X86Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
+defm : scalar_fma4_patterns<X86any_Fmadd, "VFMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fmsub, "VFMSUBSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmadd, "VFNMADDSD4", v2f64, f64, FR64, loadf64>;
defm : scalar_fma4_patterns<X86Fnmsub, "VFNMSUBSD4", v2f64, f64, FR64, loadf64>;
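The FMA3 and FMA4 files get the same mechanical substitution, including the load-folding pattern forms, so a strict fma should still be able to fold a memory operand into the instruction. A hedged sketch of that case (the "(xmm0 * xmm1) + mem" AVX check lines in the scalar test below suggest the fold fires for strict nodes on i686; this exact function is not among the committed tests):

; The addend comes from memory; with -mattr=+fma the load should fold into
; vfmadd213ss rather than requiring a separate vmovss.
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)

define float @strict_fma_load(float %a, float %b, float* %p) #0 {
  %c = load float, float* %p
  %res = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c,
                                                           metadata !"round.dynamic",
                                                           metadata !"fpexcept.strict") #0
  ret float %res
}

attributes #0 = { strictfp }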


@@ -493,7 +493,11 @@ def X86fgetexpSAE : SDNode<"X86ISD::FGETEXP_SAE", SDTFPUnaryOp>;
def X86fgetexps : SDNode<"X86ISD::FGETEXPS", SDTFPBinOp>;
def X86fgetexpSAEs : SDNode<"X86ISD::FGETEXPS_SAE", SDTFPBinOp>;
-def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fmadd : SDNode<"ISD::FMA", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86strict_Fmadd : SDNode<"ISD::STRICT_FMA", SDTFPTernaryOp, [SDNPCommutative, SDNPHasChain]>;
+def X86any_Fmadd : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+                            [(X86strict_Fmadd node:$src1, node:$src2, node:$src3),
+                             (X86Fmadd node:$src1, node:$src2, node:$src3)]>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFPTernaryOp, [SDNPCommutative]>;
def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFPTernaryOp, [SDNPCommutative]>;
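This fragment is the heart of the patch: X86strict_Fmadd is the SelectionDAG node for ISD::STRICT_FMA (note SDNPHasChain, since strict operations are ordered by a chain), and X86any_Fmadd is a PatFrags that matches either the strict or the non-strict node, so every instruction pattern rewritten above in terms of X86any_Fmadd serves both with a single definition. The observable effect is that strict and fast fma produce the same instruction; a sketch (names illustrative, the check expectation is taken from the updated scalar test):

; Under -mattr=+fma both functions should select vfmadd213sd; the strict
; one merely carries a chain, so it cannot be speculated or folded away
; by fast-math transforms.
declare double @llvm.fma.f64(double, double, double)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

define double @fast_fma(double %a, double %b, double %c) {
  %res = call double @llvm.fma.f64(double %a, double %b, double %c)
  ret double %res
}

define double @strict_fma(double %a, double %b, double %c) #0 {
  %res = call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c,
                                                            metadata !"round.dynamic",
                                                            metadata !"fpexcept.strict") #0
  ret double %res
}

attributes #0 = { strictfp }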


@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE,SSE-X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE,SSE-X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX,AVX-X64
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,X87
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=X87
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
@@ -19,6 +19,8 @@ declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
define double @fadd_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f64:
@@ -579,4 +581,118 @@ define void @fsqrt_f32(float* %a) nounwind strictfp {
ret void
}
define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f64:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: subl $24, %esp
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-X86-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-X86-NEXT: movsd %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movsd %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movsd %xmm0, (%esp)
; SSE-X86-NEXT: calll fma
; SSE-X86-NEXT: addl $24, %esp
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fma_f64:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: pushq %rax
; SSE-X64-NEXT: callq fma
; SSE-X64-NEXT: popq %rax
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fma_f64:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %ebp
; AVX-X86-NEXT: movl %esp, %ebp
; AVX-X86-NEXT: andl $-8, %esp
; AVX-X86-NEXT: subl $8, %esp
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-X86-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT: vmovsd %xmm1, (%esp)
; AVX-X86-NEXT: fldl (%esp)
; AVX-X86-NEXT: movl %ebp, %esp
; AVX-X86-NEXT: popl %ebp
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fma_f64:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-X64-NEXT: retq
;
; X87-LABEL: fma_f64:
; X87: # %bb.0:
; X87-NEXT: subl $24, %esp
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fldl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl {{[0-9]+}}(%esp)
; X87-NEXT: fstpl (%esp)
; X87-NEXT: calll fma
; X87-NEXT: addl $24, %esp
; X87-NEXT: retl
%res = call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret double %res
}
define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f32:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: subl $12, %esp
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: addl $12, %esp
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: fma_f32:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: pushq %rax
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: popq %rax
; SSE-X64-NEXT: retq
;
; AVX-X86-LABEL: fma_f32:
; AVX-X86: # %bb.0:
; AVX-X86-NEXT: pushl %eax
; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-X86-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT: vmovss %xmm1, (%esp)
; AVX-X86-NEXT: flds (%esp)
; AVX-X86-NEXT: popl %eax
; AVX-X86-NEXT: retl
;
; AVX-X64-LABEL: fma_f32:
; AVX-X64: # %bb.0:
; AVX-X64-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-X64-NEXT: retq
;
; X87-LABEL: fma_f32:
; X87: # %bb.0:
; X87-NEXT: subl $12, %esp
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: flds {{[0-9]+}}(%esp)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: fstps {{[0-9]+}}(%esp)
; X87-NEXT: fstps (%esp)
; X87-NEXT: calll fmaf
; X87-NEXT: addl $12, %esp
; X87-NEXT: retl
%res = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret float %res
}
attributes #0 = { strictfp }


@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,SSE
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=CHECK,AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE,SSE-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE,SSE-X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX
declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
@@ -18,6 +18,8 @@ declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, met
declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
define <2 x double> @f1(<2 x double> %a, <2 x double> %b) #0 {
; SSE-LABEL: f1:
@@ -217,4 +219,184 @@ define <2 x double> @f12(<2 x double> %a0, <4 x float> %a1) #0 {
ret <2 x double> %res
}
define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
; SSE-X86-LABEL: f13:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: subl $108, %esp
; SSE-X86-NEXT: .cfi_def_cfa_offset 112
; SSE-X86-NEXT: movups %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; SSE-X86-NEXT: movups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; SSE-X86-NEXT: movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; SSE-X86-NEXT: movss %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movss %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-X86-NEXT: movss %xmm0, (%esp)
; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
; SSE-X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
; SSE-X86-NEXT: calll fmaf
; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-X86-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-X86-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE-X86-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-X86-NEXT: addl $108, %esp
; SSE-X86-NEXT: .cfi_def_cfa_offset 4
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: f13:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: subq $88, %rsp
; SSE-X64-NEXT: .cfi_def_cfa_offset 96
; SSE-X64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE-X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
; SSE-X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; SSE-X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; SSE-X64-NEXT: callq fmaf
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE-X64-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
; SSE-X64-NEXT: # xmm1 = xmm1[0],mem[0]
; SSE-X64-NEXT: movaps %xmm1, %xmm0
; SSE-X64-NEXT: addq $88, %rsp
; SSE-X64-NEXT: .cfi_def_cfa_offset 8
; SSE-X64-NEXT: retq
;
; AVX-LABEL: f13:
; AVX: # %bb.0:
; AVX-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-NEXT: ret{{[l|q]}}
%res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x float> %res
}
define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
; SSE-X86-LABEL: f14:
; SSE-X86: # %bb.0:
; SSE-X86-NEXT: pushl %ebp
; SSE-X86-NEXT: .cfi_def_cfa_offset 8
; SSE-X86-NEXT: .cfi_offset %ebp, -8
; SSE-X86-NEXT: movl %esp, %ebp
; SSE-X86-NEXT: .cfi_def_cfa_register %ebp
; SSE-X86-NEXT: andl $-16, %esp
; SSE-X86-NEXT: subl $112, %esp
; SSE-X86-NEXT: movaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; SSE-X86-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; SSE-X86-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; SSE-X86-NEXT: movlps %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movlps %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movlps %xmm0, (%esp)
; SSE-X86-NEXT: calll fma
; SSE-X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: movhps %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: movhps %xmm0, {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; SSE-X86-NEXT: movhps %xmm0, (%esp)
; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp)
; SSE-X86-NEXT: calll fma
; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp)
; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE-X86-NEXT: movl %ebp, %esp
; SSE-X86-NEXT: popl %ebp
; SSE-X86-NEXT: .cfi_def_cfa %esp, 4
; SSE-X86-NEXT: retl
;
; SSE-X64-LABEL: f14:
; SSE-X64: # %bb.0:
; SSE-X64-NEXT: subq $72, %rsp
; SSE-X64-NEXT: .cfi_def_cfa_offset 80
; SSE-X64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; SSE-X64-NEXT: callq fma
; SSE-X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; SSE-X64-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Reload
; SSE-X64-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
; SSE-X64-NEXT: callq fma
; SSE-X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; SSE-X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-X64-NEXT: movaps %xmm1, %xmm0
; SSE-X64-NEXT: addq $72, %rsp
; SSE-X64-NEXT: .cfi_def_cfa_offset 8
; SSE-X64-NEXT: retq
;
; AVX-LABEL: f14:
; AVX: # %bb.0:
; AVX-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX-NEXT: ret{{[l|q]}}
%res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <2 x double> %res
}
attributes #0 = { strictfp }


@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma -O3 -disable-strictnode-mutation | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma -O3 -disable-strictnode-mutation | FileCheck %s
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 -disable-strictnode-mutation | FileCheck %s
@@ -16,6 +16,8 @@ declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, met
declare <8 x float> @llvm.experimental.constrained.sqrt.v8f32(<8 x float>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata)
declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata)
define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 {
; CHECK-LABEL: f1:
@@ -154,4 +156,26 @@ define <4 x float> @f12(<4 x double> %a) #0 {
ret <4 x float> %ret
}
define <8 x float> @f13(<8 x float> %a, <8 x float> %b, <8 x float> %c) #0 {
; CHECK-LABEL: f13:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <8 x float> %res
}
define <4 x double> @f14(<4 x double> %a, <4 x double> %b, <4 x double> %c) #0 {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT: ret{{[l|q]}}
%res = call <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <4 x double> %res
}
attributes #0 = { strictfp }


@@ -14,6 +14,8 @@ declare <8 x double> @llvm.experimental.constrained.sqrt.v8f64(<8 x double>, met
declare <16 x float> @llvm.experimental.constrained.sqrt.v16f32(<16 x float>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.fpext.v8f64.v8f32(<8 x float>, metadata)
declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata)
declare <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, metadata, metadata)
define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 {
; CHECK-LABEL: f1:
@@ -151,4 +153,26 @@ define <8 x float> @f12(<8 x double> %a) #0 {
ret <8 x float> %ret
}
define <16 x float> @f13(<16 x float> %a, <16 x float> %b, <16 x float> %c) #0 {
; CHECK-LABEL: f13:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT: ret{{[l|q]}}
%res = call <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <16 x float> %res
}
define <8 x double> @f14(<8 x double> %a, <8 x double> %b, <8 x double> %c) #0 {
; CHECK-LABEL: f14:
; CHECK: # %bb.0:
; CHECK-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; CHECK-NEXT: ret{{[l|q]}}
%res = call <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double> %a, <8 x double> %b, <8 x double> %c,
metadata !"round.dynamic",
metadata !"fpexcept.strict") #0
ret <8 x double> %res
}
attributes #0 = { strictfp }