mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
f7ac298f12
X86 was already specially marking fma as commutable, which allowed tablegen to autogenerate commuted patterns. This moves it to the target-independent definition and fixes up the targets to remove now-unneeded patterns. Unfortunately, the tests change because the commuted versions of the patterns generate operands in a different order than the explicit patterns. Differential Revision: https://reviews.llvm.org/D91842
430 lines
13 KiB
LLVM
430 lines
13 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s
|
|
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
|
|
|
|
; Check generated fp16 fused MAC and MLS.
|
|
|
|
; a1 * a2 + a3 written as a separate fmul and fadd. The default run expects
; the pair to be contracted into vfma.f16; the +slowfpvfmx run expects
; vmul.f16 followed by vadd.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fusedMACTest2:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
; DONT-FUSE-NEXT:    vadd.f16 s0, s0, s2
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %mul.lhs = load half, half *%a1, align 2
  %mul.rhs = load half, half *%a2, align 2
  %addend = load half, half *%a3, align 2
  %prod = fmul half %mul.lhs, %mul.rhs
  %sum = fadd half %prod, %addend
  store half %sum, half *%a1, align 2
  ret void
}
|
|
|
|
; a1 - a2 * a3 written as a separate fmul and fsub. The default run expects
; vfms.f16; the +slowfpvfmx run expects vmul.f16 followed by vsub.f16.
; The result is stored back through %a1.
define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest4:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r2]
; CHECK-NEXT:    vldr.16 s2, [r1]
; CHECK-NEXT:    vldr.16 s4, [r0]
; CHECK-NEXT:    vfms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fusedMACTest4:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r2]
; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vsub.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %minuend = load half, half *%a1, align 2
  %mul.lhs = load half, half *%a2, align 2
  %mul.rhs = load half, half *%a3, align 2
  %prod = fmul half %mul.lhs, %mul.rhs
  %diff = fsub half %minuend, %prod
  store half %diff, half *%a1, align 2
  ret void
}
|
|
|
|
; -(a1 * a2) - a3 written as fmul, a negation (fsub from -0.0) and fsub.
; The default run expects vfnma.f16; the +slowfpvfmx run expects vnmul.f16
; followed by vsub.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest6:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fusedMACTest6:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vnmul.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
; DONT-FUSE-NEXT:    vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %mul.lhs = load half, half *%a1, align 2
  %mul.rhs = load half, half *%a2, align 2
  %subtrahend = load half, half *%a3, align 2
  %prod = fmul half %mul.lhs, %mul.rhs
  %neg.prod = fsub half -0.0, %prod
  %res = fsub half %neg.prod, %subtrahend
  store half %res, half *%a1, align 2
  ret void
}
|
|
|
|
; a1 * a2 - a3 written as a separate fmul and fsub. The default run expects
; vfnms.f16; the +slowfpvfmx run expects vmul.f16 followed by vsub.f16.
; The result is stored back through %a1.
define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest8:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fusedMACTest8:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vldr.16 s2, [r2]
; DONT-FUSE-NEXT:    vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %mul.lhs = load half, half *%a1, align 2
  %mul.rhs = load half, half *%a2, align 2
  %subtrahend = load half, half *%a3, align 2
  %prod = fmul half %mul.lhs, %mul.rhs
  %diff = fsub half %prod, %subtrahend
  store half %diff, half *%a1, align 2
  ret void
}
|
|
|
|
; fma(aa, bb, cc) through the llvm.fma.f16 intrinsic. Both runs expect
; vfma.f16. The result is stored back through %aa.
;
; The function loads from and stores to memory, so it must not carry the
; `readnone` attribute (the call to the intrinsic keeps its own `readnone`).
define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind ssp {
; CHECK-LABEL: test_fma_f16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: test_fma_f16:
; DONT-FUSE:       @ %bb.0: @ %entry
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfma.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr
entry:
  %a = load half, half *%aa, align 2
  %b = load half, half *%bb, align 2
  %c = load half, half *%cc, align 2
  %tmp1 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone
  store half %tmp1, half *%aa, align 2
  ret void
}
|
|
|
|
; fma(aa, bb, -cc) through the llvm.fma.f16 intrinsic. Both runs expect
; vfnms.f16. The result is stored back through %aa.
;
; Fixes relative to the previous version of this test:
;  * the negated addend %tmp2 is now actually passed to the intrinsic; before,
;    %c was passed and %tmp2 was dead, so the test only exercised vfma.f16;
;  * `readnone` is dropped from the function, which loads and stores memory.
;
; NOTE(review): the assertions below were written by hand to mirror @fnms1,
; which has the same IR shape. Re-run utils/update_llc_test_checks.py to
; confirm them.
define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind ssp {
; CHECK-LABEL: test_fnms_f16:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: test_fnms_f16:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %a = load half, half *%aa, align 2
  %b = load half, half *%bb, align 2
  %c = load half, half *%cc, align 2
  %tmp2 = fsub half -0.0, %c
  %tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %tmp2) nounwind readnone
  store half %tmp3, half *%aa, align 2
  ret void
}
|
|
|
|
; fma(aa, 1.0, bb): the multiplication by one is expected to disappear, so
; both runs expect a plain vadd.f16. The result is stored back through %aa.
define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_const_fold:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vadd.f16 s0, s2, s0
; CHECK-NEXT:    vstr.16 s0, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: test_fma_const_fold:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vadd.f16 s0, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s0, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%aa, align 2
  %addend = load half, half *%bb, align 2
  %fused = call half @llvm.fma.f16(half %x, half 1.0, half %addend)
  store half %fused, half *%aa, align 2
  ret void
}
|
|
|
|
; fma(2.0, aa, bb) with the constant as the first multiplicand. Both runs
; expect the constant to be materialised with vmov.f16 and used as the last
; vfma.f16 operand. The result is stored back through %aa.
define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_canonicalize:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r0]
; CHECK-NEXT:    vldr.16 s2, [r1]
; CHECK-NEXT:    vmov.f16 s4, #2.000000e+00
; CHECK-NEXT:    vfma.f16 s2, s0, s4
; CHECK-NEXT:    vstr.16 s2, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: test_fma_canonicalize:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
; DONT-FUSE-NEXT:    vmov.f16 s4, #2.000000e+00
; DONT-FUSE-NEXT:    vfma.f16 s2, s0, s4
; DONT-FUSE-NEXT:    vstr.16 s2, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%aa, align 2
  %addend = load half, half *%bb, align 2
  %fused = call half @llvm.fma.f16(half 2.0, half %x, half %addend)
  store half %fused, half *%aa, align 2
  ret void
}
|
|
|
|
; fma(-a1, a2, a3) with the negation on the first multiplicand. Both runs
; expect vfms.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fms1:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%a1, align 2
  %y = load half, half *%a2, align 2
  %addend = load half, half *%a3, align 2
  %neg.x = fsub half -0.0, %x
  %fused = call half @llvm.fma.f16(half %neg.x, half %y, half %addend)
  store half %fused, half *%a1, align 2
  ret void
}
|
|
|
|
; fma(a2, -a1, a3) with the negation on the second multiplicand. Both runs
; expect vfms.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%a1, align 2
  %y = load half, half *%a2, align 2
  %addend = load half, half *%a3, align 2
  %neg.x = fsub half -0.0, %x
  %fused = call half @llvm.fma.f16(half %y, half %neg.x, half %addend)
  store half %fused, half *%a1, align 2
  ret void
}
|
|
|
|
; -fma(a1, a2, a3): the whole fused result is negated. Both runs expect
; vfnma.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnma1:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%a1, align 2
  %y = load half, half *%a2, align 2
  %addend = load half, half *%a3, align 2
  %fused = call half @llvm.fma.f16(half %x, half %y, half %addend)
  %neg.fused = fsub half -0.0, %fused
  store half %neg.fused, half *%a1, align 2
  ret void
}
|
|
|
|
; fma(-a1, a2, -a3): both the first multiplicand and the addend are negated.
; Both runs expect vfnma.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnma.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnma2:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%a1, align 2
  %y = load half, half *%a2, align 2
  %addend = load half, half *%a3, align 2
  %neg.x = fsub half -0.0, %x
  %neg.addend = fsub half -0.0, %addend
  %fused = call half @llvm.fma.f16(half %neg.x, half %y, half %neg.addend)
  store half %fused, half *%a1, align 2
  ret void
}
|
|
|
|
; fma(a1, a2, -a3): only the addend is negated. Both runs expect vfnms.f16.
; The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms1:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnms1:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%a1, align 2
  %y = load half, half *%a2, align 2
  %addend = load half, half *%a3, align 2
  %neg.addend = fsub half -0.0, %addend
  %fused = call half @llvm.fma.f16(half %x, half %y, half %neg.addend)
  store half %fused, half *%a1, align 2
  ret void
}
|
|
|
|
; -fma(-a1, a2, a3): the first multiplicand and the fused result are both
; negated. Both runs expect vfnms.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms2:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r1]
; CHECK-NEXT:    vldr.16 s2, [r0]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnms2:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r1]
; DONT-FUSE-NEXT:    vldr.16 s2, [r0]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%a1, align 2
  %y = load half, half *%a2, align 2
  %addend = load half, half *%a3, align 2
  %neg.x = fsub half -0.0, %x
  %fused = call half @llvm.fma.f16(half %neg.x, half %y, half %addend)
  %neg.fused = fsub half -0.0, %fused
  store half %neg.fused, half *%a1, align 2
  ret void
}
|
|
|
|
; -fma(a1, -a2, a3): the second multiplicand and the fused result are both
; negated. Both runs expect vfnms.f16. The result is stored back through %a1.
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK:       @ %bb.0:
; CHECK-NEXT:    vldr.16 s0, [r0]
; CHECK-NEXT:    vldr.16 s2, [r1]
; CHECK-NEXT:    vldr.16 s4, [r2]
; CHECK-NEXT:    vfnms.f16 s4, s2, s0
; CHECK-NEXT:    vstr.16 s4, [r0]
; CHECK-NEXT:    bx lr
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE:       @ %bb.0:
; DONT-FUSE-NEXT:    vldr.16 s0, [r0]
; DONT-FUSE-NEXT:    vldr.16 s2, [r1]
; DONT-FUSE-NEXT:    vldr.16 s4, [r2]
; DONT-FUSE-NEXT:    vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT:    vstr.16 s4, [r0]
; DONT-FUSE-NEXT:    bx lr

  %x = load half, half *%a1, align 2
  %y = load half, half *%a2, align 2
  %addend = load half, half *%a3, align 2
  %neg.y = fsub half -0.0, %y
  %fused = call half @llvm.fma.f16(half %x, half %neg.y, half %addend)
  %neg.fused = fsub half -0.0, %fused
  store half %neg.fused, half *%a1, align 2
  ret void
}
|
|
|
|
|
|
; Half-precision fused multiply-add intrinsic called by the tests in this file.
declare half @llvm.fma.f16(half, half, half) nounwind readnone
|