1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00
llvm-mirror/test/CodeGen/ARM/fp16-fusedMAC.ll
Craig Topper f7ac298f12 [SelectionDAG][ARM][AArch64][Hexagon][RISCV][X86] Add SDNPCommutative to fma and fmad nodes in tablegen. Remove explicit commuted patterns from targets.
X86 was already specially marking fma as commutable, which allowed
tablegen to autogenerate commuted patterns. This moves the marking to
the target-independent definition and fixes up the targets to remove
the now-unneeded patterns.

Unfortunately, the tests change because the commuted versions of
the patterns generate operands in a different order than the
explicit patterns.

Differential Revision: https://reviews.llvm.org/D91842
2020-11-23 10:09:20 -08:00

430 lines
13 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16 -fp-contract=fast | FileCheck %s
; RUN: llc < %s -mtriple=thumbv8.1-m-none-eabi -mattr=+fullfp16,+slowfpvfmx -fp-contract=fast | FileCheck %s -check-prefix=DONT-FUSE
; Check generated fp16 fused MAC and MLS.
; fusedMACTest2: computes (*a1 * *a2) + *a3 with separate fmul/fadd and stores
; the result back through %a1.
; The first RUN line expects the pair to be contracted into one vfma.f16; the
; second RUN line (+slowfpvfmx) expects an unfused vmul.f16 followed by vadd.f16.
define arm_aapcs_vfpcc void @fusedMACTest2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vadd.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; Product first, then the addend: the multiply-accumulate shape.
%1 = fmul half %f1, %f2
%2 = fadd half %1, %f3
store half %2, half *%a1, align 2
ret void
}
; fusedMACTest4: computes *a1 - (*a2 * *a3) with separate fmul/fsub and stores
; the result back through %a1.
; The first RUN line expects a single vfms.f16; the second RUN line
; (+slowfpvfmx) expects an unfused vmul.f16 followed by vsub.f16.
define arm_aapcs_vfpcc void @fusedMACTest4(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest4:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r2]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r0]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest4:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r2]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vsub.f16 s0, s2, s0
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; The product is the subtrahend here (accumulator minus product).
%1 = fmul half %f2, %f3
%2 = fsub half %f1, %1
store half %2, half *%a1, align 2
ret void
}
; fusedMACTest6: computes -(*a1 * *a2) - *a3 and stores the result back
; through %a1.
; The first RUN line expects a single vfnma.f16; the second RUN line
; (+slowfpvfmx) expects an unfused vnmul.f16 followed by vsub.f16.
define arm_aapcs_vfpcc void @fusedMACTest6(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest6:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest6:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vnmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%1 = fmul half %f1, %f2
; "fsub -0.0, x" is used as the negation of the product.
%2 = fsub half -0.0, %1
%3 = fsub half %2, %f3
store half %3, half *%a1, align 2
ret void
}
; fusedMACTest8: computes (*a1 * *a2) - *a3 with separate fmul/fsub and stores
; the result back through %a1.
; The first RUN line expects a single vfnms.f16; the second RUN line
; (+slowfpvfmx) expects an unfused vmul.f16 followed by vsub.f16.
define arm_aapcs_vfpcc void @fusedMACTest8(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fusedMACTest8:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fusedMACTest8:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vmul.f16 s0, s2, s0
; DONT-FUSE-NEXT: vldr.16 s2, [r2]
; DONT-FUSE-NEXT: vsub.f16 s0, s0, s2
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; The product is the minuend here (product minus accumulator).
%1 = fmul half %f1, %f2
%2 = fsub half %1, %f3
store half %2, half *%a1, align 2
ret void
}
; test_fma_f16: calls the llvm.fma.f16 intrinsic directly on (*aa, *bb, *cc)
; and stores the result back through %aa.
; Both RUN lines expect vfma.f16, i.e. the explicit intrinsic is still selected
; as a fused instruction when +slowfpvfmx is set.
define arm_aapcs_vfpcc void @test_fma_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fma_f16:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_f16:
; DONT-FUSE: @ %bb.0: @ %entry
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
; The block label below appears in the expected "@ %entry" asm comment above,
; so it must keep this name.
entry:
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%c = load half, half *%cc, align 2
%tmp1 = tail call half @llvm.fma.f16(half %a, half %b, half %c) nounwind readnone
store half %tmp1, half *%aa, align 2
ret void
}
; test_fnms_f16: computes fma(*aa, *bb, -(*cc)) through the llvm.fma.f16
; intrinsic and stores the result back through %aa.
; Both RUN lines expect a single vfnms.f16 (product minus accumulator).
;
; NOTE(review): this test previously computed the negated addend (%tmp2) but
; passed the un-negated %c to the intrinsic, so it only re-tested plain vfma.
; The expected lines below mirror the fnms1 test, which has the same DAG
; shape; re-run utils/update_llc_test_checks.py to confirm them.
define arm_aapcs_vfpcc void @test_fnms_f16(half *%aa, half *%bb, half *%cc) nounwind readnone ssp {
; CHECK-LABEL: test_fnms_f16:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fnms_f16:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%c = load half, half *%cc, align 2
; "fsub -0.0, x" is used as the negation of the addend.
%tmp2 = fsub half -0.0, %c
; The negated addend must be the third operand for this to be an fnms pattern.
%tmp3 = tail call half @llvm.fma.f16(half %a, half %b, half %tmp2) nounwind readnone
store half %tmp3, half *%aa, align 2
ret void
}
; test_fma_const_fold: calls llvm.fma.f16(*aa, 1.0, *bb) and stores the result
; back through %aa.
; Both RUN lines expect only a vadd.f16, with no multiply and no fused
; instruction, i.e. the multiplication by 1.0 is folded away.
define arm_aapcs_vfpcc void @test_fma_const_fold(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_const_fold:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vadd.f16 s0, s2, s0
; CHECK-NEXT: vstr.16 s0, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_const_fold:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vadd.f16 s0, s2, s0
; DONT-FUSE-NEXT: vstr.16 s0, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%ret = call half @llvm.fma.f16(half %a, half 1.0, half %b)
store half %ret, half *%aa, align 2
ret void
}
; test_fma_canonicalize: calls llvm.fma.f16(2.0, *aa, *bb), with the constant
; as the first multiplicand, and stores the result back through %aa.
; Both RUN lines expect the constant to be materialised with vmov.f16 and to
; appear as the last operand of vfma.f16, after the loaded multiplicand.
define arm_aapcs_vfpcc void @test_fma_canonicalize(half *%aa, half *%bb) nounwind {
; CHECK-LABEL: test_fma_canonicalize:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vmov.f16 s4, #2.000000e+00
; CHECK-NEXT: vfma.f16 s2, s0, s4
; CHECK-NEXT: vstr.16 s2, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: test_fma_canonicalize:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r0]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vmov.f16 s4, #2.000000e+00
; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
; DONT-FUSE-NEXT: vstr.16 s2, [r0]
; DONT-FUSE-NEXT: bx lr
%a = load half, half *%aa, align 2
%b = load half, half *%bb, align 2
%ret = call half @llvm.fma.f16(half 2.0, half %a, half %b)
store half %ret, half *%aa, align 2
ret void
}
; fms1: computes fma(-(*a1), *a2, *a3), with the negation on the first
; multiplicand, and stores the result back through %a1.
; Both RUN lines expect the negation to be absorbed into a single vfms.f16.
define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; "fsub -0.0, x" is used as the negation of %f1.
%s = fsub half -0.0, %f1
%ret = call half @llvm.fma.f16(half %s, half %f2, half %f3)
store half %ret, half *%a1, align 2
ret void
}
; fms2: computes fma(*a2, -(*a1), *a3), with the negation on the second
; multiplicand (the commuted form of fms1), and stores the result back
; through %a1.
; Both RUN lines expect the same single vfms.f16 as fms1.
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; "fsub -0.0, x" is used as the negation of %f1.
%s = fsub half -0.0, %f1
%ret = call half @llvm.fma.f16(half %f2, half %s, half %f3)
store half %ret, half *%a1, align 2
ret void
}
; fnma1: computes -fma(*a1, *a2, *a3), negating the whole fused result, and
; stores it back through %a1.
; Both RUN lines expect the negation to be absorbed into a single vfnma.f16.
define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
%fma = call half @llvm.fma.f16(half %f1, half %f2, half %f3)
; "fsub -0.0, x" is used as the negation of the fused result.
%n1 = fsub half -0.0, %fma
store half %n1, half *%a1, align 2
ret void
}
; fnma2: computes fma(-(*a1), *a2, -(*a3)), negating one multiplicand and the
; addend, and stores the result back through %a1.
; Both RUN lines expect the same single vfnma.f16 as fnma1.
define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnma2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; "fsub -0.0, x" is used as negation for both %f1 and %f3.
%n1 = fsub half -0.0, %f1
%n3 = fsub half -0.0, %f3
%ret = call half @llvm.fma.f16(half %n1, half %f2, half %n3)
store half %ret, half *%a1, align 2
ret void
}
; fnms1: computes fma(*a1, *a2, -(*a3)), with the negation on the addend only,
; and stores the result back through %a1.
; Both RUN lines expect the negation to be absorbed into a single vfnms.f16.
define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; "fsub -0.0, x" is used as the negation of the addend.
%n3 = fsub half -0.0, %f3
%ret = call half @llvm.fma.f16(half %f1, half %f2, half %n3)
store half %ret, half *%a1, align 2
ret void
}
; fnms2: computes -fma(-(*a1), *a2, *a3), negating the first multiplicand and
; the whole result, and stores it back through %a1.
; Both RUN lines expect the same single vfnms.f16 as fnms1.
define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; "fsub -0.0, x" is used as negation, once on the input and once on the result.
%n1 = fsub half -0.0, %f1
%fma = call half @llvm.fma.f16(half %n1, half %f2, half %f3)
%n = fsub half -0.0, %fma
store half %n, half *%a1, align 2
ret void
}
; fnms3: computes -fma(*a1, -(*a2), *a3), negating the second multiplicand and
; the whole result (the commuted form of fnms2), and stores it back through %a1.
; Both RUN lines expect a single vfnms.f16; unlike fnms2, the expected output
; loads [r0] into s0 and [r1] into s2.
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r0]
; CHECK-NEXT: vldr.16 s2, [r1]
; CHECK-NEXT: vldr.16 s4, [r2]
; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r0]
; DONT-FUSE-NEXT: vldr.16 s2, [r1]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
%f2 = load half, half *%a2, align 2
%f3 = load half, half *%a3, align 2
; "fsub -0.0, x" is used as negation, once on the input and once on the result.
%n2 = fsub half -0.0, %f2
%fma = call half @llvm.fma.f16(half %f1, half %n2, half %f3)
%n1 = fsub half -0.0, %fma
store half %n1, half *%a1, align 2
ret void
}
; Half-precision fused multiply-add intrinsic called by the tests above.
declare half @llvm.fma.f16(half, half, half) nounwind readnone