mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
e3dbaf1580
Adjust generateFMAsInMachineCombiner to return false if SVE is present in order to combine fmul+fadd into fma. Also add new pseudo instructions so as to select the most appropriate of FMLA/FMAD depending on register allocation. Depends on D96599. Differential Revision: https://reviews.llvm.org/D96424
747 lines
28 KiB
LLVM
747 lines
28 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

; select(%pred, %acc + %m1 * %m2, %acc) on <vscale x 8 x half>, fast-math.
; Expected codegen: one FMLA on .h elements merging under the incoming p0; no ptrue.
define <vscale x 8 x half> @fmla_h_sel(<vscale x 8 x i1> %pred, <vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
; CHECK-LABEL: fmla_h_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %add = fadd fast <vscale x 8 x half> %acc, %mul
  %res = select <vscale x 8 x i1> %pred, <vscale x 8 x half> %add, <vscale x 8 x half> %acc
  ret <vscale x 8 x half> %res
}

; select(%pred, %acc + %m1 * %m2, %acc) on <vscale x 4 x half>, fast-math.
; Expected codegen: one FMLA on .h elements merging under the incoming p0; no ptrue.
define <vscale x 4 x half> @fmla_hx4_sel(<vscale x 4 x i1> %pred, <vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
; CHECK-LABEL: fmla_hx4_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %add = fadd fast <vscale x 4 x half> %acc, %mul
  %res = select <vscale x 4 x i1> %pred, <vscale x 4 x half> %add, <vscale x 4 x half> %acc
  ret <vscale x 4 x half> %res
}

; select(%pred, %acc + %m1 * %m2, %acc) on <vscale x 2 x half>, fast-math.
; Expected codegen: one FMLA on .h elements merging under the incoming p0; no ptrue.
define <vscale x 2 x half> @fmla_hx2_sel(<vscale x 2 x i1> %pred, <vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
; CHECK-LABEL: fmla_hx2_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %add = fadd fast <vscale x 2 x half> %acc, %mul
  %res = select <vscale x 2 x i1> %pred, <vscale x 2 x half> %add, <vscale x 2 x half> %acc
  ret <vscale x 2 x half> %res
}

; select(%pred, %acc + %m1 * %m2, %acc) on <vscale x 4 x float>, fast-math.
; Expected codegen: one FMLA on .s elements merging under the incoming p0; no ptrue.
define <vscale x 4 x float> @fmla_s_sel(<vscale x 4 x i1> %pred, <vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
; CHECK-LABEL: fmla_s_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %add = fadd fast <vscale x 4 x float> %acc, %mul
  %res = select <vscale x 4 x i1> %pred, <vscale x 4 x float> %add, <vscale x 4 x float> %acc
  ret <vscale x 4 x float> %res
}

; select(%pred, %acc + %m1 * %m2, %acc) on <vscale x 2 x float>, fast-math.
; Expected codegen: one FMLA on .s elements merging under the incoming p0; no ptrue.
define <vscale x 2 x float> @fmla_sx2_sel(<vscale x 2 x i1> %pred, <vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
; CHECK-LABEL: fmla_sx2_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %add = fadd fast <vscale x 2 x float> %acc, %mul
  %res = select <vscale x 2 x i1> %pred, <vscale x 2 x float> %add, <vscale x 2 x float> %acc
  ret <vscale x 2 x float> %res
}

; select(%pred, %acc + %m1 * %m2, %acc) on <vscale x 2 x double>, fast-math.
; Expected codegen: one FMLA on .d elements merging under the incoming p0; no ptrue.
define <vscale x 2 x double> @fmla_d_sel(<vscale x 2 x i1> %pred, <vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
; CHECK-LABEL: fmla_d_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %add = fadd fast <vscale x 2 x double> %acc, %mul
  %res = select <vscale x 2 x i1> %pred, <vscale x 2 x double> %add, <vscale x 2 x double> %acc
  ret <vscale x 2 x double> %res
}

; select(%pred, %acc - %m1 * %m2, %acc) on <vscale x 8 x half>, fast-math.
; Expected codegen: one FMLS on .h elements merging under the incoming p0; no ptrue.
define <vscale x 8 x half> @fmls_h_sel(<vscale x 8 x i1> %pred, <vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
; CHECK-LABEL: fmls_h_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %sub = fsub fast <vscale x 8 x half> %acc, %mul
  %res = select <vscale x 8 x i1> %pred, <vscale x 8 x half> %sub, <vscale x 8 x half> %acc
  ret <vscale x 8 x half> %res
}

; select(%pred, %acc - %m1 * %m2, %acc) on <vscale x 4 x half>, fast-math.
; Expected codegen: one FMLS on .h elements merging under the incoming p0; no ptrue.
define <vscale x 4 x half> @fmls_hx4_sel(<vscale x 4 x i1> %pred, <vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
; CHECK-LABEL: fmls_hx4_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %sub = fsub fast <vscale x 4 x half> %acc, %mul
  %res = select <vscale x 4 x i1> %pred, <vscale x 4 x half> %sub, <vscale x 4 x half> %acc
  ret <vscale x 4 x half> %res
}

; select(%pred, %acc - %m1 * %m2, %acc) on <vscale x 2 x half>, fast-math.
; Expected codegen: one FMLS on .h elements merging under the incoming p0; no ptrue.
define <vscale x 2 x half> @fmls_hx2_sel(<vscale x 2 x i1> %pred, <vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
; CHECK-LABEL: fmls_hx2_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %sub = fsub fast <vscale x 2 x half> %acc, %mul
  %res = select <vscale x 2 x i1> %pred, <vscale x 2 x half> %sub, <vscale x 2 x half> %acc
  ret <vscale x 2 x half> %res
}

; select(%pred, %acc - %m1 * %m2, %acc) on <vscale x 4 x float>, fast-math.
; Expected codegen: one FMLS on .s elements merging under the incoming p0; no ptrue.
define <vscale x 4 x float> @fmls_s_sel(<vscale x 4 x i1> %pred, <vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
; CHECK-LABEL: fmls_s_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %sub = fsub fast <vscale x 4 x float> %acc, %mul
  %res = select <vscale x 4 x i1> %pred, <vscale x 4 x float> %sub, <vscale x 4 x float> %acc
  ret <vscale x 4 x float> %res
}

; select(%pred, %acc - %m1 * %m2, %acc) on <vscale x 2 x float>, fast-math.
; Expected codegen: one FMLS on .s elements merging under the incoming p0; no ptrue.
define <vscale x 2 x float> @fmls_sx2_sel(<vscale x 2 x i1> %pred, <vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
; CHECK-LABEL: fmls_sx2_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %sub = fsub fast <vscale x 2 x float> %acc, %mul
  %res = select <vscale x 2 x i1> %pred, <vscale x 2 x float> %sub, <vscale x 2 x float> %acc
  ret <vscale x 2 x float> %res
}

; select(%pred, %acc - %m1 * %m2, %acc) on <vscale x 2 x double>, fast-math.
; Expected codegen: one FMLS on .d elements merging under the incoming p0; no ptrue.
define <vscale x 2 x double> @fmls_d_sel(<vscale x 2 x i1> %pred, <vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
; CHECK-LABEL: fmls_d_sel:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %sub = fsub fast <vscale x 2 x double> %acc, %mul
  %res = select <vscale x 2 x i1> %pred, <vscale x 2 x double> %sub, <vscale x 2 x double> %acc
  ret <vscale x 2 x double> %res
}

; %acc + %m1 * %m2 on <vscale x 8 x half>, fast-math; the addend %acc is the last argument.
; Expected codegen: ptrue p0.h, then one FMAD on .h elements.
define <vscale x 8 x half> @fmad_h(<vscale x 8 x half> %m1, <vscale x 8 x half> %m2, <vscale x 8 x half> %acc) {
; CHECK-LABEL: fmad_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %res = fadd fast <vscale x 8 x half> %acc, %mul
  ret <vscale x 8 x half> %res
}

; %acc + %m1 * %m2 on <vscale x 4 x half>, fast-math; the addend %acc is the last argument.
; Expected codegen: ptrue p0.s, then one FMAD on .h elements.
define <vscale x 4 x half> @fmad_hx4(<vscale x 4 x half> %m1, <vscale x 4 x half> %m2, <vscale x 4 x half> %acc) {
; CHECK-LABEL: fmad_hx4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %res = fadd fast <vscale x 4 x half> %acc, %mul
  ret <vscale x 4 x half> %res
}

; %acc + %m1 * %m2 on <vscale x 2 x half>, fast-math; the addend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FMAD on .h elements.
define <vscale x 2 x half> @fmad_hx2(<vscale x 2 x half> %m1, <vscale x 2 x half> %m2, <vscale x 2 x half> %acc) {
; CHECK-LABEL: fmad_hx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %res = fadd fast <vscale x 2 x half> %acc, %mul
  ret <vscale x 2 x half> %res
}

; %acc + %m1 * %m2 on <vscale x 4 x float>, fast-math; the addend %acc is the last argument.
; Expected codegen: ptrue p0.s, then one FMAD on .s elements.
define <vscale x 4 x float> @fmad_s(<vscale x 4 x float> %m1, <vscale x 4 x float> %m2, <vscale x 4 x float> %acc) {
; CHECK-LABEL: fmad_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %res = fadd fast <vscale x 4 x float> %acc, %mul
  ret <vscale x 4 x float> %res
}

; %acc + %m1 * %m2 on <vscale x 2 x float>, fast-math; the addend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FMAD on .s elements.
define <vscale x 2 x float> @fmad_sx2(<vscale x 2 x float> %m1, <vscale x 2 x float> %m2, <vscale x 2 x float> %acc) {
; CHECK-LABEL: fmad_sx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %res = fadd fast <vscale x 2 x float> %acc, %mul
  ret <vscale x 2 x float> %res
}

; %acc + %m1 * %m2 on <vscale x 2 x double>, fast-math; the addend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FMAD on .d elements.
define <vscale x 2 x double> @fmad_d(<vscale x 2 x double> %m1, <vscale x 2 x double> %m2, <vscale x 2 x double> %acc) {
; CHECK-LABEL: fmad_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %res = fadd fast <vscale x 2 x double> %acc, %mul
  ret <vscale x 2 x double> %res
}

; %acc + %m1 * %m2 on <vscale x 8 x half>, fast-math; the addend %acc is the first argument.
; Expected codegen: ptrue p0.h, then one FMLA on .h elements.
define <vscale x 8 x half> @fmla_h(<vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
; CHECK-LABEL: fmla_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %res = fadd fast <vscale x 8 x half> %acc, %mul
  ret <vscale x 8 x half> %res
}

; %acc + %m1 * %m2 on <vscale x 4 x half>, fast-math; the addend %acc is the first argument.
; Expected codegen: ptrue p0.s, then one FMLA on .h elements.
define <vscale x 4 x half> @fmla_hx4(<vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
; CHECK-LABEL: fmla_hx4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %res = fadd fast <vscale x 4 x half> %acc, %mul
  ret <vscale x 4 x half> %res
}

; %acc + %m1 * %m2 on <vscale x 2 x half>, fast-math; the addend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FMLA on .h elements.
define <vscale x 2 x half> @fmla_hx2(<vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
; CHECK-LABEL: fmla_hx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %res = fadd fast <vscale x 2 x half> %acc, %mul
  ret <vscale x 2 x half> %res
}

; %acc + %m1 * %m2 on <vscale x 4 x float>, fast-math; the addend %acc is the first argument.
; Expected codegen: ptrue p0.s, then one FMLA on .s elements.
define <vscale x 4 x float> @fmla_s(<vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
; CHECK-LABEL: fmla_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %res = fadd fast <vscale x 4 x float> %acc, %mul
  ret <vscale x 4 x float> %res
}

; %acc + %m1 * %m2 on <vscale x 2 x float>, fast-math; the addend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FMLA on .s elements.
define <vscale x 2 x float> @fmla_sx2(<vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
; CHECK-LABEL: fmla_sx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %res = fadd fast <vscale x 2 x float> %acc, %mul
  ret <vscale x 2 x float> %res
}

; %acc + %m1 * %m2 on <vscale x 2 x double>, fast-math; the addend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FMLA on .d elements.
define <vscale x 2 x double> @fmla_d(<vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
; CHECK-LABEL: fmla_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %res = fadd fast <vscale x 2 x double> %acc, %mul
  ret <vscale x 2 x double> %res
}

; %acc - %m1 * %m2 on <vscale x 8 x half>, fast-math; the minuend %acc is the first argument.
; Expected codegen: ptrue p0.h, then one FMLS on .h elements.
define <vscale x 8 x half> @fmls_h(<vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
; CHECK-LABEL: fmls_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %res = fsub fast <vscale x 8 x half> %acc, %mul
  ret <vscale x 8 x half> %res
}

; %acc - %m1 * %m2 on <vscale x 4 x half>, fast-math; the minuend %acc is the first argument.
; Expected codegen: ptrue p0.s, then one FMLS on .h elements.
define <vscale x 4 x half> @fmls_hx4(<vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
; CHECK-LABEL: fmls_hx4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %res = fsub fast <vscale x 4 x half> %acc, %mul
  ret <vscale x 4 x half> %res
}

; %acc - %m1 * %m2 on <vscale x 2 x half>, fast-math; the minuend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FMLS on .h elements.
define <vscale x 2 x half> @fmls_hx2(<vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
; CHECK-LABEL: fmls_hx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %res = fsub fast <vscale x 2 x half> %acc, %mul
  ret <vscale x 2 x half> %res
}

; %acc - %m1 * %m2 on <vscale x 4 x float>, fast-math; the minuend %acc is the first argument.
; Expected codegen: ptrue p0.s, then one FMLS on .s elements.
define <vscale x 4 x float> @fmls_s(<vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
; CHECK-LABEL: fmls_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %res = fsub fast <vscale x 4 x float> %acc, %mul
  ret <vscale x 4 x float> %res
}

; %acc - %m1 * %m2 on <vscale x 2 x float>, fast-math; the minuend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FMLS on .s elements.
define <vscale x 2 x float> @fmls_sx2(<vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
; CHECK-LABEL: fmls_sx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %res = fsub fast <vscale x 2 x float> %acc, %mul
  ret <vscale x 2 x float> %res
}

; %acc - %m1 * %m2 on <vscale x 2 x double>, fast-math; the minuend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FMLS on .d elements.
define <vscale x 2 x double> @fmls_d(<vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
; CHECK-LABEL: fmls_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %res = fsub fast <vscale x 2 x double> %acc, %mul
  ret <vscale x 2 x double> %res
}

; %acc - %m1 * %m2 on <vscale x 8 x half>, fast-math; the minuend %acc is the last argument.
; Expected codegen: ptrue p0.h, then one FMSB on .h elements.
define <vscale x 8 x half> @fmsb_h(<vscale x 8 x half> %m1, <vscale x 8 x half> %m2, <vscale x 8 x half> %acc) {
; CHECK-LABEL: fmsb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %res = fsub fast <vscale x 8 x half> %acc, %mul
  ret <vscale x 8 x half> %res
}

; %acc - %m1 * %m2 on <vscale x 4 x half>, fast-math; the minuend %acc is the last argument.
; Expected codegen: ptrue p0.s, then one FMSB on .h elements.
define <vscale x 4 x half> @fmsb_hx4(<vscale x 4 x half> %m1, <vscale x 4 x half> %m2, <vscale x 4 x half> %acc) {
; CHECK-LABEL: fmsb_hx4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %res = fsub fast <vscale x 4 x half> %acc, %mul
  ret <vscale x 4 x half> %res
}

; %acc - %m1 * %m2 on <vscale x 2 x half>, fast-math; the minuend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FMSB on .h elements.
define <vscale x 2 x half> @fmsb_hx2(<vscale x 2 x half> %m1, <vscale x 2 x half> %m2, <vscale x 2 x half> %acc) {
; CHECK-LABEL: fmsb_hx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %res = fsub fast <vscale x 2 x half> %acc, %mul
  ret <vscale x 2 x half> %res
}

; %acc - %m1 * %m2 on <vscale x 4 x float>, fast-math; the minuend %acc is the last argument.
; Expected codegen: ptrue p0.s, then one FMSB on .s elements.
define <vscale x 4 x float> @fmsb_s(<vscale x 4 x float> %m1, <vscale x 4 x float> %m2, <vscale x 4 x float> %acc) {
; CHECK-LABEL: fmsb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fmsb z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %res = fsub fast <vscale x 4 x float> %acc, %mul
  ret <vscale x 4 x float> %res
}

; %acc - %m1 * %m2 on <vscale x 2 x float>, fast-math; the minuend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FMSB on .s elements.
define <vscale x 2 x float> @fmsb_sx2(<vscale x 2 x float> %m1, <vscale x 2 x float> %m2, <vscale x 2 x float> %acc) {
; CHECK-LABEL: fmsb_sx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmsb z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %res = fsub fast <vscale x 2 x float> %acc, %mul
  ret <vscale x 2 x float> %res
}

; %acc - %m1 * %m2 on <vscale x 2 x double>, fast-math; the minuend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FMSB on .d elements.
define <vscale x 2 x double> @fmsb_d(<vscale x 2 x double> %m1, <vscale x 2 x double> %m2, <vscale x 2 x double> %acc) {
; CHECK-LABEL: fmsb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmsb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %res = fsub fast <vscale x 2 x double> %acc, %mul
  ret <vscale x 2 x double> %res
}

; (-%m1) * %m2 - %acc on <vscale x 8 x half>, fast-math; %acc is the last argument.
; Expected codegen: ptrue p0.h, then one FNMAD on .h elements.
define <vscale x 8 x half> @fnmad_h(<vscale x 8 x half> %m1, <vscale x 8 x half> %m2, <vscale x 8 x half> %acc) {
; CHECK-LABEL: fnmad_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fnmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 8 x half> %m1
  %mul = fmul fast <vscale x 8 x half> %neg_m1, %m2
  %res = fsub fast <vscale x 8 x half> %mul, %acc
  ret <vscale x 8 x half> %res
}

; (-%m1) * %m2 - %acc on <vscale x 4 x half>, fast-math; %acc is the last argument.
; Expected codegen: ptrue p0.s, then one FNMAD on .h elements.
define <vscale x 4 x half> @fnmad_hx4(<vscale x 4 x half> %m1, <vscale x 4 x half> %m2, <vscale x 4 x half> %acc) {
; CHECK-LABEL: fnmad_hx4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 4 x half> %m1
  %mul = fmul fast <vscale x 4 x half> %neg_m1, %m2
  %res = fsub fast <vscale x 4 x half> %mul, %acc
  ret <vscale x 4 x half> %res
}

; (-%m1) * %m2 - %acc on <vscale x 2 x half>, fast-math; %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FNMAD on .h elements.
define <vscale x 2 x half> @fnmad_hx2(<vscale x 2 x half> %m1, <vscale x 2 x half> %m2, <vscale x 2 x half> %acc) {
; CHECK-LABEL: fnmad_hx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmad z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 2 x half> %m1
  %mul = fmul fast <vscale x 2 x half> %neg_m1, %m2
  %res = fsub fast <vscale x 2 x half> %mul, %acc
  ret <vscale x 2 x half> %res
}

; (-%m1) * %m2 - %acc on <vscale x 4 x float>, fast-math; %acc is the last argument.
; Expected codegen: ptrue p0.s, then one FNMAD on .s elements.
define <vscale x 4 x float> @fnmad_s(<vscale x 4 x float> %m1, <vscale x 4 x float> %m2, <vscale x 4 x float> %acc) {
; CHECK-LABEL: fnmad_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 4 x float> %m1
  %mul = fmul fast <vscale x 4 x float> %neg_m1, %m2
  %res = fsub fast <vscale x 4 x float> %mul, %acc
  ret <vscale x 4 x float> %res
}

; (-%m1) * %m2 - %acc on <vscale x 2 x float>, fast-math; %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FNMAD on .s elements.
define <vscale x 2 x float> @fnmad_sx2(<vscale x 2 x float> %m1, <vscale x 2 x float> %m2, <vscale x 2 x float> %acc) {
; CHECK-LABEL: fnmad_sx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmad z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 2 x float> %m1
  %mul = fmul fast <vscale x 2 x float> %neg_m1, %m2
  %res = fsub fast <vscale x 2 x float> %mul, %acc
  ret <vscale x 2 x float> %res
}

; (-%m1) * %m2 - %acc on <vscale x 2 x double>, fast-math; %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FNMAD on .d elements.
define <vscale x 2 x double> @fnmad_d(<vscale x 2 x double> %m1, <vscale x 2 x double> %m2, <vscale x 2 x double> %acc) {
; CHECK-LABEL: fnmad_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmad z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 2 x double> %m1
  %mul = fmul fast <vscale x 2 x double> %neg_m1, %m2
  %res = fsub fast <vscale x 2 x double> %mul, %acc
  ret <vscale x 2 x double> %res
}

; (-%m1) * %m2 - %acc on <vscale x 8 x half>, fast-math; %acc is the first argument.
; Expected codegen: ptrue p0.h, then one FNMLA on .h elements.
define <vscale x 8 x half> @fnmla_h(<vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
; CHECK-LABEL: fnmla_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fnmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 8 x half> %m1
  %mul = fmul fast <vscale x 8 x half> %neg_m1, %m2
  %res = fsub fast <vscale x 8 x half> %mul, %acc
  ret <vscale x 8 x half> %res
}

; (-%m1) * %m2 - %acc on <vscale x 4 x half>, fast-math; %acc is the first argument.
; Expected codegen: ptrue p0.s, then one FNMLA on .h elements.
define <vscale x 4 x half> @fnmla_hx4(<vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
; CHECK-LABEL: fnmla_hx4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 4 x half> %m1
  %mul = fmul fast <vscale x 4 x half> %neg_m1, %m2
  %res = fsub fast <vscale x 4 x half> %mul, %acc
  ret <vscale x 4 x half> %res
}

; (-%m1) * %m2 - %acc on <vscale x 2 x half>, fast-math; %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FNMLA on .h elements.
define <vscale x 2 x half> @fnmla_hx2(<vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
; CHECK-LABEL: fnmla_hx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 2 x half> %m1
  %mul = fmul fast <vscale x 2 x half> %neg_m1, %m2
  %res = fsub fast <vscale x 2 x half> %mul, %acc
  ret <vscale x 2 x half> %res
}

; (-%m1) * %m2 - %acc on <vscale x 4 x float>, fast-math; %acc is the first argument.
; Expected codegen: ptrue p0.s, then one FNMLA on .s elements.
define <vscale x 4 x float> @fnmla_s(<vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
; CHECK-LABEL: fnmla_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 4 x float> %m1
  %mul = fmul fast <vscale x 4 x float> %neg_m1, %m2
  %res = fsub fast <vscale x 4 x float> %mul, %acc
  ret <vscale x 4 x float> %res
}

; (-%m1) * %m2 - %acc on <vscale x 2 x float>, fast-math; %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FNMLA on .s elements.
define <vscale x 2 x float> @fnmla_sx2(<vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
; CHECK-LABEL: fnmla_sx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 2 x float> %m1
  %mul = fmul fast <vscale x 2 x float> %neg_m1, %m2
  %res = fsub fast <vscale x 2 x float> %mul, %acc
  ret <vscale x 2 x float> %res
}

; (-%m1) * %m2 - %acc on <vscale x 2 x double>, fast-math; %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FNMLA on .d elements.
define <vscale x 2 x double> @fnmla_d(<vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
; CHECK-LABEL: fnmla_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %neg_m1 = fneg fast <vscale x 2 x double> %m1
  %mul = fmul fast <vscale x 2 x double> %neg_m1, %m2
  %res = fsub fast <vscale x 2 x double> %mul, %acc
  ret <vscale x 2 x double> %res
}

; -(%m1 * %m2 + %acc) on <vscale x 8 x half>, fast-math: the negation is applied after the add.
; Expected codegen: ptrue p0.h, then one FNMLA on .h elements.
define <vscale x 8 x half> @fnmla_h_reversed(<vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
; CHECK-LABEL: fnmla_h_reversed:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fnmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %add = fadd fast <vscale x 8 x half> %mul, %acc
  %res = fneg fast <vscale x 8 x half> %add
  ret <vscale x 8 x half> %res
}

; -(%m1 * %m2 + %acc) on <vscale x 4 x half>, fast-math: the negation is applied after the add.
; Expected codegen: ptrue p0.s, then one FNMLA on .h elements.
define <vscale x 4 x half> @fnmla_hx4_reversed(<vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
; CHECK-LABEL: fnmla_hx4_reversed:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %add = fadd fast <vscale x 4 x half> %mul, %acc
  %res = fneg fast <vscale x 4 x half> %add
  ret <vscale x 4 x half> %res
}

; -(%m1 * %m2 + %acc) on <vscale x 2 x half>, fast-math: the negation is applied after the add.
; Expected codegen: ptrue p0.d, then one FNMLA on .h elements.
define <vscale x 2 x half> @fnmla_hx2_reversed(<vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
; CHECK-LABEL: fnmla_hx2_reversed:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %add = fadd fast <vscale x 2 x half> %mul, %acc
  %res = fneg fast <vscale x 2 x half> %add
  ret <vscale x 2 x half> %res
}

; -(%m1 * %m2 + %acc) on <vscale x 4 x float>, fast-math: the negation is applied after the add.
; Expected codegen: ptrue p0.s, then one FNMLA on .s elements.
define <vscale x 4 x float> @fnmla_s_reversed(<vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
; CHECK-LABEL: fnmla_s_reversed:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %add = fadd fast <vscale x 4 x float> %mul, %acc
  %res = fneg fast <vscale x 4 x float> %add
  ret <vscale x 4 x float> %res
}

; -(%m1 * %m2 + %acc) on <vscale x 2 x float>, fast-math: the negation is applied after the add.
; Expected codegen: ptrue p0.d, then one FNMLA on .s elements.
define <vscale x 2 x float> @fnmla_sx2_reversed(<vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
; CHECK-LABEL: fnmla_sx2_reversed:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %add = fadd fast <vscale x 2 x float> %mul, %acc
  %res = fneg fast <vscale x 2 x float> %add
  ret <vscale x 2 x float> %res
}

; -(%m1 * %m2 + %acc) on <vscale x 2 x double>, fast-math: the negation is applied after the add.
; Expected codegen: ptrue p0.d, then one FNMLA on .d elements.
define <vscale x 2 x double> @fnmla_d_reversed(<vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
; CHECK-LABEL: fnmla_d_reversed:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %add = fadd fast <vscale x 2 x double> %mul, %acc
  %res = fneg fast <vscale x 2 x double> %add
  ret <vscale x 2 x double> %res
}

; %m1 * %m2 - %acc on <vscale x 8 x half>, fast-math; the subtrahend %acc is the first argument.
; Expected codegen: ptrue p0.h, then one FNMLS on .h elements.
define <vscale x 8 x half> @fnmls_h(<vscale x 8 x half> %acc, <vscale x 8 x half> %m1, <vscale x 8 x half> %m2) {
; CHECK-LABEL: fnmls_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fnmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %res = fsub fast <vscale x 8 x half> %mul, %acc
  ret <vscale x 8 x half> %res
}

; %m1 * %m2 - %acc on <vscale x 4 x half>, fast-math; the subtrahend %acc is the first argument.
; Expected codegen: ptrue p0.s, then one FNMLS on .h elements.
define <vscale x 4 x half> @fnmls_hx4(<vscale x 4 x half> %acc, <vscale x 4 x half> %m1, <vscale x 4 x half> %m2) {
; CHECK-LABEL: fnmls_hx4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %res = fsub fast <vscale x 4 x half> %mul, %acc
  ret <vscale x 4 x half> %res
}

; %m1 * %m2 - %acc on <vscale x 2 x half>, fast-math; the subtrahend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FNMLS on .h elements.
define <vscale x 2 x half> @fnmls_hx2(<vscale x 2 x half> %acc, <vscale x 2 x half> %m1, <vscale x 2 x half> %m2) {
; CHECK-LABEL: fnmls_hx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmls z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %res = fsub fast <vscale x 2 x half> %mul, %acc
  ret <vscale x 2 x half> %res
}

; %m1 * %m2 - %acc on <vscale x 4 x float>, fast-math; the subtrahend %acc is the first argument.
; Expected codegen: ptrue p0.s, then one FNMLS on .s elements.
define <vscale x 4 x float> @fnmls_s(<vscale x 4 x float> %acc, <vscale x 4 x float> %m1, <vscale x 4 x float> %m2) {
; CHECK-LABEL: fnmls_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %res = fsub fast <vscale x 4 x float> %mul, %acc
  ret <vscale x 4 x float> %res
}

; %m1 * %m2 - %acc on <vscale x 2 x float>, fast-math; the subtrahend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FNMLS on .s elements.
define <vscale x 2 x float> @fnmls_sx2(<vscale x 2 x float> %acc, <vscale x 2 x float> %m1, <vscale x 2 x float> %m2) {
; CHECK-LABEL: fnmls_sx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmls z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %res = fsub fast <vscale x 2 x float> %mul, %acc
  ret <vscale x 2 x float> %res
}

; %m1 * %m2 - %acc on <vscale x 2 x double>, fast-math; the subtrahend %acc is the first argument.
; Expected codegen: ptrue p0.d, then one FNMLS on .d elements.
define <vscale x 2 x double> @fnmls_d(<vscale x 2 x double> %acc, <vscale x 2 x double> %m1, <vscale x 2 x double> %m2) {
; CHECK-LABEL: fnmls_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmls z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %res = fsub fast <vscale x 2 x double> %mul, %acc
  ret <vscale x 2 x double> %res
}

; %m1 * %m2 - %acc on <vscale x 8 x half>, fast-math; the subtrahend %acc is the last argument.
; Expected codegen: ptrue p0.h, then one FNMSB on .h elements.
define <vscale x 8 x half> @fnmsb_h(<vscale x 8 x half> %m1, <vscale x 8 x half> %m2, <vscale x 8 x half> %acc) {
; CHECK-LABEL: fnmsb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    fnmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 8 x half> %m1, %m2
  %res = fsub fast <vscale x 8 x half> %mul, %acc
  ret <vscale x 8 x half> %res
}

; %m1 * %m2 - %acc on <vscale x 4 x half>, fast-math; the subtrahend %acc is the last argument.
; Expected codegen: ptrue p0.s, then one FNMSB on .h elements.
define <vscale x 4 x half> @fnmsb_hx4(<vscale x 4 x half> %m1, <vscale x 4 x half> %m2, <vscale x 4 x half> %acc) {
; CHECK-LABEL: fnmsb_hx4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x half> %m1, %m2
  %res = fsub fast <vscale x 4 x half> %mul, %acc
  ret <vscale x 4 x half> %res
}

; %m1 * %m2 - %acc on <vscale x 2 x half>, fast-math; the subtrahend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FNMSB on .h elements.
define <vscale x 2 x half> @fnmsb_hx2(<vscale x 2 x half> %m1, <vscale x 2 x half> %m2, <vscale x 2 x half> %acc) {
; CHECK-LABEL: fnmsb_hx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x half> %m1, %m2
  %res = fsub fast <vscale x 2 x half> %mul, %acc
  ret <vscale x 2 x half> %res
}

; %m1 * %m2 - %acc on <vscale x 4 x float>, fast-math; the subtrahend %acc is the last argument.
; Expected codegen: ptrue p0.s, then one FNMSB on .s elements.
define <vscale x 4 x float> @fnmsb_s(<vscale x 4 x float> %m1, <vscale x 4 x float> %m2, <vscale x 4 x float> %acc) {
; CHECK-LABEL: fnmsb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    fnmsb z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 4 x float> %m1, %m2
  %res = fsub fast <vscale x 4 x float> %mul, %acc
  ret <vscale x 4 x float> %res
}

; %m1 * %m2 - %acc on <vscale x 2 x float>, fast-math; the subtrahend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FNMSB on .s elements.
define <vscale x 2 x float> @fnmsb_sx2(<vscale x 2 x float> %m1, <vscale x 2 x float> %m2, <vscale x 2 x float> %acc) {
; CHECK-LABEL: fnmsb_sx2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmsb z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x float> %m1, %m2
  %res = fsub fast <vscale x 2 x float> %mul, %acc
  ret <vscale x 2 x float> %res
}

; %m1 * %m2 - %acc on <vscale x 2 x double>, fast-math; the subtrahend %acc is the last argument.
; Expected codegen: ptrue p0.d, then one FNMSB on .d elements.
define <vscale x 2 x double> @fnmsb_d(<vscale x 2 x double> %m1, <vscale x 2 x double> %m2, <vscale x 2 x double> %acc) {
; CHECK-LABEL: fnmsb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fnmsb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT:    ret
  %mul = fmul fast <vscale x 2 x double> %m1, %m2
  %res = fsub fast <vscale x 2 x double> %mul, %acc
  ret <vscale x 2 x double> %res
}