mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
4ccc1cffed
This adds fp16 VMOVX patterns, using the same patterns as rL362482 with some adjustments for MVE. It allows us to move fp16 registers without going into and out of gprs. VMOVX is able to move the top bits from a fp16 in a fp reg into the bottom bits of another register, zeroing the rest. This can be used for odd MVE register lanes. The top bits are not read by fp16 instructions, so no move is required there if we are dealing with even lanes. Differential revision: https://reviews.llvm.org/D66793 llvm-svn: 370184
26 lines
1008 B
LLVM
26 lines
1008 B
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
|
|
|
|
define arm_aapcs_vfpcc <4 x float> @fma_v4f32(<4 x float> %dst, <4 x float> %s1, <4 x float> %s2) {
|
|
; CHECK-LABEL: fma_v4f32:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vfma.f32 q0, q1, q2
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s1, <4 x float> %s2, <4 x float> %dst)
|
|
ret <4 x float> %0
|
|
}
|
|
|
|
define arm_aapcs_vfpcc <8 x half> @fma_v8f16(<8 x half> %dst, <8 x half> %s1, <8 x half> %s2) {
|
|
; CHECK-LABEL: fma_v8f16:
|
|
; CHECK: @ %bb.0: @ %entry
|
|
; CHECK-NEXT: vfma.f16 q0, q1, q2
|
|
; CHECK-NEXT: bx lr
|
|
entry:
|
|
%0 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %s1, <8 x half> %s2, <8 x half> %dst)
|
|
ret <8 x half> %0
|
|
}
|
|
|
|
declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
|
|
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
|