; Mirrored from https://github.com/RPCS3/llvm-mirror.git (synced 2025-01-31 20:51:52 +01:00)
; Commit e6ae287b2e:
;   This patch adds new ISD nodes, FCVTZS_MERGE_PASSTHRU & FCVTZU_MERGE_PASSTHRU,
;   which are used to lower scalable vector FP_TO_SINT/FP_TO_UINT operations and
;   the following intrinsics:
;     - llvm.aarch64.sve.fcvtzu
;     - llvm.aarch64.sve.fcvtzs
;   Reviewed By: efriedma, paulwalker-arm
;   Differential Revision: https://reviews.llvm.org/D87232
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t

; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning

; FP_TO_SINT

; Split operand
; fptosi nxv4f64 -> nxv4i32: the operand is split into two legal nxv2f64
; halves, each converted with a predicated fcvtzs, then the narrowed results
; are re-interleaved with uzp1.
define <vscale x 4 x i32> @fcvtzs_s_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvtzs_s_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 4 x double> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}
; fptosi nxv8f64 -> nxv8i16: a two-level split — four nxv2f64 parts are
; converted, then re-combined pairwise (uzp1 .s) and finally into the i16
; result (uzp1 .h).
define <vscale x 8 x i16> @fcvtzs_h_nxv8f64(<vscale x 8 x double> %a) {
; CHECK-LABEL: fcvtzs_h_nxv8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzs z3.d, p0/m, z3.d
; CHECK-NEXT:    fcvtzs z2.d, p0/m, z2.d
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 8 x double> %a to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}
; Split result
; fptosi nxv4f32 -> nxv4i64: the RESULT type is split — the f32 input is
; widened with uunpklo/uunpkhi and each half converted to an nxv2i64 part
; returned in z0/z1.
define <vscale x 4 x i64> @fcvtzs_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzs_d_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    fcvtzs z0.d, p0/m, z1.s
; CHECK-NEXT:    fcvtzs z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}
; fptosi nxv16f16 -> nxv16i32: result split into four nxv4i32 parts; both
; input registers are unpacked (lo/hi) and converted, filling z0-z3.
define <vscale x 16 x i32> @fcvtzs_s_nxv16f16(<vscale x 16 x half> %a) {
; CHECK-LABEL: fcvtzs_s_nxv16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z2.s, z0.h
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uunpkhi z3.s, z0.h
; CHECK-NEXT:    uunpklo z4.s, z1.h
; CHECK-NEXT:    uunpkhi z5.s, z1.h
; CHECK-NEXT:    fcvtzs z0.s, p0/m, z2.h
; CHECK-NEXT:    fcvtzs z1.s, p0/m, z3.h
; CHECK-NEXT:    fcvtzs z2.s, p0/m, z4.h
; CHECK-NEXT:    fcvtzs z3.s, p0/m, z5.h
; CHECK-NEXT:    ret
  %res = fptosi <vscale x 16 x half> %a to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}
; FP_TO_UINT

; Split operand
; fptoui nxv4f64 -> nxv4i32: split-operand case for the UNSIGNED conversion.
; fptoui must lower to fcvtzu (unsigned convert-to-integer); the previous
; CHECK lines wrongly expected the signed fcvtzs mnemonic.
define <vscale x 4 x i32> @fcvtzu_s_nxv4f64(<vscale x 4 x double> %a) {
; CHECK-LABEL: fcvtzu_s_nxv4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z1.d
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z0.d
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
; CHECK-NEXT:    ret
  %res = fptoui <vscale x 4 x double> %a to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}
; Split result
; fptoui nxv4f32 -> nxv4i64: split-result case for the unsigned conversion;
; mirrors fcvtzs_d_nxv4f32 but uses fcvtzu.
define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzu_d_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    fcvtzu z0.d, p0/m, z1.s
; CHECK-NEXT:    fcvtzu z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}