1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00
llvm-mirror/test/CodeGen/AArch64/sve2-intrinsics-fp-converts.ll
Sander de Smalen 6a9013112e [AArch64] Define ACLE FP conversion intrinsics with more specific predicate.
This patch changes the FP conversion intrinsics to take a predicate
that matches the number of lanes for the vector with the widest element
type as opposed to using <vscale x 16 x i1>.

For example:
```<vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 8 x half>)```
now uses <vscale x 4 x i1> instead of <vscale x 16 x i1>

And similar for:
```<vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float>, <vscale x 2 x i1>, <vscale x 2 x double>)```
where the predicate now matches the wider type, so <vscale x 2 x i1>.

Reviewers: efriedma, SjoerdMeijer, paulwalker-arm, rengolin

Reviewed By: efriedma

Tags: #clang

Differential Revision: https://reviews.llvm.org/D78402
2020-04-23 10:53:23 +01:00

85 lines
3.8 KiB
LLVM

; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
;
; FCVTLT
;
define <vscale x 4 x float> @fcvtlt_f32_f16(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, <vscale x 8 x half> %b) {
; CHECK-LABEL: fcvtlt_f32_f16:
; CHECK: fcvtlt z0.s, p0/m, z1.h
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float> %a,
<vscale x 4 x i1> %pg,
<vscale x 8 x half> %b)
ret <vscale x 4 x float> %out
}
define <vscale x 2 x double> @fcvtlt_f64_f32(<vscale x 2 x double> %a, <vscale x 2 x i1> %pg, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcvtlt_f64_f32:
; CHECK: fcvtlt z0.d, p0/m, z1.s
; CHECK-NEXT: ret
%out = call <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double> %a,
<vscale x 2 x i1> %pg,
<vscale x 4 x float> %b)
ret <vscale x 2 x double> %out
}
;
; FCVTNT
;
define <vscale x 8 x half> @fcvtnt_f16_f32(<vscale x 8 x half> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
; CHECK-LABEL: fcvtnt_f16_f32:
; CHECK: fcvtnt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
%out = call <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half> %a,
<vscale x 4 x i1> %pg,
<vscale x 4 x float> %b)
ret <vscale x 8 x half> %out
}
define <vscale x 4 x float> @fcvtnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcvtnt_f32_f64:
; CHECK: fcvtnt z0.s, p0/m, z1.d
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float> %a,
<vscale x 2 x i1> %pg,
<vscale x 2 x double> %b)
ret <vscale x 4 x float> %out
}
;
; FCVTX
;
define <vscale x 4 x float> @fcvtx_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcvtx_f32_f64:
; CHECK: fcvtx z0.s, p0/m, z1.d
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float> %a,
<vscale x 2 x i1> %pg,
<vscale x 2 x double> %b)
ret <vscale x 4 x float> %out
}
;
; FCVTXNT
;
define <vscale x 4 x float> @fcvtxnt_f32_f64(<vscale x 4 x float> %a, <vscale x 2 x i1> %pg, <vscale x 2 x double> %b) {
; CHECK-LABEL: fcvtxnt_f32_f64:
; CHECK: fcvtxnt z0.s, p0/m, z1.d
; CHECK-NEXT: ret
%out = call <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float> %a,
<vscale x 2 x i1> %pg,
<vscale x 2 x double> %b)
ret <vscale x 4 x float> %out
}
declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtlt.f32f16(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 8 x half>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fcvtlt.f64f32(<vscale x 2 x double>, <vscale x 2 x i1>, <vscale x 4 x float>)
declare <vscale x 8 x half> @llvm.aarch64.sve.fcvtnt.f16f32(<vscale x 8 x half>, <vscale x 4 x i1>, <vscale x 4 x float>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtnt.f32f64(<vscale x 4 x float>, <vscale x 2 x i1>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtx.f32f64(<vscale x 4 x float>, <vscale x 2 x i1>, <vscale x 2 x double>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fcvtxnt.f32f64(<vscale x 4 x float>, <vscale x 2 x i1>, <vscale x 2 x double>)