1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[sve][acle] Implement some of the C intrinsics for brain float.

Summary:
The following intrinsics have been extended to support brain float types:

svbfloat16_t svclasta[_bf16](svbool_t pg, svbfloat16_t fallback, svbfloat16_t data)
bfloat16_t svclasta[_n_bf16](svbool_t pg, bfloat16_t fallback, svbfloat16_t data)
bfloat16_t svlasta[_bf16](svbool_t pg, svbfloat16_t op)

svbfloat16_t svclastb[_bf16](svbool_t pg, svbfloat16_t fallback, svbfloat16_t data)
bfloat16_t svclastb[_n_bf16](svbool_t pg, bfloat16_t fallback, svbfloat16_t data)
bfloat16_t svlastb[_bf16](svbool_t pg, svbfloat16_t op)

svbfloat16_t svdup[_n]_bf16(bfloat16_t op)
svbfloat16_t svdup[_n]_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op)
svbfloat16_t svdup[_n]_bf16_x(svbool_t pg, bfloat16_t op)
svbfloat16_t svdup[_n]_bf16_z(svbool_t pg, bfloat16_t op)

svbfloat16_t svdupq[_n]_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3, bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7)
svbfloat16_t svdupq_lane[_bf16](svbfloat16_t data, uint64_t index)

svbfloat16_t svinsr[_n_bf16](svbfloat16_t op1, bfloat16_t op2)

Reviewers: sdesmalen, kmclaughlin, c-rhodes, ctetreau, efriedma

Subscribers: tschuett, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D82345
This commit is contained in:
Francesco Petrogalli 2020-06-19 21:27:46 +00:00
parent 2c3749142b
commit b0f83ed2ae
6 changed files with 206 additions and 0 deletions

View File

@ -423,6 +423,11 @@ let Predicates = [HasSVE] in {
defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_mt>;
defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_mt>;
let Predicates = [HasSVE, HasBF16] in {
def : Pat<(nxv8bf16 (AArch64dup_mt nxv8i1:$pg, bf16:$splat, nxv8bf16:$passthru)),
(CPY_ZPmV_H $passthru, $pg, $splat)>;
}
// Duplicate FP scalar into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
@ -436,6 +441,10 @@ let Predicates = [HasSVE] in {
(DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
(DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
let Predicates = [HasSVE, HasBF16] in {
def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
(DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
}
// Duplicate +0.0 into all vector elements
def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
@ -444,6 +453,9 @@ let Predicates = [HasSVE] in {
def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
let Predicates = [HasSVE, HasBF16] in {
def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
}
// Duplicate Int immediate into all vector elements
def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
@ -486,6 +498,10 @@ let Predicates = [HasSVE] in {
defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
let Predicates = [HasSVE, HasBF16] in {
def : SVE_2_Op_Pat<nxv8bf16, AArch64insr, nxv8bf16, bf16, INSR_ZV_H>;
}
defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>;
defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>;
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
@ -554,11 +570,23 @@ let Predicates = [HasSVE] in {
defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>;
defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>;
let Predicates = [HasSVE, HasBF16] in {
def : SVE_3_Op_Pat<bf16, AArch64clasta_n, nxv8i1, bf16, nxv8bf16, CLASTA_VPZ_H>;
def : SVE_3_Op_Pat<bf16, AArch64clastb_n, nxv8i1, bf16, nxv8bf16, CLASTB_VPZ_H>;
def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clasta, nxv8i1, nxv8bf16, nxv8bf16, CLASTA_ZPZ_H>;
def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clastb, nxv8i1, nxv8bf16, nxv8bf16, CLASTB_ZPZ_H>;
}
defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>;
defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>;
defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>;
defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>;
let Predicates = [HasSVE, HasBF16] in {
def : SVE_2_Op_Pat<bf16, AArch64lasta, nxv8i1, nxv8bf16, LASTA_VPZ_H>;
def : SVE_2_Op_Pat<bf16, AArch64lastb, nxv8i1, nxv8bf16, LASTB_VPZ_H>;
}
// continuous load with reg+immediate
defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;
@ -1499,6 +1527,13 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
}
let Predicates = [IsLE, HasBF16, HasSVE] in {
def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
}
let Predicates = [IsLE, HasSVE, HasBF16] in {

View File

@ -81,6 +81,14 @@ define <vscale x 8 x half> @dup_f16(half %b) {
ret <vscale x 8 x half> %out
}
define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
; CHECK-LABEL: dup_bf16:
; CHECK: mov z0.h, h0
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
ret <vscale x 8 x bfloat> %out
}
define <vscale x 8 x half> @dup_imm_f16(half %b) {
; CHECK-LABEL: dup_imm_f16:
; CHECK: mov z0.h, #16.00000000
@ -126,5 +134,9 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }

View File

@ -57,6 +57,16 @@ define <vscale x 8 x half> @clasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
ret <vscale x 8 x half> %out
}
define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_bf16:
; CHECK: clasta z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
<vscale x 8 x bfloat> %a,
<vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %out
}
define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_f32:
; CHECK: clasta z0.s, p0, z0.s, z1.s
@ -131,6 +141,16 @@ define half @clasta_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b
ret half %out
}
define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clasta_n_bf16:
; CHECK: clasta h0, p0, h0, z1.h
; CHECK-NEXT: ret
%out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
bfloat %a,
<vscale x 8 x bfloat> %b)
ret bfloat %out
}
define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clasta_n_f32:
; CHECK: clasta s0, p0, s0, z1.s
@ -205,6 +225,16 @@ define <vscale x 8 x half> @clastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half
ret <vscale x 8 x half> %out
}
define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_bf16:
; CHECK: clastb z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
<vscale x 8 x bfloat> %a,
<vscale x 8 x bfloat> %b)
ret <vscale x 8 x bfloat> %out
}
define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_f32:
; CHECK: clastb z0.s, p0, z0.s, z1.s
@ -279,6 +309,16 @@ define half @clastb_n_f16(<vscale x 8 x i1> %pg, half %a, <vscale x 8 x half> %b
ret half %out
}
define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
; CHECK-LABEL: clastb_n_bf16:
; CHECK: clastb h0, p0, h0, z1.h
; CHECK-NEXT: ret
%out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
bfloat %a,
<vscale x 8 x bfloat> %b)
ret bfloat %out
}
define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: clastb_n_f32:
; CHECK: clastb s0, p0, s0, z1.s
@ -343,6 +383,14 @@ define <vscale x 8 x half> @dupq_f16(<vscale x 8 x half> %a) {
ret <vscale x 8 x half> %out
}
define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: dupq_bf16:
; CHECK: mov z0.q, q0
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
ret <vscale x 8 x bfloat> %out
}
define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: dupq_f32:
; CHECK: mov z0.q, z0.q[1]
@ -432,6 +480,20 @@ define <vscale x 8 x half> @dupq_lane_f16(<vscale x 8 x half> %a, i64 %idx) {
ret <vscale x 8 x half> %out
}
; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
; CHECK-LABEL: dupq_lane_bf16:
; CHECK-DAG: index [[Z1:z[0-9]+]].d, #0, #1
; CHECK-DAG: and [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
; CHECK-DAG: add [[X1:x[0-9]+]], x0, x0
; CHECK-DAG: mov [[Z3:z[0-9]+]].d, [[X1]]
; CHECK: add [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
; CHECK: tbl z0.d, { z0.d }, [[Z4]].d
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
ret <vscale x 8 x bfloat> %out
}
; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
; CHECK-LABEL: dupq_lane_f32:
@ -605,6 +667,15 @@ define half @lasta_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
ret half %res
}
define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lasta_bf16
; CHECK: lasta h0, p0, z0.h
; CHECK-NEXT: ret
%res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
<vscale x 8 x bfloat> %a)
ret bfloat %res
}
define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lasta_f32
; CHECK: lasta s0, p0, z0.s
@ -681,6 +752,15 @@ define half @lastb_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a) {
ret half %res
}
define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
; CHECK-LABEL: lastb_bf16
; CHECK: lastb h0, p0, z0.h
; CHECK-NEXT: ret
%res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
<vscale x 8 x bfloat> %a)
ret bfloat %res
}
define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
; CHECK-LABEL: lastb_f32
; CHECK: lastb s0, p0, z0.s
@ -1851,6 +1931,7 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.clasta.nxv8i16(<vscale x 8 x i1>, <
declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@ -1859,6 +1940,7 @@ declare i16 @llvm.aarch64.sve.clasta.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x
declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
@ -1867,6 +1949,7 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.clastb.nxv8i16(<vscale x 8 x i1>, <
declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@ -1875,6 +1958,7 @@ declare i16 @llvm.aarch64.sve.clastb.n.nxv8i16(<vscale x 8 x i1>, i16, <vscale x
declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
@ -1888,6 +1972,7 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)
@ -1905,6 +1990,7 @@ declare i16 @llvm.aarch64.sve.lasta.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16
declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
@ -1914,6 +2000,7 @@ declare i16 @llvm.aarch64.sve.lastb.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16
declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)

View File

@ -57,6 +57,16 @@ define <vscale x 8 x half> @dup_f16(<vscale x 8 x half> %a, <vscale x 8 x i1> %p
ret <vscale x 8 x half> %out
}
define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, bfloat %b) #0 {
; CHECK-LABEL: dup_bf16:
; CHECK: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %a,
<vscale x 8 x i1> %pg,
bfloat %b)
ret <vscale x 8 x bfloat> %out
}
define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, float %b) {
; CHECK-LABEL: dup_f32:
; CHECK: mov z0.s, p0/m, s1
@ -77,10 +87,41 @@ define <vscale x 2 x double> @dup_f64(<vscale x 2 x double> %a, <vscale x 2 x i1
ret <vscale x 2 x double> %out
}
define <vscale x 8 x bfloat> @test_svdup_n_bf16_z(<vscale x 8 x i1> %pg, bfloat %op) #0 {
; CHECK-LABEL: test_svdup_n_bf16_z:
; CHECK: mov z1.h, #0
; CHECK: mov z1.h, p0/m, h0
; CHECK: mov z0.d, z1.d
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %pg, bfloat %op)
ret <vscale x 8 x bfloat> %out
}
define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op) #0 {
; CHECK-LABEL: test_svdup_n_bf16_m:
; CHECK: mov z0.h, p0/m, h1
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op)
ret <vscale x 8 x bfloat> %out
}
define <vscale x 8 x bfloat> @test_svdup_n_bf16_x(<vscale x 8 x i1> %pg, bfloat %op) #0 {
; CHECK-LABEL: test_svdup_n_bf16_x:
; CHECK: mov z0.h, p0/m, h0
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %pg, bfloat %op)
ret <vscale x 8 x bfloat> %out
}
declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)
; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }

View File

@ -165,6 +165,14 @@ define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
ret <vscale x 8 x half> %out
}
define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0 {
; CHECK-LABEL: insr_bf16:
; CHECK: insr z0.h, h1
; CHECK-NEXT: ret
%out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
ret <vscale x 8 x bfloat> %out
}
define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
; CHECK-LABEL: insr_f32:
; CHECK: insr z0.s, s1
@ -348,6 +356,7 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i1
declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat>, bfloat)
declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)
@ -368,3 +377,6 @@ declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vsc
declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }

View File

@ -172,6 +172,15 @@ define <vscale x 16 x i1> @sve_splat_16xi1(i1 %val) {
;; Splats of legal floating point vector types
define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
; CHECK-LABEL: splat_nxv8bf16:
; CHECK: mov z0.h, h0
; CHECK-NEXT: ret
%1 = insertelement <vscale x 8 x bfloat> undef, bfloat %val, i32 0
%2 = shufflevector <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
ret <vscale x 8 x bfloat> %2
}
define <vscale x 8 x half> @splat_nxv8f16(half %val) {
; CHECK-LABEL: splat_nxv8f16:
; CHECK: mov z0.h, h0
@ -233,6 +242,13 @@ define <vscale x 8 x half> @splat_nxv8f16_zero() {
ret <vscale x 8 x half> zeroinitializer
}
define <vscale x 8 x bfloat> @splat_nxv8bf16_zero() #0 {
; CHECK-LABEL: splat_nxv8bf16_zero:
; CHECK: mov z0.h, #0
; CHECK-NEXT: ret
ret <vscale x 8 x bfloat> zeroinitializer
}
define <vscale x 4 x half> @splat_nxv4f16_zero() {
; CHECK-LABEL: splat_nxv4f16_zero:
; CHECK: mov z0.h, #0
@ -321,3 +337,6 @@ define <vscale x 2 x double> @splat_nxv2f64_imm() {
%2 = shufflevector <vscale x 2 x double> %1, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
ret <vscale x 2 x double> %2
}
; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }