mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 13:11:39 +01:00
aa3627ac28
Summary: List of intrinsics: svfloat32_t svbfdot[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3) svfloat32_t svbfdot[_n_f32](svfloat32_t op1, svbfloat16_t op2, bfloat16_t op3) svfloat32_t svbfdot_lane[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index) svfloat32_t svbfmmla[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3) svfloat32_t svbfmlalb[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3) svfloat32_t svbfmlalb[_n_f32](svfloat32_t op1, svbfloat16_t op2, bfloat16_t op3) svfloat32_t svbfmlalb_lane[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index) svfloat32_t svbfmlalt[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3) svfloat32_t svbfmlalt[_n_f32](svfloat32_t op1, svbfloat16_t op2, bfloat16_t op3) svfloat32_t svbfmlalt_lane[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index) svbfloat16_t svcvt_bf16[_f32]_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op) svbfloat16_t svcvt_bf16[_f32]_x(svbool_t pg, svfloat32_t op) svbfloat16_t svcvt_bf16[_f32]_z(svbool_t pg, svfloat32_t op) svbfloat16_t svcvtnt_bf16[_f32]_m(svbfloat16_t even, svbool_t pg, svfloat32_t op) svbfloat16_t svcvtnt_bf16[_f32]_x(svbfloat16_t even, svbool_t pg, svfloat32_t op) For reference, see section 7.2 of "Arm C Language Extensions for SVE - Version 00bet4" Reviewers: sdesmalen, ctetreau, efriedma, david-arm, rengolin Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D82141
244 lines
12 KiB
LLVM
244 lines
12 KiB
LLVM
; Compiles this file with llc for aarch64-linux-gnu with the sve and bf16
; features enabled and verbose asm comments off, then pipes the assembly to
; FileCheck, which matches it against the directives embedded in this file.
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 -asm-verbose=0 < %s | FileCheck %s
|
|
|
|
;
|
|
; BFDOT
|
|
;
|
|
|
|
; Vector (non-indexed) form of the bfdot intrinsic. The emitted function body
; must be exactly `bfdot z0.s, z1.h, z2.h` followed by `ret`.
define <vscale x 4 x float> @bfdot_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfdot_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfdot with immediate lane 0: the i64 0 operand of the .lane
; intrinsic must appear as the `[0]` suffix on the last register operand.
define <vscale x 4 x float> @bfdot_lane_0_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfdot_lane_0_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[0]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 0)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfdot with immediate lane 1: the i64 1 operand of the .lane
; intrinsic must appear as the `[1]` suffix on the last register operand.
define <vscale x 4 x float> @bfdot_lane_1_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfdot_lane_1_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[1]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 1)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfdot with immediate lane 2: the i64 2 operand of the .lane
; intrinsic must appear as the `[2]` suffix on the last register operand.
define <vscale x 4 x float> @bfdot_lane_2_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfdot_lane_2_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[2]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 2)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfdot with immediate lane 3, the largest bfdot lane value used in
; this file; the expected operand suffix is `[3]`.
define <vscale x 4 x float> @bfdot_lane_3_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfdot_lane_3_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[3]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 3)
  ret <vscale x 4 x float> %result
}
|
|
|
|
;
|
|
; BFMLALB
|
|
;
|
|
|
|
; Vector (non-indexed) form of the bfmlalb intrinsic. The emitted function
; body must be exactly `bfmlalb z0.s, z1.h, z2.h` followed by `ret`.
define <vscale x 4 x float> @bfmlalb_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalb with immediate lane 0; the expected operand suffix on the
; last register is `[0]`.
define <vscale x 4 x float> @bfmlalb_lane_0_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_lane_0_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[0]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 0)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalb with immediate lane 1; the expected operand suffix on the
; last register is `[1]`.
define <vscale x 4 x float> @bfmlalb_lane_1_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_lane_1_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[1]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 1)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalb with immediate lane 2; the expected operand suffix on the
; last register is `[2]`.
define <vscale x 4 x float> @bfmlalb_lane_2_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_lane_2_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[2]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 2)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalb with immediate lane 3; the expected operand suffix on the
; last register is `[3]`.
define <vscale x 4 x float> @bfmlalb_lane_3_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_lane_3_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[3]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 3)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalb with immediate lane 4; the expected operand suffix on the
; last register is `[4]`.
define <vscale x 4 x float> @bfmlalb_lane_4_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_lane_4_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[4]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 4)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalb with immediate lane 5; the expected operand suffix on the
; last register is `[5]`.
define <vscale x 4 x float> @bfmlalb_lane_5_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_lane_5_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[5]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 5)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalb with immediate lane 6; the expected operand suffix on the
; last register is `[6]`.
define <vscale x 4 x float> @bfmlalb_lane_6_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_lane_6_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[6]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 6)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalb with immediate lane 7, the largest bfmlalb lane value used
; in this file; the expected operand suffix is `[7]`.
define <vscale x 4 x float> @bfmlalb_lane_7_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalb_lane_7_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[7]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 7)
  ret <vscale x 4 x float> %result
}
|
|
|
|
;
|
|
; BFMLALT
|
|
;
|
|
|
|
; Vector (non-indexed) form of the bfmlalt intrinsic. The emitted function
; body must be exactly `bfmlalt z0.s, z1.h, z2.h` followed by `ret`.
define <vscale x 4 x float> @bfmlalt_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalt with immediate lane 0; the expected operand suffix on the
; last register is `[0]`.
define <vscale x 4 x float> @bfmlalt_lane_0_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_lane_0_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[0]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 0)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalt with immediate lane 1; the expected operand suffix on the
; last register is `[1]`.
define <vscale x 4 x float> @bfmlalt_lane_1_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_lane_1_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[1]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 1)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalt with immediate lane 2; the expected operand suffix on the
; last register is `[2]`.
define <vscale x 4 x float> @bfmlalt_lane_2_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_lane_2_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[2]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 2)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalt with immediate lane 3; the expected operand suffix on the
; last register is `[3]`.
define <vscale x 4 x float> @bfmlalt_lane_3_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_lane_3_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[3]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 3)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalt with immediate lane 4; the expected operand suffix on the
; last register is `[4]`.
define <vscale x 4 x float> @bfmlalt_lane_4_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_lane_4_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[4]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 4)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalt with immediate lane 5; the expected operand suffix on the
; last register is `[5]`.
define <vscale x 4 x float> @bfmlalt_lane_5_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_lane_5_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[5]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 5)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalt with immediate lane 6; the expected operand suffix on the
; last register is `[6]`.
define <vscale x 4 x float> @bfmlalt_lane_6_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_lane_6_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[6]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 6)
  ret <vscale x 4 x float> %result
}
|
|
|
|
; Indexed bfmlalt with immediate lane 7, the largest bfmlalt lane value used
; in this file; the expected operand suffix is `[7]`.
define <vscale x 4 x float> @bfmlalt_lane_7_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmlalt_lane_7_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[7]
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3, i64 7)
  ret <vscale x 4 x float> %result
}
|
|
|
|
;
|
|
; BFMMLA
|
|
;
|
|
|
|
; The bfmmla intrinsic is only exercised in its three-vector form here. The
; emitted function body must be `bfmmla z0.s, z1.h, z2.h` followed by `ret`.
define <vscale x 4 x float> @bfmmla_f32(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3) nounwind {
; CHECK-LABEL: bfmmla_f32:
; CHECK-NEXT: bfmmla z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %result = call <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float> %op1, <vscale x 8 x bfloat> %op2, <vscale x 8 x bfloat> %op3)
  ret <vscale x 4 x float> %result
}
|
|
|
|
;
|
|
; BFCVT
|
|
;
|
|
|
|
; float -> bfloat conversion through the fcvt.bf16f32 intrinsic, which takes
; a bfloat vector, an i1 predicate vector and a float vector. The expected
; instruction is `bfcvt` with a `p0/m` predicate operand.
define <vscale x 8 x bfloat> @fcvt_bf16_f32(<vscale x 8 x bfloat> %dest, <vscale x 8 x i1> %pred, <vscale x 4 x float> %src) nounwind {
; CHECK-LABEL: fcvt_bf16_f32:
; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
  %result = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %dest, <vscale x 8 x i1> %pred, <vscale x 4 x float> %src)
  ret <vscale x 8 x bfloat> %result
}
|
|
|
|
;
|
|
; BFCVTNT
|
|
;
|
|
|
|
; Same operand layout as the fcvt_bf16_f32 case, but using the
; fcvtnt.bf16f32 intrinsic; the expected instruction is `bfcvtnt` with a
; `p0/m` predicate operand.
define <vscale x 8 x bfloat> @fcvtnt_bf16_f32(<vscale x 8 x bfloat> %dest, <vscale x 8 x i1> %pred, <vscale x 4 x float> %src) nounwind {
; CHECK-LABEL: fcvtnt_bf16_f32:
; CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
  %result = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %dest, <vscale x 8 x i1> %pred, <vscale x 4 x float> %src)
  ret <vscale x 8 x bfloat> %result
}
|
|
|
|
; Declarations of the SVE bfloat16 intrinsics called by the functions above.
; The .lane variants take one extra trailing i64 operand, which the callers
; pass as an immediate lane number.

; bfdot: vector and indexed forms.
declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)

; bfmlalb / bfmlalt: vector and indexed forms.
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)

; bfmmla: vector form only.
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)

; Predicated float -> bfloat conversions.
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
|