[sve][acle] Add SVE BFloat16 extensions.
Summary:

List of intrinsics:

svfloat32_t svbfdot[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3)
svfloat32_t svbfdot[_n_f32](svfloat32_t op1, svbfloat16_t op2, bfloat16_t op3)
svfloat32_t svbfdot_lane[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index)

svfloat32_t svbfmmla[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3)

svfloat32_t svbfmlalb[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3)
svfloat32_t svbfmlalb[_n_f32](svfloat32_t op1, svbfloat16_t op2, bfloat16_t op3)
svfloat32_t svbfmlalb_lane[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index)

svfloat32_t svbfmlalt[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3)
svfloat32_t svbfmlalt[_n_f32](svfloat32_t op1, svbfloat16_t op2, bfloat16_t op3)
svfloat32_t svbfmlalt_lane[_f32](svfloat32_t op1, svbfloat16_t op2, svbfloat16_t op3, uint64_t imm_index)

svbfloat16_t svcvt_bf16[_f32]_m(svbfloat16_t inactive, svbool_t pg, svfloat32_t op)
svbfloat16_t svcvt_bf16[_f32]_x(svbool_t pg, svfloat32_t op)
svbfloat16_t svcvt_bf16[_f32]_z(svbool_t pg, svfloat32_t op)

svbfloat16_t svcvtnt_bf16[_f32]_m(svbfloat16_t even, svbool_t pg, svfloat32_t op)
svbfloat16_t svcvtnt_bf16[_f32]_x(svbfloat16_t even, svbool_t pg, svfloat32_t op)

For reference, see section 7.2 of "Arm C Language Extensions for SVE - Version 00bet4".

Reviewers: sdesmalen, ctetreau, efriedma, david-arm, rengolin

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D82141
This commit is contained in:
parent f0d13192c9
commit aa3627ac28
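
For orientation before the diff, here is a minimal LLVM IR sketch (not part of the patch; the function name is illustrative) of how the masked conversion intrinsic added below is called. The signature is taken from the @llvm.aarch64.sve.fcvt.bf16f32 declaration and the CodeGen test introduced in this commit; it is the IR-level counterpart of the ACLE svcvt_bf16[_f32]_m form listed in the summary.

; Illustrative sketch only: %inactive supplies the result lanes where the
; predicate is false, mirroring svcvt_bf16[_f32]_m(inactive, pg, op).
define <vscale x 8 x bfloat> @example_cvt_bf16_f32_m(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, <vscale x 4 x float> %op) {
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, <vscale x 4 x float> %op)
  ret <vscale x 8 x bfloat> %res
}

declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)

With -mattr=+sve,+bf16 this is expected to select the bfcvt instruction, as exercised by the new test file at the end of this diff.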
@@ -1340,6 +1340,16 @@ class SVE_MatMul_Intrinsic
                [LLVMMatchType<0>, LLVMSubdivide4VectorType<0>, LLVMSubdivide4VectorType<0>],
                [IntrNoMem]>;

class SVE_4Vec_BF16
    : Intrinsic<[llvm_nxv4f32_ty],
                [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
                [IntrNoMem]>;

class SVE_4Vec_BF16_Indexed
    : Intrinsic<[llvm_nxv4f32_ty],
                [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty],
                [IntrNoMem, ImmArg<ArgIndex<3>>]>;

//
// Vector tuple creation intrinsics (ACLE)
//
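
A note on the ImmArg<ArgIndex<3>> marker on SVE_4Vec_BF16_Indexed above: it requires the fourth operand (the lane index) to be an immediate at every call site. A minimal, illustrative IR sketch (function name assumed; intrinsic signature taken from this patch):

; The i64 lane operand must be a compile-time constant (here 3); a
; non-constant index would violate the immarg requirement and be rejected
; by the IR verifier.
define <vscale x 4 x float> @example_bfdot_lane(<vscale x 4 x float> %acc, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %acc, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b, i64 3)
  ret <vscale x 4 x float> %res
}

declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)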
@@ -1793,6 +1803,9 @@ def int_aarch64_sve_fcvtzs_i32f64 : Builtin_SVCVT<"svcvt_s32_f64_m", llvm_nxv4
def int_aarch64_sve_fcvtzs_i64f16 : Builtin_SVCVT<"svcvt_s64_f16_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
def int_aarch64_sve_fcvtzs_i64f32 : Builtin_SVCVT<"svcvt_s64_f32_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv4f32_ty>;

def int_aarch64_sve_fcvt_bf16f32 : Builtin_SVCVT<"svcvt_bf16_f32_m", llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;
def int_aarch64_sve_fcvtnt_bf16f32 : Builtin_SVCVT<"svcvtnt_bf16_f32_m", llvm_nxv8bf16_ty, llvm_nxv8i1_ty, llvm_nxv4f32_ty>;

def int_aarch64_sve_fcvtzu_i32f16 : Builtin_SVCVT<"svcvt_u32_f16_m", llvm_nxv4i32_ty, llvm_nxv4i1_ty, llvm_nxv8f16_ty>;
def int_aarch64_sve_fcvtzu_i32f64 : Builtin_SVCVT<"svcvt_u32_f64_m", llvm_nxv4i32_ty, llvm_nxv2i1_ty, llvm_nxv2f64_ty>;
def int_aarch64_sve_fcvtzu_i64f16 : Builtin_SVCVT<"svcvt_u64_f16_m", llvm_nxv2i64_ty, llvm_nxv2i1_ty, llvm_nxv8f16_ty>;
@@ -2343,6 +2356,19 @@ def int_aarch64_sve_sudot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
//
def int_aarch64_sve_fmmla : AdvSIMD_3VectorArg_Intrinsic;

//
// SVE ACLE: 7.2. BFloat16 extensions
//

def int_aarch64_sve_bfdot : SVE_4Vec_BF16;
def int_aarch64_sve_bfmlalb : SVE_4Vec_BF16;
def int_aarch64_sve_bfmlalt : SVE_4Vec_BF16;

def int_aarch64_sve_bfmmla : SVE_4Vec_BF16;

def int_aarch64_sve_bfdot_lane : SVE_4Vec_BF16_Indexed;
def int_aarch64_sve_bfmlalb_lane : SVE_4Vec_BF16_Indexed;
def int_aarch64_sve_bfmlalt_lane : SVE_4Vec_BF16_Indexed;
}

//
@@ -1239,15 +1239,15 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
  defm FSQRT_ZPmZ : sve_fp_2op_p_zd_HSD<0b01101, "fsqrt", int_aarch64_sve_fsqrt>;

  let Predicates = [HasBF16, HasSVE] in {
-  def BFDOT_ZZZ : sve_bfloat_dot<"bfdot">;
-  def BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot">;
-  def BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla">;
-  def BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb">;
-  def BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt">;
-  def BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb">;
-  def BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt">;
-  def BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt">;
-  def BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt">;
+  defm BFDOT_ZZZ : sve_bfloat_dot<"bfdot", int_aarch64_sve_bfdot>;
+  defm BFDOT_ZZI : sve_bfloat_dot_indexed<"bfdot", int_aarch64_sve_bfdot_lane>;
+  defm BFMMLA_ZZZ : sve_bfloat_matmul<"bfmmla", int_aarch64_sve_bfmmla>;
+  defm BFMMLA_B_ZZZ : sve_bfloat_matmul_longvecl<0b0, "bfmlalb", int_aarch64_sve_bfmlalb>;
+  defm BFMMLA_T_ZZZ : sve_bfloat_matmul_longvecl<0b1, "bfmlalt", int_aarch64_sve_bfmlalt>;
+  defm BFMMLA_B_ZZI : sve_bfloat_matmul_longvecl_idx<0b0, "bfmlalb", int_aarch64_sve_bfmlalb_lane>;
+  defm BFMMLA_T_ZZI : sve_bfloat_matmul_longvecl_idx<0b1, "bfmlalt", int_aarch64_sve_bfmlalt_lane>;
+  defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32>;
+  defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32>;
  }

  // InstAliases
@@ -7460,6 +7460,11 @@ class sve_bfloat_dot<string asm>
  let Inst{20-16} = Zm;
}

multiclass sve_bfloat_dot<string asm, SDPatternOperator op> {
  def NAME : sve_bfloat_dot<asm>;
  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
}

class sve_bfloat_dot_indexed<string asm>
  : sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
                        (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexS:$iop)> {
@@ -7469,6 +7474,11 @@ class sve_bfloat_dot_indexed<string asm>
  let Inst{18-16} = Zm;
}

multiclass sve_bfloat_dot_indexed<string asm, SDPatternOperator op> {
  def NAME : sve_bfloat_dot_indexed<asm>;
  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexS_timm, !cast<Instruction>(NAME)>;
}

class sve_bfloat_matmul<string asm>
  : I<(outs ZPR32:$Zda), (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR16:$Zm),
      asm, "\t$Zda, $Zn, $Zm", "", []>, Sched<[]> {
@@ -7486,6 +7496,11 @@ class sve_bfloat_matmul<string asm>
  let ElementSize = ElementSizeH;
}

multiclass sve_bfloat_matmul<string asm, SDPatternOperator op> {
  def NAME : sve_bfloat_matmul<asm>;
  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
}

class sve_bfloat_matmul_longvecl<bit BT, string asm>
  : sve_bfloat_matmul<asm> {
  let Inst{23} = 0b1;
@@ -7493,6 +7508,11 @@ class sve_bfloat_matmul_longvecl<bit BT, string asm>
  let Inst{10} = BT;
}

multiclass sve_bfloat_matmul_longvecl<bit BT, string asm, SDPatternOperator op> {
  def NAME : sve_bfloat_matmul_longvecl<BT, asm>;
  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16 ,!cast<Instruction>(NAME)>;
}

class sve_bfloat_matmul_longvecl_idx<bit BT, string asm>
  : sve_bfloat_dot_base<0b01, asm, "\t$Zda, $Zn, $Zm$iop",
                        (ins ZPR32:$_Zda, ZPR16:$Zn, ZPR3b16:$Zm, VectorIndexH:$iop)> {
@@ -7505,6 +7525,11 @@ class sve_bfloat_matmul_longvecl_idx<bit BT, string asm>
  let Inst{10} = BT;
}

multiclass sve_bfloat_matmul_longvecl_idx<bit BT, string asm, SDPatternOperator op> {
  def NAME : sve_bfloat_matmul_longvecl_idx<BT, asm>;
  def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, nxv8bf16, nxv8bf16, i64, VectorIndexH_timm, !cast<Instruction>(NAME)>;
}

class sve_bfloat_convert<bit N, string asm>
  : I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
      asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
@@ -7524,6 +7549,11 @@ class sve_bfloat_convert<bit N, string asm>
  let ElementSize = ElementSizeS;
}

multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op> {
  def NAME : sve_bfloat_convert<N, asm>;
  def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i1, nxv4f32, !cast<Instruction>(NAME)>;
}

//===----------------------------------------------------------------------===//
// SVE Integer Matrix Multiply Group
//===----------------------------------------------------------------------===//

test/CodeGen/AArch64/sve-intrinsics-bfloat.ll (new file, 243 lines)
@@ -0,0 +1,243 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 -asm-verbose=0 < %s | FileCheck %s

;
; BFDOT
;

define <vscale x 4 x float> @bfdot_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfdot_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfdot_lane_0_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfdot_lane_0_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[0]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfdot_lane_1_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfdot_lane_1_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[1]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfdot_lane_2_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfdot_lane_2_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfdot_lane_3_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfdot_lane_3_f32:
; CHECK-NEXT: bfdot z0.s, z1.h, z2.h[3]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
  ret <vscale x 4 x float> %out
}

;
; BFMLALB
;

define <vscale x 4 x float> @bfmlalb_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalb_lane_0_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_lane_0_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[0]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalb_lane_1_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_lane_1_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[1]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalb_lane_2_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_lane_2_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalb_lane_3_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_lane_3_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[3]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalb_lane_4_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_lane_4_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[4]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalb_lane_5_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_lane_5_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[5]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalb_lane_6_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_lane_6_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[6]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalb_lane_7_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalb_lane_7_f32:
; CHECK-NEXT: bfmlalb z0.s, z1.h, z2.h[7]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
  ret <vscale x 4 x float> %out
}

;
; BFMLALT
;

define <vscale x 4 x float> @bfmlalt_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalt_lane_0_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_lane_0_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[0]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 0)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalt_lane_1_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_lane_1_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[1]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 1)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalt_lane_2_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_lane_2_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[2]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 2)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalt_lane_3_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_lane_3_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[3]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 3)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalt_lane_4_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_lane_4_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[4]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 4)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalt_lane_5_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_lane_5_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[5]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 5)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalt_lane_6_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_lane_6_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[6]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 6)
  ret <vscale x 4 x float> %out
}

define <vscale x 4 x float> @bfmlalt_lane_7_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmlalt_lane_7_f32:
; CHECK-NEXT: bfmlalt z0.s, z1.h, z2.h[7]
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c, i64 7)
  ret <vscale x 4 x float> %out
}

;
; BFMMLA
;

define <vscale x 4 x float> @bfmmla_f32(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c) nounwind {
; CHECK-LABEL: bfmmla_f32:
; CHECK-NEXT: bfmmla z0.s, z1.h, z2.h
; CHECK-NEXT: ret
  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float> %a, <vscale x 8 x bfloat> %b, <vscale x 8 x bfloat> %c)
  ret <vscale x 4 x float> %out
}

;
; BFCVT
;

define <vscale x 8 x bfloat> @fcvt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: fcvt_bf16_f32:
; CHECK-NEXT: bfcvt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
  ret <vscale x 8 x bfloat> %out
}

;
; BFCVTNT
;

define <vscale x 8 x bfloat> @fcvtnt_bf16_f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b) nounwind {
; CHECK-LABEL: fcvtnt_bf16_f32:
; CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
; CHECK-NEXT: ret
  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, <vscale x 4 x float> %b)
  ret <vscale x 8 x bfloat> %out
}

declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfdot.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlalt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.bfmmla(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.fcvtnt.bf16f32(<vscale x 8 x bfloat>, <vscale x 8 x i1>, <vscale x 4 x float>)