1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[AArch64][SVE] Add intrinsics for binary narrowing operations

Summary:
The following intrinsics for binary narrowing shift right operations are
added:
  * @llvm.aarch64.sve.shrnb
  * @llvm.aarch64.sve.rshrnb
  * @llvm.aarch64.sve.uqshrnb
  * @llvm.aarch64.sve.sqshrnb
  * @llvm.aarch64.sve.sqshrunb
  * @llvm.aarch64.sve.uqrshrnb
  * @llvm.aarch64.sve.sqrshrnb
  * @llvm.aarch64.sve.sqrshrunb
  * @llvm.aarch64.sve.shrnt
  * @llvm.aarch64.sve.rshrnt
  * @llvm.aarch64.sve.uqshrnt
  * @llvm.aarch64.sve.sqshrnt
  * @llvm.aarch64.sve.sqshrunt
  * @llvm.aarch64.sve.uqrshrnt
  * @llvm.aarch64.sve.sqrshrnt
  * @llvm.aarch64.sve.sqrshrunt

Reviewers: sdesmalen, rengolin, efriedma

Reviewed By: efriedma

Subscribers: tschuett, kristof.beyls, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71552
This commit is contained in:
Andrzej Warzynski 2019-12-20 09:27:10 +00:00
parent 3ddecf8632
commit 03e8b97579
5 changed files with 610 additions and 22 deletions

View File

@ -1021,6 +1021,17 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>; [IntrNoMem]>;
// Narrowing intrinsic with one vector operand and one immediate.
//   result    : LLVMSubdivide2VectorType<0>, derived from the overloaded vector
//   operand 0 : the overloaded vector type
//   operand 1 : i32, flagged with ImmArg
class SVE2_1VectorArg_Imm_Narrowing_Intrinsic
    : Intrinsic<[LLVMSubdivide2VectorType<0>],
                [llvm_anyvector_ty, llvm_i32_ty],
                [IntrNoMem, ImmArg<1>]>;
// Narrowing intrinsic with two vector operands and one immediate.
//   result    : LLVMSubdivide2VectorType<0>, derived from the overloaded vector
//   operand 0 : same type as the result
//   operand 1 : the overloaded vector type
//   operand 2 : i32, flagged with ImmArg
class SVE2_2VectorArg_Imm_Narrowing_Intrinsic
    : Intrinsic<[LLVMSubdivide2VectorType<0>],
                [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, llvm_i32_ty],
                [IntrNoMem, ImmArg<2>]>;
// NOTE: There is no relationship between these intrinsics beyond an attempt // NOTE: There is no relationship between these intrinsics beyond an attempt
// to reuse currently identical class definitions. // to reuse currently identical class definitions.
class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic; class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic;
@ -1559,4 +1570,32 @@ def int_aarch64_sve_subhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_rsubhnb : SVE2_2VectorArg_Narrowing_Intrinsic; def int_aarch64_sve_rsubhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_rsubhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic; def int_aarch64_sve_rsubhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
// Narrowing shift right.
// The *b intrinsics use the one-vector class, the *t intrinsics the two-vector class.
def int_aarch64_sve_shrnb     : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
def int_aarch64_sve_shrnt     : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;

def int_aarch64_sve_rshrnb    : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
def int_aarch64_sve_rshrnt    : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;

// Saturating shift right: signed input, signed output.
def int_aarch64_sve_sqshrnb   : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
def int_aarch64_sve_sqshrnt   : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;

def int_aarch64_sve_sqrshrnb  : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
def int_aarch64_sve_sqrshrnt  : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;

// Saturating shift right: unsigned input, unsigned output.
def int_aarch64_sve_uqshrnb   : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
def int_aarch64_sve_uqshrnt   : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;

def int_aarch64_sve_uqrshrnb  : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
def int_aarch64_sve_uqrshrnt  : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;

// Saturating shift right: signed input, unsigned output.
def int_aarch64_sve_sqshrunb  : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
def int_aarch64_sve_sqshrunt  : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;

def int_aarch64_sve_sqrshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
def int_aarch64_sve_sqrshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
} }

View File

@ -624,6 +624,30 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand; let ParserMatchClass = Imm1_32Operand;
} }
// Same as vecshiftR#N, but use TargetConstant (TImmLeaf) instead of Constant
// (ImmLeaf)
// i32 TImmLeaf operand accepting shift amounts 1 through 8 (inclusive),
// compared after casting the immediate to uint32_t.
def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
  const uint32_t Amount = (uint32_t)Imm;
  return Amount >= 1 && Amount <= 8;
}]> {
  let ParserMatchClass = Imm1_8Operand;
  let EncoderMethod = "getVecShiftR8OpValue";
  let DecoderMethod = "DecodeVecShiftR8Imm";
}
// i32 TImmLeaf operand accepting shift amounts 1 through 16 (inclusive),
// compared after casting the immediate to uint32_t.
def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
  const uint32_t Amount = (uint32_t)Imm;
  return Amount >= 1 && Amount <= 16;
}]> {
  let ParserMatchClass = Imm1_16Operand;
  let EncoderMethod = "getVecShiftR16OpValue";
  let DecoderMethod = "DecodeVecShiftR16Imm";
}
// i32 TImmLeaf operand accepting shift amounts 1 through 32 (inclusive),
// compared after casting the immediate to uint32_t.
def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
  const uint32_t Amount = (uint32_t)Imm;
  return Amount >= 1 && Amount <= 32;
}]> {
  let ParserMatchClass = Imm1_32Operand;
  let EncoderMethod = "getVecShiftR32OpValue";
  let DecoderMethod = "DecodeVecShiftR32Imm";
}
def Imm0_1Operand : AsmImmRange<0, 1>; def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm0_7Operand : AsmImmRange<0, 7>; def Imm0_7Operand : AsmImmRange<0, 7>;
def Imm0_15Operand : AsmImmRange<0, 15>; def Imm0_15Operand : AsmImmRange<0, 15>;

View File

@ -1426,24 +1426,24 @@ let Predicates = [HasSVE2] in {
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">; defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
// SVE2 bitwise shift right narrow (bottom) // SVE2 bitwise shift right narrow (bottom)
defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">; defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb", int_aarch64_sve_sqshrunb>;
defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">; defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb", int_aarch64_sve_sqrshrunb>;
defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">; defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb", int_aarch64_sve_shrnb>;
defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">; defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb", int_aarch64_sve_rshrnb>;
defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">; defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb", int_aarch64_sve_sqshrnb>;
defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">; defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb", int_aarch64_sve_sqrshrnb>;
defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">; defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb", int_aarch64_sve_uqshrnb>;
defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">; defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb", int_aarch64_sve_uqrshrnb>;
// SVE2 bitwise shift right narrow (top) // SVE2 bitwise shift right narrow (top)
defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">; defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt", int_aarch64_sve_sqshrunt>;
defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">; defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt", int_aarch64_sve_sqrshrunt>;
defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">; defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt", int_aarch64_sve_shrnt>;
defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">; defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt", int_aarch64_sve_rshrnt>;
defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">; defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt", int_aarch64_sve_sqshrnt>;
defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">; defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt", int_aarch64_sve_sqrshrnt>;
defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">; defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt", int_aarch64_sve_uqshrnt>;
defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">; defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt", int_aarch64_sve_uqrshrnt>;
// SVE2 integer add/subtract narrow high part (bottom) // SVE2 integer add/subtract narrow high part (bottom)
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb", int_aarch64_sve_addhnb>; defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb", int_aarch64_sve_addhnb>;

View File

@ -334,6 +334,11 @@ class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)), : Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
(inst $Op1, $Op2, $Op3, $Op4)>; (inst $Op1, $Op2, $Op3, $Op4)>;
// Selection pattern for `op` applied to one value of type vt1 followed by an
// immediate of type vt2 that satisfies ImmTy. Both are forwarded, in order,
// to `inst`; the result has type vtd.
class SVE_2_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
                       ValueType vt2, Operand ImmTy, Instruction inst>
  : Pat<(vtd (op vt1:$Src, (vt2 ImmTy:$Imm))),
        (inst $Src, ImmTy:$Imm)>;
class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1, class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Operand ImmTy, ValueType vt2, ValueType vt3, Operand ImmTy,
Instruction inst> Instruction inst>
@ -2965,17 +2970,21 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
let Inst{4-0} = Zd; let Inst{4-0} = Zd;
} }
// Right-shift-by-immediate narrowing (bottom) instructions.
//   opc : 3-bit opcode forwarded to sve2_int_bin_shift_imm_narrow_bottom
//   asm : assembly mnemonic
//   op  : intrinsic / SDPatternOperator selected to these instructions
// Defines the _B, _H and _S instruction variants plus one selection pattern
// for each of them.
multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
                                                      SDPatternOperator op> {
  def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
                                                tvecshiftR8>;
  def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
                                                tvecshiftR16> {
    // Bit 3 of the immediate is placed in encoding bit 19.
    let Inst{19} = imm{3};
  }
  // Use the TargetConstant operand here as well, so that the instruction's
  // immediate operand agrees with _B/_H and with the _S pattern below.
  def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
                                                tvecshiftR32> {
    // Bits 4-3 of the immediate are placed in encoding bits 20-19.
    let Inst{20-19} = imm{4-3};
  }

  def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8,  !cast<Instruction>(NAME # _B)>;
  def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
  def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc, class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@ -3001,17 +3010,21 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
let Constraints = "$Zd = $_Zd"; let Constraints = "$Zd = $_Zd";
} }
// Right-shift-by-immediate narrowing (top) instructions.
//   opc : 3-bit opcode forwarded to sve2_int_bin_shift_imm_narrow_top
//   asm : assembly mnemonic
//   op  : intrinsic / SDPatternOperator selected to these instructions
// Defines the _B, _H and _S instruction variants plus one selection pattern
// for each of them. The patterns take an extra leading vector operand of the
// result type.
multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
                                                   SDPatternOperator op> {
  def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
                                             tvecshiftR8>;
  def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
                                             tvecshiftR16> {
    // Bit 3 of the immediate is placed in encoding bit 19.
    let Inst{19} = imm{3};
  }
  // Use the TargetConstant operand here as well, so that the instruction's
  // immediate operand agrees with _B/_H and with the _S pattern below.
  def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
                                             tvecshiftR32> {
    // Bits 4-3 of the immediate are placed in encoding bits 20-19.
    let Inst{20-19} = imm{4-3};
  }

  def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8,  !cast<Instruction>(NAME # _B)>;
  def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
  def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm, class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,

View File

@ -0,0 +1,512 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
;
; SHRNB
;
; shrnb: i16 source elements, i8 result elements, shift amount 8.
define <vscale x 16 x i8> @shrnb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: shrnb_h:
; CHECK: shrnb z0.b, z0.h, #8
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16> %a, i32 8)
  ret <vscale x 16 x i8> %res
}
; shrnb: i32 source elements, i16 result elements, shift amount 16.
define <vscale x 8 x i16> @shrnb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: shrnb_s:
; CHECK: shrnb z0.h, z0.s, #16
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32> %a, i32 16)
  ret <vscale x 8 x i16> %res
}
; shrnb: i64 source elements, i32 result elements, shift amount 32.
define <vscale x 4 x i32> @shrnb_d(<vscale x 2 x i64> %a) {
; CHECK-LABEL: shrnb_d:
; CHECK: shrnb z0.s, z0.d, #32
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64> %a, i32 32)
  ret <vscale x 4 x i32> %res
}
;
; UQSHRNB
;
; uqshrnb: i16 source elements, i8 result elements, shift amount 1.
define <vscale x 16 x i8> @uqshrnb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqshrnb_h:
; CHECK: uqshrnb z0.b, z0.h, #1
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16> %a, i32 1)
  ret <vscale x 16 x i8> %res
}
; uqshrnb: i32 source elements, i16 result elements, shift amount 1.
define <vscale x 8 x i16> @uqshrnb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqshrnb_s:
; CHECK: uqshrnb z0.h, z0.s, #1
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32> %a, i32 1)
  ret <vscale x 8 x i16> %res
}
; uqshrnb: i64 source elements, i32 result elements, shift amount 1.
define <vscale x 4 x i32> @uqshrnb_d(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqshrnb_d:
; CHECK: uqshrnb z0.s, z0.d, #1
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64> %a, i32 1)
  ret <vscale x 4 x i32> %res
}
;
; SQSHRNB
;
; sqshrnb: i16 source elements, i8 result elements, shift amount 1.
define <vscale x 16 x i8> @sqshrnb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqshrnb_h:
; CHECK: sqshrnb z0.b, z0.h, #1
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16> %a, i32 1)
  ret <vscale x 16 x i8> %res
}
; sqshrnb: i32 source elements, i16 result elements, shift amount 1.
define <vscale x 8 x i16> @sqshrnb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshrnb_s:
; CHECK: sqshrnb z0.h, z0.s, #1
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32> %a, i32 1)
  ret <vscale x 8 x i16> %res
}
; sqshrnb: i64 source elements, i32 result elements, shift amount 1.
define <vscale x 4 x i32> @sqshrnb_d(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqshrnb_d:
; CHECK: sqshrnb z0.s, z0.d, #1
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64> %a, i32 1)
  ret <vscale x 4 x i32> %res
}
;
; SQSHRUNB
;
; sqshrunb: i16 source elements, i8 result elements, shift amount 7.
; The label directive below spells the full symbol name (it previously read
; "qshrunb_h:" and only matched as a substring of the emitted label).
define <vscale x 16 x i8> @sqshrunb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqshrunb_h:
; CHECK: sqshrunb z0.b, z0.h, #7
; CHECK-NEXT: ret
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16> %a,
                                                                    i32 7)
  ret <vscale x 16 x i8> %out
}
; sqshrunb: i32 source elements, i16 result elements, shift amount 15.
define <vscale x 8 x i16> @sqshrunb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqshrunb_s:
; CHECK: sqshrunb z0.h, z0.s, #15
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32> %a, i32 15)
  ret <vscale x 8 x i16> %res
}
; sqshrunb: i64 source elements, i32 result elements, shift amount 31.
define <vscale x 4 x i32> @sqshrunb_d(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqshrunb_d:
; CHECK: sqshrunb z0.s, z0.d, #31
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64> %a, i32 31)
  ret <vscale x 4 x i32> %res
}
;
; UQRSHRNB
;
; uqrshrnb: i16 source elements, i8 result elements, shift amount 2.
define <vscale x 16 x i8> @uqrshrnb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: uqrshrnb_h:
; CHECK: uqrshrnb z0.b, z0.h, #2
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16> %a, i32 2)
  ret <vscale x 16 x i8> %res
}
; uqrshrnb: i32 source elements, i16 result elements, shift amount 2.
define <vscale x 8 x i16> @uqrshrnb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: uqrshrnb_s:
; CHECK: uqrshrnb z0.h, z0.s, #2
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32> %a, i32 2)
  ret <vscale x 8 x i16> %res
}
; uqrshrnb: i64 source elements, i32 result elements, shift amount 2.
define <vscale x 4 x i32> @uqrshrnb_d(<vscale x 2 x i64> %a) {
; CHECK-LABEL: uqrshrnb_d:
; CHECK: uqrshrnb z0.s, z0.d, #2
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64> %a, i32 2)
  ret <vscale x 4 x i32> %res
}
;
; SQRSHRNB
;
; sqrshrnb: i16 source elements, i8 result elements, shift amount 2.
define <vscale x 16 x i8> @sqrshrnb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqrshrnb_h:
; CHECK: sqrshrnb z0.b, z0.h, #2
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16> %a, i32 2)
  ret <vscale x 16 x i8> %res
}
; sqrshrnb: i32 source elements, i16 result elements, shift amount 2.
define <vscale x 8 x i16> @sqrshrnb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqrshrnb_s:
; CHECK: sqrshrnb z0.h, z0.s, #2
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32> %a, i32 2)
  ret <vscale x 8 x i16> %res
}
; sqrshrnb: i64 source elements, i32 result elements, shift amount 2.
define <vscale x 4 x i32> @sqrshrnb_d(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqrshrnb_d:
; CHECK: sqrshrnb z0.s, z0.d, #2
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64> %a, i32 2)
  ret <vscale x 4 x i32> %res
}
;
; SQRSHRUNB
;
; sqrshrunb: i16 source elements, i8 result elements, shift amount 6.
define <vscale x 16 x i8> @sqrshrunb_h(<vscale x 8 x i16> %a) {
; CHECK-LABEL: sqrshrunb_h:
; CHECK: sqrshrunb z0.b, z0.h, #6
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16> %a, i32 6)
  ret <vscale x 16 x i8> %res
}
; sqrshrunb: i32 source elements, i16 result elements, shift amount 14.
define <vscale x 8 x i16> @sqrshrunb_s(<vscale x 4 x i32> %a) {
; CHECK-LABEL: sqrshrunb_s:
; CHECK: sqrshrunb z0.h, z0.s, #14
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32> %a, i32 14)
  ret <vscale x 8 x i16> %res
}
; sqrshrunb: i64 source elements, i32 result elements, shift amount 30.
define <vscale x 4 x i32> @sqrshrunb_d(<vscale x 2 x i64> %a) {
; CHECK-LABEL: sqrshrunb_d:
; CHECK: sqrshrunb z0.s, z0.d, #30
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64> %a, i32 30)
  ret <vscale x 4 x i32> %res
}
;
; SHRNT
;
; shrnt: %a has i8 elements (same type as the result), %b has i16 elements; shift amount 3.
define <vscale x 16 x i8> @shrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: shrnt_h:
; CHECK: shrnt z0.b, z1.h, #3
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, i32 3)
  ret <vscale x 16 x i8> %res
}
; shrnt: %a has i16 elements (same type as the result), %b has i32 elements; shift amount 3.
define <vscale x 8 x i16> @shrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: shrnt_s:
; CHECK: shrnt z0.h, z1.s, #3
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, i32 3)
  ret <vscale x 8 x i16> %res
}
; shrnt: %a has i32 elements (same type as the result), %b has i64 elements; shift amount 3.
define <vscale x 4 x i32> @shrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: shrnt_d:
; CHECK: shrnt z0.s, z1.d, #3
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, i32 3)
  ret <vscale x 4 x i32> %res
}
;
; UQSHRNT
;
; uqshrnt: %a has i8 elements (same type as the result), %b has i16 elements; shift amount 5.
define <vscale x 16 x i8> @uqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqshrnt_h:
; CHECK: uqshrnt z0.b, z1.h, #5
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, i32 5)
  ret <vscale x 16 x i8> %res
}
; uqshrnt: %a has i16 elements (same type as the result), %b has i32 elements; shift amount 13.
define <vscale x 8 x i16> @uqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqshrnt_s:
; CHECK: uqshrnt z0.h, z1.s, #13
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, i32 13)
  ret <vscale x 8 x i16> %res
}
; uqshrnt: %a has i32 elements (same type as the result), %b has i64 elements; shift amount 29.
define <vscale x 4 x i32> @uqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqshrnt_d:
; CHECK: uqshrnt z0.s, z1.d, #29
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, i32 29)
  ret <vscale x 4 x i32> %res
}
;
; SQSHRNT
;
; sqshrnt: %a has i8 elements (same type as the result), %b has i16 elements; shift amount 5.
define <vscale x 16 x i8> @sqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqshrnt_h:
; CHECK: sqshrnt z0.b, z1.h, #5
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, i32 5)
  ret <vscale x 16 x i8> %res
}
; sqshrnt: %a has i16 elements (same type as the result), %b has i32 elements; shift amount 13.
define <vscale x 8 x i16> @sqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqshrnt_s:
; CHECK: sqshrnt z0.h, z1.s, #13
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, i32 13)
  ret <vscale x 8 x i16> %res
}
; sqshrnt: %a has i32 elements (same type as the result), %b has i64 elements; shift amount 29.
define <vscale x 4 x i32> @sqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqshrnt_d:
; CHECK: sqshrnt z0.s, z1.d, #29
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, i32 29)
  ret <vscale x 4 x i32> %res
}
;
; SQSHRUNT
;
; sqshrunt: %a has i8 elements (same type as the result), %b has i16 elements; shift amount 4.
define <vscale x 16 x i8> @sqshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqshrunt_h:
; CHECK: sqshrunt z0.b, z1.h, #4
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, i32 4)
  ret <vscale x 16 x i8> %res
}
; sqshrunt: %a has i16 elements (same type as the result), %b has i32 elements; shift amount 4.
define <vscale x 8 x i16> @sqshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqshrunt_s:
; CHECK: sqshrunt z0.h, z1.s, #4
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, i32 4)
  ret <vscale x 8 x i16> %res
}
; sqshrunt: %a has i32 elements (same type as the result), %b has i64 elements; shift amount 4.
define <vscale x 4 x i32> @sqshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqshrunt_d:
; CHECK: sqshrunt z0.s, z1.d, #4
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, i32 4)
  ret <vscale x 4 x i32> %res
}
;
; UQRSHRNT
;
; uqrshrnt: %a has i8 elements (same type as the result), %b has i16 elements; shift amount 8.
define <vscale x 16 x i8> @uqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: uqrshrnt_h:
; CHECK: uqrshrnt z0.b, z1.h, #8
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, i32 8)
  ret <vscale x 16 x i8> %res
}
; uqrshrnt: %a has i16 elements (same type as the result), %b has i32 elements; shift amount 12.
define <vscale x 8 x i16> @uqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: uqrshrnt_s:
; CHECK: uqrshrnt z0.h, z1.s, #12
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, i32 12)
  ret <vscale x 8 x i16> %res
}
; uqrshrnt: %a has i32 elements (same type as the result), %b has i64 elements; shift amount 28.
define <vscale x 4 x i32> @uqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: uqrshrnt_d:
; CHECK: uqrshrnt z0.s, z1.d, #28
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, i32 28)
  ret <vscale x 4 x i32> %res
}
;
; SQRSHRNT
;
; sqrshrnt: %a has i8 elements (same type as the result), %b has i16 elements; shift amount 8.
define <vscale x 16 x i8> @sqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrshrnt_h:
; CHECK: sqrshrnt z0.b, z1.h, #8
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, i32 8)
  ret <vscale x 16 x i8> %res
}
; sqrshrnt: %a has i16 elements (same type as the result), %b has i32 elements; shift amount 12.
define <vscale x 8 x i16> @sqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrshrnt_s:
; CHECK: sqrshrnt z0.h, z1.s, #12
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, i32 12)
  ret <vscale x 8 x i16> %res
}
; sqrshrnt: %a has i32 elements (same type as the result), %b has i64 elements; shift amount 28.
define <vscale x 4 x i32> @sqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrshrnt_d:
; CHECK: sqrshrnt z0.s, z1.d, #28
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, i32 28)
  ret <vscale x 4 x i32> %res
}
;
; SQRSHRUNT
;
; sqrshrunt: %a has i8 elements (same type as the result), %b has i16 elements; shift amount 1.
define <vscale x 16 x i8> @sqrshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
; CHECK-LABEL: sqrshrunt_h:
; CHECK: sqrshrunt z0.b, z1.h, #1
; CHECK-NEXT: ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b, i32 1)
  ret <vscale x 16 x i8> %res
}
; sqrshrunt: %a has i16 elements (same type as the result), %b has i32 elements; shift amount 5.
define <vscale x 8 x i16> @sqrshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: sqrshrunt_s:
; CHECK: sqrshrunt z0.h, z1.s, #5
; CHECK-NEXT: ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b, i32 5)
  ret <vscale x 8 x i16> %res
}
; sqrshrunt: %a has i32 elements (same type as the result), %b has i64 elements; shift amount 5.
define <vscale x 4 x i32> @sqrshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
; CHECK-LABEL: sqrshrunt_d:
; CHECK: sqrshrunt z0.s, z1.d, #5
; CHECK-NEXT: ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b, i32 5)
  ret <vscale x 4 x i32> %res
}
; Declarations of the intrinsics called by the functions in this file.
; "b" forms: (wide vector, i32) -> vector with twice as many, half-width elements.
declare <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64>, i32)
; "t" forms: (vector of the result type, wide vector, i32) -> result vector.
declare <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)