1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[AArch64][SVE] Fix umin/umax lowering to handle out of range imm.

Immediate must be in an integer range [0,255] for umin/umax instruction.
Extend pattern matching helper SelectSVEArithImm() to take in value type
bitwidth when checking immediate value is in range or not.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D89831
This commit is contained in:
Huihui Zhang 2020-10-23 09:29:43 -07:00
parent e4448695d1
commit f5744161af
4 changed files with 371 additions and 41 deletions

View File

@ -191,6 +191,11 @@ public:
return SelectSVELogicalImm(N, VT, Imm);
}
template <MVT::SimpleValueType VT>
bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
return SelectSVEArithImm(N, VT, Imm);
}
template <unsigned Low, unsigned High, bool AllowSaturation = false>
bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
@ -327,7 +332,7 @@ private:
bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
bool AllowSaturation, SDValue &Imm);
bool SelectSVEArithImm(SDValue N, SDValue &Imm);
bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
SDValue &Offset);
};
@ -3128,13 +3133,28 @@ bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
return false;
}
bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, SDValue &Imm) {
bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
uint64_t ImmVal = CNode->getSExtValue();
SDLoc DL(N);
ImmVal = ImmVal & 0xFF;
uint64_t ImmVal = CNode->getZExtValue();
switch (VT.SimpleTy) {
case MVT::i8:
ImmVal &= 0xFF;
break;
case MVT::i16:
ImmVal &= 0xFFFF;
break;
case MVT::i32:
ImmVal &= 0xFFFFFFFF;
break;
case MVT::i64:
break;
default:
llvm_unreachable("Unexpected type");
}
if (ImmVal < 256) {
Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
return true;
}
}

View File

@ -206,7 +206,10 @@ def SVELogicalImm64Pat : ComplexPattern<i64, 1, "SelectSVELogicalImm<MVT::i64>",
def SVE8BitLslImm : ComplexPattern<i32, 2, "SelectSVE8BitLslImm", [imm]>;
def SVEArithUImmPat : ComplexPattern<i32, 1, "SelectSVEArithImm", []>;
def SVEArithUImm8Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i8>", []>;
def SVEArithUImm16Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i16>", []>;
def SVEArithUImm32Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i32>", []>;
def SVEArithUImm64Pat : ComplexPattern<i32, 1, "SelectSVEArithImm<MVT::i64>", []>;
def SVEArithSImmPat : ComplexPattern<i32, 1, "SelectSVESignedArithImm", []>;
def SVEShiftImmL8 : ComplexPattern<i32, 1, "SelectSVEShiftImm<0, 7>", []>;
@ -3981,10 +3984,10 @@ multiclass sve_int_arith_imm1_unsigned<bits<2> opc, string asm, SDPatternOperato
def _S : sve_int_arith_imm<0b10, { 0b1010, opc }, asm, ZPR32, imm0_255>;
def _D : sve_int_arith_imm<0b11, { 0b1010, opc }, asm, ZPR64, imm0_255>;
def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _B)>;
def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImmPat, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImmPat, !cast<Instruction>(NAME # _D)>;
def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv16i8, nxv16i1, op, ZPR8, i32, SVEArithUImm8Pat, !cast<Instruction>(NAME # _B)>;
def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv8i16, nxv8i1, op, ZPR16, i32, SVEArithUImm16Pat, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv4i32, nxv4i1, op, ZPR32, i32, SVEArithUImm32Pat, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Imm_Arith_Pred_Pat<nxv2i64, nxv2i1, op, ZPR64, i64, SVEArithUImm64Pat, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_arith_imm2<string asm, SDPatternOperator op> {

View File

@ -51,6 +51,20 @@ define <vscale x 8 x i16> @smax_i16_neg(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %res
}
define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_i16_out_of_range:
; CHECK: mov w8, #257
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%cmp = icmp sgt <vscale x 8 x i16> %a, %splat
%res = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat
ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @smax_i32_pos(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32_pos
; CHECK: smax z0.s, z0.s, #27
@ -73,6 +87,20 @@ define <vscale x 4 x i32> @smax_i32_neg(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32_out_of_range:
; CHECK: mov w8, #-129
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp sgt <vscale x 4 x i32> %a, %splat
%res = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat
ret <vscale x 4 x i32> %res
}
define <vscale x 2 x i64> @smax_i64_pos(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_pos
; CHECK: smax z0.d, z0.d, #27
@ -95,6 +123,20 @@ define <vscale x 2 x i64> @smax_i64_neg(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_out_of_range:
; CHECK: mov w8, #65535
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%cmp = icmp sgt <vscale x 2 x i64> %a, %splat
%res = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat
ret <vscale x 2 x i64> %res
}
;
; SMIN
;
@ -142,6 +184,20 @@ define <vscale x 8 x i16> @smin_i16_neg(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %res
}
define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_i16_out_of_range:
; CHECK: mov w8, #257
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%cmp = icmp slt <vscale x 8 x i16> %a, %splat
%res = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat
ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @smin_i32_pos(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32_pos
; CHECK: smin z0.s, z0.s, #27
@ -164,6 +220,20 @@ define <vscale x 4 x i32> @smin_i32_neg(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32_out_of_range:
; CHECK: mov w8, #-129
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp slt <vscale x 4 x i32> %a, %splat
%res = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat
ret <vscale x 4 x i32> %res
}
define <vscale x 2 x i64> @smin_i64_pos(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64_pos
; CHECK: smin z0.d, z0.d, #27
@ -186,6 +256,20 @@ define <vscale x 2 x i64> @smin_i64_neg(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64_out_of_range:
; CHECK: mov w8, #65535
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%cmp = icmp slt <vscale x 2 x i64> %a, %splat
%res = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat
ret <vscale x 2 x i64> %res
}
;
; UMAX
;
@ -222,11 +306,14 @@ define <vscale x 8 x i16> @umax_i16_pos(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %res
}
define <vscale x 8 x i16> @umax_i16_large(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16_large
; CHECK: umax z0.h, z0.h, #129
define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16_out_of_range:
; CHECK: mov w8, #257
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
%elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 8 x i16> %a, %splat
%res = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat
@ -244,11 +331,14 @@ define <vscale x 4 x i32> @umax_i32_pos(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @umax_i32_large(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32_large
; CHECK: umax z0.s, z0.s, #129
define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32_out_of_range:
; CHECK: mov w8, #257
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%elt = insertelement <vscale x 4 x i32> undef, i32 129, i32 0
%elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 4 x i32> %a, %splat
%res = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat
@ -266,11 +356,14 @@ define <vscale x 2 x i64> @umax_i64_pos(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @umax_i64_large(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64_large
; CHECK: umax z0.d, z0.d, #129
define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64_out_of_range:
; CHECK: mov w8, #65535
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 129, i32 0
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%cmp = icmp ugt <vscale x 2 x i64> %a, %splat
%res = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat
@ -313,11 +406,14 @@ define <vscale x 8 x i16> @umin_i16_pos(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %res
}
define <vscale x 8 x i16> @umin_i16_large(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16_large
; CHECK: umin z0.h, z0.h, #129
define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16_out_of_range:
; CHECK: mov w8, #257
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
%elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%cmp = icmp ult <vscale x 8 x i16> %a, %splat
%res = select <vscale x 8 x i1> %cmp, <vscale x 8 x i16> %a, <vscale x 8 x i16> %splat
@ -335,11 +431,14 @@ define <vscale x 4 x i32> @umin_i32_pos(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @umin_i32_large(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32_large
; CHECK: umin z0.s, z0.s, #129
define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32_out_of_range:
; CHECK: mov w8, #257
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%elt = insertelement <vscale x 4 x i32> undef, i32 129, i32 0
%elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%cmp = icmp ult <vscale x 4 x i32> %a, %splat
%res = select <vscale x 4 x i1> %cmp, <vscale x 4 x i32> %a, <vscale x 4 x i32> %splat
@ -357,11 +456,14 @@ define <vscale x 2 x i64> @umin_i64_pos(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @umin_i64_large(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64_large
; CHECK: umin z0.d, z0.d, #129
define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64_out_of_range:
; CHECK: mov w8, #65535
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%elt = insertelement <vscale x 2 x i64> undef, i64 129, i32 0
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i32 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%cmp = icmp ult <vscale x 2 x i64> %a, %splat
%res = select <vscale x 2 x i1> %cmp, <vscale x 2 x i64> %a, <vscale x 2 x i64> %splat

View File

@ -35,6 +35,23 @@ define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %out
}
define <vscale x 8 x i16> @smax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smax_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #129
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%elt = insertelement <vscale x 8 x i16> undef, i16 129, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %splat)
ret <vscale x 8 x i16> %out
}
define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32:
; CHECK: // %bb.0:
@ -49,6 +66,23 @@ define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %out
}
define <vscale x 4 x i32> @smax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smax_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-129
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%elt = insertelement <vscale x 4 x i32> undef, i32 -129, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %splat)
ret <vscale x 4 x i32> %out
}
define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64:
; CHECK: // %bb.0:
@ -63,6 +97,24 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %out
}
define <vscale x 2 x i64> @smax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smax_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %splat)
ret <vscale x 2 x i64> %out
}
; SMIN
define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a) {
@ -93,6 +145,23 @@ define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %out
}
define <vscale x 8 x i16> @smin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: smin_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #-129
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%elt = insertelement <vscale x 8 x i16> undef, i16 -129, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %splat)
ret <vscale x 8 x i16> %out
}
define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32:
; CHECK: // %bb.0:
@ -107,6 +176,24 @@ define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %out
}
define <vscale x 4 x i32> @smin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: smin_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %splat)
ret <vscale x 4 x i32> %out
}
define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64:
; CHECK: // %bb.0:
@ -121,6 +208,22 @@ define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %out
}
define <vscale x 2 x i64> @smin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: smin_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, #-256 // =0xffffffffffffff00
; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%elt = insertelement <vscale x 2 x i64> undef, i64 -256, i64 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %splat)
ret <vscale x 2 x i64> %out
}
; UMAX
define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a) {
@ -151,6 +254,23 @@ define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %out
}
define <vscale x 8 x i16> @umax_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umax_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %splat)
ret <vscale x 8 x i16> %out
}
define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32:
; CHECK: // %bb.0:
@ -165,6 +285,23 @@ define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %out
}
define <vscale x 4 x i32> @umax_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umax_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %splat)
ret <vscale x 4 x i32> %out
}
define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64:
; CHECK: // %bb.0:
@ -179,6 +316,23 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %out
}
define <vscale x 2 x i64> @umax_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umax_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %splat)
ret <vscale x 2 x i64> %out
}
; UMIN
define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a) {
@ -209,6 +363,23 @@ define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a) {
ret <vscale x 8 x i16> %out
}
define <vscale x 8 x i16> @umin_i16_out_of_range(<vscale x 8 x i16> %a) {
; CHECK-LABEL: umin_i16_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
%pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
%elt = insertelement <vscale x 8 x i16> undef, i16 257, i32 0
%splat = shufflevector <vscale x 8 x i16> %elt, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
%out = call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %splat)
ret <vscale x 8 x i16> %out
}
define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32:
; CHECK: // %bb.0:
@ -223,6 +394,23 @@ define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x i32> %out
}
define <vscale x 4 x i32> @umin_i32_out_of_range(<vscale x 4 x i32> %a) {
; CHECK-LABEL: umin_i32_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #257
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
%pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
%elt = insertelement <vscale x 4 x i32> undef, i32 257, i32 0
%splat = shufflevector <vscale x 4 x i32> %elt, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
%out = call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %splat)
ret <vscale x 4 x i32> %out
}
define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64:
; CHECK: // %bb.0:
@ -237,6 +425,23 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x i64> %out
}
define <vscale x 2 x i64> @umin_i64_out_of_range(<vscale x 2 x i64> %a) {
; CHECK-LABEL: umin_i64_out_of_range:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #65535
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
%pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
%elt = insertelement <vscale x 2 x i64> undef, i64 65535, i64 0
%splat = shufflevector <vscale x 2 x i64> %elt, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
%out = call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %splat)
ret <vscale x 2 x i64> %out
}
; SQADD
define <vscale x 16 x i8> @sqadd_b_lowimm(<vscale x 16 x i8> %a) {