[ARM] MVE saturating truncates
This adds some custom lowering for VQMOVN, an instruction that can be used to perform saturating truncates from a pair of min(max(X, -0x8000), 0x7fff), provided those constants are correct. This leaves us with a VQMOVNBs, which saturates the value and inserts it into the bottom lanes of an existing vector. We then need to do something with the other lanes, extending the value using a vmovlb. Ideally, as will often be the case, only the bottom lane of what remains will be demanded, allowing the vmovlb to be removed. This should mean the instruction is either neutral or a win most of the time, and it allows some extra follow-up folding to happen.

Differential Revision: https://reviews.llvm.org/D77590
parent 5658bcf0c4
commit 4120e7a927
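For orientation before the diff, here is a minimal C++ sketch (not part of the patch; the function names are invented for illustration) of the kind of scalar loop whose MVE-vectorized form contains the vector min/max plus truncate that the new PerformMinMaxCombine below matches:

    // Sketch: clamp into the 16-bit range, then truncate. Vectorized for MVE,
    // the clamp becomes vector smin/smax (or umin against 0xffff), which the
    // combine rewrites to ARMISD::VQMOVNs/VQMOVNu so a single vqmovnb
    // instruction performs the saturating narrow.
    #include <algorithm>
    #include <cstdint>

    void sat_narrow_s16(const int32_t *src, int16_t *dst, int n) {
      for (int i = 0; i < n; ++i) {
        // smax against -0x8000, then smin against 0x7fff
        int32_t c = std::clamp<int32_t>(src[i], -0x8000, 0x7fff);
        dst[i] = static_cast<int16_t>(c); // truncating store: only low lanes demanded
      }
    }

    void sat_narrow_u16(const uint32_t *src, uint16_t *dst, int n) {
      for (int i = 0; i < n; ++i) {
        uint32_t c = std::min<uint32_t>(src[i], 0xffff); // umin against 0xffff
        dst[i] = static_cast<uint16_t>(c);
      }
    }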
@@ -946,6 +946,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine(ISD::ADD);
     setTargetDAGCombine(ISD::BITCAST);
   }
+  if (Subtarget->hasMVEIntegerOps()) {
+    setTargetDAGCombine(ISD::SMIN);
+    setTargetDAGCombine(ISD::UMIN);
+    setTargetDAGCombine(ISD::SMAX);
+    setTargetDAGCombine(ISD::UMAX);
+  }
 
   if (!Subtarget->hasFP64()) {
     // When targeting a floating-point unit with only single-precision
@@ -1668,6 +1674,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::VTBL1: return "ARMISD::VTBL1";
   case ARMISD::VTBL2: return "ARMISD::VTBL2";
   case ARMISD::VMOVN: return "ARMISD::VMOVN";
+  case ARMISD::VQMOVNs: return "ARMISD::VQMOVNs";
+  case ARMISD::VQMOVNu: return "ARMISD::VQMOVNu";
   case ARMISD::VMULLs: return "ARMISD::VMULLs";
   case ARMISD::VMULLu: return "ARMISD::VMULLu";
   case ARMISD::VADDVs: return "ARMISD::VADDVs";
@@ -14864,6 +14872,107 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
+/// saturates.
+static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
+                                    const ARMSubtarget *ST) {
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  if (!ST->hasMVEIntegerOps())
+    return SDValue();
+
+  if (VT != MVT::v4i32 && VT != MVT::v8i16)
+    return SDValue();
+
+  auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
+    // Check that one is a smin and the other is a smax.
+    if (Min->getOpcode() != ISD::SMIN)
+      std::swap(Min, Max);
+    if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
+      return false;
+
+    APInt SaturateC;
+    if (VT == MVT::v4i32)
+      SaturateC = APInt(32, (1 << 15) - 1, true);
+    else //if (VT == MVT::v8i16)
+      SaturateC = APInt(16, (1 << 7) - 1, true);
+
+    APInt MinC, MaxC;
+    if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
+        MinC != SaturateC)
+      return false;
+    if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
+        MaxC != ~SaturateC)
+      return false;
+    return true;
+  };
+
+  if (IsSignedSaturate(N, N0.getNode())) {
+    SDLoc DL(N);
+    MVT ExtVT, HalfVT;
+    if (VT == MVT::v4i32) {
+      HalfVT = MVT::v8i16;
+      ExtVT = MVT::v4i16;
+    } else { // if (VT == MVT::v8i16)
+      HalfVT = MVT::v16i8;
+      ExtVT = MVT::v8i8;
+    }
+
+    // Create a VQMOVNB with undef top lanes, then sign extend into the top
+    // half. That extend will hopefully be removed if only the bottom bits are
+    // demanded (through a truncating store, for example).
+    SDValue VQMOVN =
+        DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
+                    N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
+    SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
+                       DAG.getValueType(ExtVT));
+  }
+
+  auto IsUnsignedSaturate = [&](SDNode *Min) {
+    // For unsigned, we just need to check for <= 0xffff.
+    if (Min->getOpcode() != ISD::UMIN)
+      return false;
+
+    APInt SaturateC;
+    if (VT == MVT::v4i32)
+      SaturateC = APInt(32, (1 << 16) - 1, true);
+    else //if (VT == MVT::v8i16)
+      SaturateC = APInt(16, (1 << 8) - 1, true);
+
+    APInt MinC;
+    if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
+        MinC != SaturateC)
+      return false;
+    return true;
+  };
+
+  if (IsUnsignedSaturate(N)) {
+    SDLoc DL(N);
+    MVT HalfVT;
+    unsigned ExtConst;
+    if (VT == MVT::v4i32) {
+      HalfVT = MVT::v8i16;
+      ExtConst = 0x0000FFFF;
+    } else { //if (VT == MVT::v8i16)
+      HalfVT = MVT::v16i8;
+      ExtConst = 0x00FF;
+    }
+
+    // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
+    // an AND. That extend will hopefully be removed if only the bottom bits
+    // are demanded (through a truncating store, for example).
+    SDValue VQMOVN =
+        DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
+                    DAG.getConstant(0, DL, MVT::i32));
+    SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
+    return DAG.getNode(ISD::AND, DL, VT, Bitcast,
+                       DAG.getConstant(ExtConst, DL, VT));
+  }
+
+  return SDValue();
+}
+
 static const APInt *isPowerOf2Constant(SDValue V) {
   ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
   if (!C)
@@ -15419,7 +15528,13 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
     return PerformShiftCombine(N, DCI, Subtarget);
   case ISD::SIGN_EXTEND:
   case ISD::ZERO_EXTEND:
-  case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+  case ISD::ANY_EXTEND:
+    return PerformExtendCombine(N, DCI.DAG, Subtarget);
+  case ISD::SMIN:
+  case ISD::UMIN:
+  case ISD::SMAX:
+  case ISD::UMAX:
+    return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
   case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
   case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
   case ISD::LOAD: return PerformLOADCombine(N, DCI);
@@ -204,6 +204,10 @@ class VectorType;
       VTBL2,    // 2-register shuffle with mask
       VMOVN,    // MVE vmovn
 
+      // MVE Saturating truncates
+      VQMOVNs,  // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
+      VQMOVNu,  // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)
+
       // Vector multiply long:
       VMULLs,   // ...signed
      VMULLu,   // ...unsigned
@@ -4714,6 +4714,31 @@ defm : MVE_VQMOVN_p<MVE_VQMOVUNs32th, 1, 0, 1, MVE_v8i16, MVE_v4i32>;
 defm : MVE_VQMOVN_p<MVE_VQMOVUNs16bh, 1, 0, 0, MVE_v16i8, MVE_v8i16>;
 defm : MVE_VQMOVN_p<MVE_VQMOVUNs16th, 1, 0, 1, MVE_v16i8, MVE_v8i16>;
 
+def SDTARMVMOVNQ : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                        SDTCisVec<2>, SDTCisVT<3, i32>]>;
+def MVEvqmovns : SDNode<"ARMISD::VQMOVNs", SDTARMVMOVNQ>;
+def MVEvqmovnu : SDNode<"ARMISD::VQMOVNu", SDTARMVMOVNQ>;
+
+let Predicates = [HasMVEInt] in {
+  def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 0))),
+            (v8i16 (MVE_VQMOVNs32bh (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+  def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 1))),
+            (v8i16 (MVE_VQMOVNs32th (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+  def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
+            (v16i8 (MVE_VQMOVNs16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+  def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
+            (v16i8 (MVE_VQMOVNs16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+
+  def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 0))),
+            (v8i16 (MVE_VQMOVNu32bh (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+  def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), (i32 1))),
+            (v8i16 (MVE_VQMOVNu32th (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm)))>;
+  def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))),
+            (v16i8 (MVE_VQMOVNu16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+  def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
+            (v16i8 (MVE_VQMOVNu16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+}
+
 class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
                   dag iops_extra, vpred_ops vpred, string cstr>
   : MVE_qDest_qSrc<iname, suffix, (outs MQPR:$Qd),
[File diff suppressed because it is too large]
@@ -4,10 +4,8 @@
 define arm_aapcs_vfpcc <8 x i16> @vqmovni32_sminmax_t1(<4 x i32> %s0, <8 x i16> %src1) {
 ; CHECK-LABEL: vqmovni32_sminmax_t1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i32 q2, #0x7fff
-; CHECK-NEXT:    vmax.s32 q0, q0, q2
-; CHECK-NEXT:    vmov.i32 q2, #0x7fff
-; CHECK-NEXT:    vmin.s32 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    vmovnt.i32 q1, q0
 ; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
@@ -24,10 +22,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni32_sminmax_t2(<4 x i32> %s0, <8 x i16> %src1) {
 ; CHECK-LABEL: vqmovni32_sminmax_t2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i32 q2, #0x7fff
-; CHECK-NEXT:    vmax.s32 q0, q0, q2
-; CHECK-NEXT:    vmov.i32 q2, #0x7fff
-; CHECK-NEXT:    vmin.s32 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    vmovnt.i32 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -43,10 +39,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni32_sminmax_b1(<4 x i32> %s0, <8 x i16> %src1) {
 ; CHECK-LABEL: vqmovni32_sminmax_b1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i32 q2, #0x7fff
-; CHECK-NEXT:    vmax.s32 q0, q0, q2
-; CHECK-NEXT:    vmov.i32 q2, #0x7fff
-; CHECK-NEXT:    vmin.s32 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    vmovnb.i32 q1, q0
 ; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
@@ -63,10 +57,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni32_sminmax_b2(<4 x i32> %s0, <8 x i16> %src1) {
 ; CHECK-LABEL: vqmovni32_sminmax_b2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i32 q2, #0x7fff
-; CHECK-NEXT:    vmax.s32 q0, q0, q2
-; CHECK-NEXT:    vmov.i32 q2, #0x7fff
-; CHECK-NEXT:    vmin.s32 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    vmovnb.i32 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -83,8 +75,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni32_uminmax_t1(<4 x i32> %s0, <8 x i16> %src1) {
 ; CHECK-LABEL: vqmovni32_uminmax_t1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0xffff
-; CHECK-NEXT:    vmin.u32 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    vmovnt.i32 q1, q0
 ; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
@@ -99,8 +91,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni32_uminmax_t2(<4 x i32> %s0, <8 x i16> %src1) {
 ; CHECK-LABEL: vqmovni32_uminmax_t2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0xffff
-; CHECK-NEXT:    vmin.u32 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    vmovnt.i32 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -114,8 +106,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni32_uminmax_b1(<4 x i32> %s0, <8 x i16> %src1) {
 ; CHECK-LABEL: vqmovni32_uminmax_b1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q2, #0xffff
-; CHECK-NEXT:    vmin.u32 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    vmovnb.i32 q1, q0
 ; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
@@ -145,10 +137,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @vqmovni16_sminmax_t1(<8 x i16> %s0, <16 x i8> %src1) {
 ; CHECK-LABEL: vqmovni16_sminmax_t1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i16 q2, #0x7f
-; CHECK-NEXT:    vmax.s16 q0, q0, q2
-; CHECK-NEXT:    vmov.i16 q2, #0x7f
-; CHECK-NEXT:    vmin.s16 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    vmovnt.i16 q1, q0
 ; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
@@ -165,10 +155,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @vqmovni16_sminmax_t2(<8 x i16> %s0, <16 x i8> %src1) {
 ; CHECK-LABEL: vqmovni16_sminmax_t2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i16 q2, #0x7f
-; CHECK-NEXT:    vmax.s16 q0, q0, q2
-; CHECK-NEXT:    vmov.i16 q2, #0x7f
-; CHECK-NEXT:    vmin.s16 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    vmovnt.i16 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -184,10 +172,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @vqmovni16_sminmax_b1(<8 x i16> %s0, <16 x i8> %src1) {
 ; CHECK-LABEL: vqmovni16_sminmax_b1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i16 q2, #0x7f
-; CHECK-NEXT:    vmax.s16 q0, q0, q2
-; CHECK-NEXT:    vmov.i16 q2, #0x7f
-; CHECK-NEXT:    vmin.s16 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    vmovnb.i16 q1, q0
 ; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
@@ -204,10 +190,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @vqmovni16_sminmax_b2(<8 x i16> %s0, <16 x i8> %src1) {
 ; CHECK-LABEL: vqmovni16_sminmax_b2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i16 q2, #0x7f
-; CHECK-NEXT:    vmax.s16 q0, q0, q2
-; CHECK-NEXT:    vmov.i16 q2, #0x7f
-; CHECK-NEXT:    vmin.s16 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    vmovnb.i16 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -224,8 +208,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @vqmovni16_uminmax_t1(<8 x i16> %s0, <16 x i8> %src1) {
 ; CHECK-LABEL: vqmovni16_uminmax_t1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q2, #0xff
-; CHECK-NEXT:    vmin.u16 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    vmovnt.i16 q1, q0
 ; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
@@ -240,8 +224,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @vqmovni16_uminmax_t2(<8 x i16> %s0, <16 x i8> %src1) {
 ; CHECK-LABEL: vqmovni16_uminmax_t2:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q2, #0xff
-; CHECK-NEXT:    vmin.u16 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    vmovnt.i16 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -255,8 +239,8 @@ entry:
 define arm_aapcs_vfpcc <16 x i8> @vqmovni16_uminmax_b1(<8 x i16> %s0, <16 x i8> %src1) {
 ; CHECK-LABEL: vqmovni16_uminmax_b1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q2, #0xff
-; CHECK-NEXT:    vmin.u16 q0, q0, q2
+; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    vmovnb.i16 q1, q0
 ; CHECK-NEXT:    vmov q0, q1
 ; CHECK-NEXT:    bx lr
@@ -4,10 +4,8 @@
 define arm_aapcs_vfpcc <4 x i32> @vqmovni32_smaxmin(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_smaxmin:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0x7fff
-; CHECK-NEXT:    vmin.s32 q0, q0, q1
-; CHECK-NEXT:    vmvn.i32 q1, #0x7fff
-; CHECK-NEXT:    vmax.s32 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp slt <4 x i32> %s0, <i32 32767, i32 32767, i32 32767, i32 32767>
@@ -20,10 +18,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @vqmovni32_sminmax(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_sminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i32 q1, #0x7fff
-; CHECK-NEXT:    vmax.s32 q0, q0, q1
-; CHECK-NEXT:    vmov.i32 q1, #0x7fff
-; CHECK-NEXT:    vmin.s32 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp sgt <4 x i32> %s0, <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
@@ -36,8 +32,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @vqmovni32_umaxmin(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_umaxmin:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0xffff
-; CHECK-NEXT:    vmin.u32 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp ult <4 x i32> %s0, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -48,8 +44,8 @@ entry:
 define arm_aapcs_vfpcc <4 x i32> @vqmovni32_uminmax(<4 x i32> %s0) {
 ; CHECK-LABEL: vqmovni32_uminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i32 q1, #0xffff
-; CHECK-NEXT:    vmin.u32 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c2 = icmp ult <4 x i32> %s0, <i32 65535, i32 65535, i32 65535, i32 65535>
@@ -60,10 +56,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni16_smaxmin(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_smaxmin:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q1, #0x7f
-; CHECK-NEXT:    vmin.s16 q0, q0, q1
-; CHECK-NEXT:    vmvn.i16 q1, #0x7f
-; CHECK-NEXT:    vmax.s16 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp slt <8 x i16> %s0, <i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127, i16 127>
@@ -76,10 +70,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni16_sminmax(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_sminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmvn.i16 q1, #0x7f
-; CHECK-NEXT:    vmax.s16 q0, q0, q1
-; CHECK-NEXT:    vmov.i16 q1, #0x7f
-; CHECK-NEXT:    vmin.s16 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp sgt <8 x i16> %s0, <i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128, i16 -128>
@@ -92,8 +84,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni16_umaxmin(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_umaxmin:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q1, #0xff
-; CHECK-NEXT:    vmin.u16 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp ult <8 x i16> %s0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
@@ -104,8 +96,8 @@ entry:
 define arm_aapcs_vfpcc <8 x i16> @vqmovni16_uminmax(<8 x i16> %s0) {
 ; CHECK-LABEL: vqmovni16_uminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vmov.i16 q1, #0xff
-; CHECK-NEXT:    vmin.u16 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c2 = icmp ult <8 x i16> %s0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
@@ -5,10 +5,8 @@ define arm_aapcs_vfpcc <4 x i32> @vqshrni32_smaxmin(<4 x i32> %so) {
 ; CHECK-LABEL: vqshrni32_smaxmin:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vshr.s32 q0, q0, #3
-; CHECK-NEXT:    vmov.i32 q1, #0x7fff
-; CHECK-NEXT:    vmin.s32 q0, q0, q1
-; CHECK-NEXT:    vmvn.i32 q1, #0x7fff
-; CHECK-NEXT:    vmax.s32 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %s0 = ashr <4 x i32> %so, <i32 3, i32 3, i32 3, i32 3>
@@ -23,10 +21,8 @@ define arm_aapcs_vfpcc <4 x i32> @vqshrni32_sminmax(<4 x i32> %so) {
 ; CHECK-LABEL: vqshrni32_sminmax:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vshr.s32 q0, q0, #3
-; CHECK-NEXT:    vmvn.i32 q1, #0x7fff
-; CHECK-NEXT:    vmax.s32 q0, q0, q1
-; CHECK-NEXT:    vmov.i32 q1, #0x7fff
-; CHECK-NEXT:    vmin.s32 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %s0 = ashr <4 x i32> %so, <i32 3, i32 3, i32 3, i32 3>
@@ -41,8 +37,8 @@ define arm_aapcs_vfpcc <4 x i32> @vqshrni32_umaxmin(<4 x i32> %so) {
 ; CHECK-LABEL: vqshrni32_umaxmin:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vshr.u32 q0, q0, #3
-; CHECK-NEXT:    vmov.i32 q1, #0xffff
-; CHECK-NEXT:    vmin.u32 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %s0 = lshr <4 x i32> %so, <i32 3, i32 3, i32 3, i32 3>
@@ -55,8 +51,8 @@ define arm_aapcs_vfpcc <4 x i32> @vqshrni32_uminmax(<4 x i32> %so) {
 ; CHECK-LABEL: vqshrni32_uminmax:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vshr.u32 q0, q0, #3
-; CHECK-NEXT:    vmov.i32 q1, #0xffff
-; CHECK-NEXT:    vmin.u32 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %s0 = lshr <4 x i32> %so, <i32 3, i32 3, i32 3, i32 3>
@@ -69,10 +65,8 @@ define arm_aapcs_vfpcc <8 x i16> @vqshrni16_smaxmin(<8 x i16> %so) {
 ; CHECK-LABEL: vqshrni16_smaxmin:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vshr.s16 q0, q0, #3
-; CHECK-NEXT:    vmov.i16 q1, #0x7f
-; CHECK-NEXT:    vmin.s16 q0, q0, q1
-; CHECK-NEXT:    vmvn.i16 q1, #0x7f
-; CHECK-NEXT:    vmax.s16 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %s0 = ashr <8 x i16> %so, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -87,10 +81,8 @@ define arm_aapcs_vfpcc <8 x i16> @vqshrni16_sminmax(<8 x i16> %so) {
 ; CHECK-LABEL: vqshrni16_sminmax:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vshr.s16 q0, q0, #3
-; CHECK-NEXT:    vmvn.i16 q1, #0x7f
-; CHECK-NEXT:    vmax.s16 q0, q0, q1
-; CHECK-NEXT:    vmov.i16 q1, #0x7f
-; CHECK-NEXT:    vmin.s16 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %s0 = ashr <8 x i16> %so, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -105,8 +97,8 @@ define arm_aapcs_vfpcc <8 x i16> @vqshrni16_umaxmin(<8 x i16> %so) {
 ; CHECK-LABEL: vqshrni16_umaxmin:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vshr.u16 q0, q0, #3
-; CHECK-NEXT:    vmov.i16 q1, #0xff
-; CHECK-NEXT:    vmin.u16 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %s0 = lshr <8 x i16> %so, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
@@ -119,8 +111,8 @@ define arm_aapcs_vfpcc <8 x i16> @vqshrni16_uminmax(<8 x i16> %so) {
 ; CHECK-LABEL: vqshrni16_uminmax:
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vshr.u16 q0, q0, #3
-; CHECK-NEXT:    vmov.i16 q1, #0xff
-; CHECK-NEXT:    vmin.u16 q0, q0, q1
+; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %s0 = lshr <8 x i16> %so, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>