1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[ARM] Add support for MVE pre and post inc loads and stores

This adds pre- and post-increment and -decrement addressing for MVE loads and
stores. It uses the builtin pre and post load/store detection, unlike Neon.
Loads are selected with the code in tryMVEIndexedLoad, stores are selected with
tablegen patterns. The immediates have a +/-7 bit range, multiplied by the size
of the element.

Differential Revision: https://reviews.llvm.org/D63840

llvm-svn: 368305
This commit is contained in:
David Green 2019-08-08 15:27:58 +00:00
parent 3265a2671b
commit b937fefeae
7 changed files with 423 additions and 275 deletions

View File

@ -1113,6 +1113,16 @@ def pre_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
let IsStore = 1;
let MemoryVT = f32;
}
// Pre-indexed truncating store fragment (wraps pre_truncst) that is marked as
// a store and restricted to memory values whose scalar type (ScalarMemoryVT)
// is i8.
def pre_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
let IsStore = 1;
let ScalarMemoryVT = i8;
}
// Pre-indexed truncating store fragment (wraps pre_truncst) that is marked as
// a store and restricted to memory values whose scalar type (ScalarMemoryVT)
// is i16.
def pre_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncst node:$val, node:$base, node:$offset)> {
let IsStore = 1;
let ScalarMemoryVT = i16;
}
def post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(istore node:$val, node:$ptr, node:$offset), [{
@ -1150,6 +1160,16 @@ def post_truncstf32 : PatFrag<(ops node:$val, node:$base, node:$offset),
let IsStore = 1;
let MemoryVT = f32;
}
// Post-indexed truncating store fragment (wraps post_truncst) that is marked
// as a store and restricted to memory values whose scalar type
// (ScalarMemoryVT) is i8.
def post_truncstvi8 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
let IsStore = 1;
let ScalarMemoryVT = i8;
}
// Post-indexed truncating store fragment (wraps post_truncst) that is marked
// as a store and restricted to memory values whose scalar type
// (ScalarMemoryVT) is i16.
def post_truncstvi16 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncst node:$val, node:$base, node:$offset)> {
let IsStore = 1;
let ScalarMemoryVT = i16;
}
def nonvolatile_load : PatFrag<(ops node:$ptr),
(load node:$ptr), [{

View File

@ -146,9 +146,12 @@ public:
SDValue &OffImm);
bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
SDValue &OffImm);
template<unsigned Shift>
bool SelectT2AddrModeImm7(SDValue N, SDValue &Base,
SDValue &OffImm);
template <unsigned Shift>
bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
unsigned Shift);
template <unsigned Shift>
bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
SDValue &OffReg, SDValue &ShImm);
bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
@ -179,6 +182,7 @@ private:
bool tryARMIndexedLoad(SDNode *N);
bool tryT1IndexedLoad(SDNode *N);
bool tryT2IndexedLoad(SDNode *N);
bool tryMVEIndexedLoad(SDNode *N);
/// SelectVLD - Select NEON load intrinsics. NumVecs should be
/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
@ -1307,6 +1311,31 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N,
return true;
}
/// Compile-time-shift form of SelectT2AddrModeImm7Offset. It simply forwards
/// to the overload that takes the shift as a runtime argument, so that the
/// selector can be named with an explicit template argument (as the
/// t2am_imm7_offset ComplexPattern does with
/// "SelectT2AddrModeImm7Offset<shift>").
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
SDValue &OffImm) {
return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
/// Match \p N as the immediate offset operand of an indexed load or store.
///
/// \param Op     The load or store node that owns the offset; its indexed
///               addressing mode decides the sign of the result.
/// \param N      The candidate offset value.
/// \param OffImm Receives the offset as an i32 target constant on success.
/// \param Shift  log2 of the scale applied to the encoded immediate.
///
/// The match succeeds when isScaledConstantInRange accepts \p N with scale
/// (1 << Shift) and bounds 0 / 0x80 (the 7-bit immediate). The value written
/// to \p OffImm is the scaled constant multiplied back by (1 << Shift); it is
/// kept positive for PRE_INC / POST_INC and negated for every other mode.
///
/// \returns true if \p OffImm was set, false otherwise.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  // Anything that is not a load is treated as a store.
  ISD::MemIndexedMode Mode;
  if (Op->getOpcode() == ISD::LOAD)
    Mode = cast<LoadSDNode>(Op)->getAddressingMode();
  else
    Mode = cast<StoreSDNode>(Op)->getAddressingMode();

  int ScaledImm;
  if (!isScaledConstantInRange(N, 1 << Shift, 0, 0x80, ScaledImm)) // 7 bits.
    return false;

  // Undo the scaling, then apply the direction implied by the addressing mode.
  const bool IsIncrement = Mode == ISD::PRE_INC || Mode == ISD::POST_INC;
  const int ByteOffset = ScaledImm * (1 << Shift);
  OffImm = CurDAG->getTargetConstant(IsIncrement ? ByteOffset : -ByteOffset,
                                     SDLoc(N), MVT::i32);
  return true;
}
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
SDValue &Base,
SDValue &OffReg, SDValue &ShImm) {
@ -1565,6 +1594,68 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
return false;
}
/// Try to select an indexed (pre/post increment or decrement) vector load as
/// one of the MVE VLDR*_pre / VLDR*_post instructions.
///
/// Returns false without modifying anything when the load is unindexed, when
/// its memory type is not a vector, or when none of the type / alignment /
/// offset combinations below can be matched. On success the uses of \p N are
/// redirected to the new machine node, \p N is removed, and true is returned.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM == ISD::UNINDEXED)
return false;
EVT LoadedVT = LD->getMemoryVT();
if (!LoadedVT.isVector())
return false;
// Only SEXTLOAD picks the signed opcodes; every other extension kind falls
// through to the unsigned variants below.
bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
SDValue Offset;
bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
unsigned Opcode = 0;
unsigned Align = LD->getAlignment();
bool IsLE = Subtarget->isLittle();
// Each branch checks the memory type (and alignment where required) and then
// calls SelectT2AddrModeImm7Offset with the matching shift; that call fills in
// Offset when it succeeds. The order of the branches therefore matters.
//
// Narrow memory types first: v4i16, v8i8 and v4i8 use the widening
// VLDRH/VLDRB forms.
if (Align >= 2 && LoadedVT == MVT::v4i16 &&
SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
else
Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
} else if (LoadedVT == MVT::v8i8 &&
SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
else
Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
} else if (LoadedVT == MVT::v4i8 &&
SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0)) {
if (isSExtLd)
Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
else
Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
// Remaining vector types, tried from the widest element access (word) to the
// narrowest (byte). In little-endian any type may use any of them; otherwise
// only the types with the matching element size are accepted.
} else if (Align >= 4 &&
(IsLE || LoadedVT == MVT::v4i32 || LoadedVT == MVT::v4f32) &&
SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 2))
Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
else if (Align >= 2 &&
(IsLE || LoadedVT == MVT::v8i16 || LoadedVT == MVT::v8f16) &&
SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 1))
Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
else if ((IsLE || LoadedVT == MVT::v16i8) &&
SelectT2AddrModeImm7Offset(N, LD->getOffset(), Offset, 0))
Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
else
return false;
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
// Operands: base, immediate offset, ARMVCC::None plus register 0 (presumably
// the "not predicated" operands -- confirm against the instruction
// definitions), and the chain.
SDValue Ops[] = {Base, Offset,
CurDAG->getTargetConstant(ARMVCC::None, SDLoc(N), MVT::i32),
CurDAG->getRegister(0, MVT::i32), Chain};
SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), LD->getValueType(0),
MVT::i32, MVT::Other, Ops);
transferMemOperands(N, New);
// NOTE(review): values 0 and 1 of N are mapped to values 1 and 0 of New, even
// though New is created with the loaded vector type listed first and i32
// second. Confirm this against the result order the *_pre/*_post
// instructions actually define.
ReplaceUses(SDValue(N, 0), SDValue(New, 1));
ReplaceUses(SDValue(N, 1), SDValue(New, 0));
ReplaceUses(SDValue(N, 2), SDValue(New, 2));
CurDAG->RemoveDeadNode(N);
return true;
}
/// Form a GPRPair pseudo register from a pair of GPR regs.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
SDLoc dl(V0.getNode());
@ -2987,6 +3078,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
case ISD::LOAD: {
if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
return;
if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
if (tryT2IndexedLoad(N))
return;

View File

@ -273,6 +273,13 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
}
// Pre and Post inc are supported on loads and stores
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
}
}
const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
@ -291,6 +298,13 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
setOperationAction(ISD::SETCC, VT, Custom);
// Pre and Post inc are supported on loads and stores
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, VT, Legal);
setIndexedStoreAction(im, VT, Legal);
}
if (HasMVEFP) {
setOperationAction(ISD::FMINNUM, VT, Legal);
setOperationAction(ISD::FMAXNUM, VT, Legal);
@ -338,6 +352,17 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
// Pre and Post inc on these are legal, given the correct extends
for (unsigned im = (unsigned)ISD::PRE_INC;
im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
setIndexedLoadAction(im, MVT::v8i8, Legal);
setIndexedStoreAction(im, MVT::v8i8, Legal);
setIndexedLoadAction(im, MVT::v4i8, Legal);
setIndexedStoreAction(im, MVT::v4i8, Legal);
setIndexedLoadAction(im, MVT::v4i16, Legal);
setIndexedStoreAction(im, MVT::v4i16, Legal);
}
// Predicate types
const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1};
for (auto VT : pTypes) {
@ -14620,6 +14645,52 @@ static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
return false;
}
/// Decide whether the address computation \p Ptr can be folded into an MVE
/// pre/post-indexed load or store of memory type \p VT.
///
/// \p Ptr must be an ADD or SUB whose second operand is a constant. The
/// constant has to be non-zero, a multiple of the access scale, and strictly
/// smaller in magnitude than 0x80 * scale. The scale (1, 2 or 4) is chosen
/// from \p VT, \p Align and, through \p isLE, the endianness.
///
/// \param Ptr        The candidate address node.
/// \param VT         Memory type of the load/store.
/// \param Align      Alignment of the access.
/// \param isSEXTLoad Unused by this function.
/// \param isLE       True for little-endian subtargets.
/// \param Base       Receives operand 0 of \p Ptr (written as soon as the
///                   constant check passes, even if false is returned later).
/// \param Offset     Receives the magnitude of the constant on success.
/// \param isInc      Receives whether the access increments the base.
/// \returns true when Base/Offset/isInc describe a legal indexed access.
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, unsigned Align,
                                      bool isSEXTLoad, bool isLE, SDValue &Base,
                                      SDValue &Offset, bool &isInc,
                                      SelectionDAG &DAG) {
  const unsigned PtrOpc = Ptr->getOpcode();
  if (PtrOpc != ISD::ADD && PtrOpc != ISD::SUB)
    return false;

  auto *ConstRHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1));
  if (!ConstRHS)
    return false;
  const int Imm = static_cast<int>(ConstRHS->getZExtValue());

  // Checks the constant against +/-(Limit * Scale) and, when it fits, records
  // the direction and the magnitude of the offset.
  auto FitsScaledImm = [&](int Limit, int Scale) {
    const int Bound = Limit * Scale;
    if (Imm == 0 || Imm % Scale != 0 || Imm <= -Bound || Imm >= Bound)
      return false;

    if (Imm < 0) {
      assert(Ptr->getOpcode() == ISD::ADD);
      isInc = false;
      Offset = DAG.getConstant(-Imm, SDLoc(Ptr), ConstRHS->getValueType(0));
    } else {
      // A positive constant increments for ADD and decrements for SUB.
      isInc = PtrOpc == ISD::ADD;
      Offset = DAG.getConstant(Imm, SDLoc(Ptr), ConstRHS->getValueType(0));
    }
    return true;
  };

  // Try to find a matching instruction based on s/zext, Alignment, Offset and
  // (in BE) type.
  Base = Ptr->getOperand(0);

  // Narrow memory types have exactly one candidate scale each.
  if (VT == MVT::v4i16)
    return Align >= 2 && FitsScaledImm(0x80, 2);
  if (VT == MVT::v4i8 || VT == MVT::v8i8)
    return FitsScaledImm(0x80, 1);

  // Everything else: word, then halfword, then byte scaled offsets.
  if (Align >= 4 && (isLE || VT == MVT::v4i32 || VT == MVT::v4f32) &&
      FitsScaledImm(0x80, 4))
    return true;
  if (Align >= 2 && (isLE || VT == MVT::v8i16 || VT == MVT::v8f16) &&
      FitsScaledImm(0x80, 2))
    return true;
  return (isLE || VT == MVT::v16i8) && FitsScaledImm(0x80, 1);
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@ -14633,25 +14704,35 @@ ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
EVT VT;
SDValue Ptr;
unsigned Align;
bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
VT = LD->getMemoryVT();
VT = LD->getMemoryVT();
Align = LD->getAlignment();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
VT = ST->getMemoryVT();
VT = ST->getMemoryVT();
Align = ST->getAlignment();
} else
return false;
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
if (VT.isVector())
isLegal = Subtarget->hasMVEIntegerOps() &&
getMVEIndexedAddressParts(Ptr.getNode(), VT, Align, isSEXTLoad,
Subtarget->isLittle(), Base, Offset,
isInc, DAG);
else {
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
Offset, isInc, DAG);
}
if (!isLegal)
return false;
@ -14669,15 +14750,18 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
SelectionDAG &DAG) const {
EVT VT;
SDValue Ptr;
unsigned Align;
bool isSEXTLoad = false, isNonExt;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
VT = LD->getMemoryVT();
Ptr = LD->getBasePtr();
Align = LD->getAlignment();
isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
VT = ST->getMemoryVT();
Ptr = ST->getBasePtr();
Align = ST->getAlignment();
isNonExt = !ST->isTruncatingStore();
} else
return false;
@ -14700,12 +14784,19 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
bool isInc;
bool isLegal = false;
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
if (VT.isVector())
isLegal = Subtarget->hasMVEIntegerOps() &&
getMVEIndexedAddressParts(Op, VT, Align, isSEXTLoad,
Subtarget->isLittle(), Base, Offset,
isInc, DAG);
else {
if (Subtarget->isThumb2())
isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
else
isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
isInc, DAG);
}
if (!isLegal)
return false;

View File

@ -221,7 +221,9 @@ def t2am_imm7shift0OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<0>;
def t2am_imm7shift1OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<1>;
def t2am_imm7shift2OffsetAsmOperand : t2am_imm7shiftOffsetAsmOperand<2>;
class t2am_imm7_offset<int shift> : MemOperand {
class t2am_imm7_offset<int shift> : MemOperand,
ComplexPattern<i32, 1, "SelectT2AddrModeImm7Offset<"#shift#">",
[], [SDNPWantRoot]> {
// They are printed the same way as the imm8 version
let PrintMethod = "printT2AddrModeImm8OffsetOperand";
let ParserMatchClass =
@ -4812,6 +4814,38 @@ multiclass MVE_unpred_vector_load<Instruction RegImmInst, PatFrag LoadKind,
def : MVE_unpred_vector_load_typed<v2f64, RegImmInst, LoadKind, shift>;
}
// Selection pattern for one indexed vector store: a StoreKind node storing a
// value of type Ty (in MQPR) to base register $Rn (tGPR) with a
// t2am_imm7_offset<shift> offset is selected to Opcode with the same three
// operands.
class MVE_unpred_vector_offset_store_typed<ValueType Ty, Instruction Opcode,
PatFrag StoreKind, int shift>
: Pat<(StoreKind (Ty MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<shift>:$addr),
(Opcode MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<shift>:$addr)>;
// Instantiates MVE_unpred_vector_offset_store_typed for RegImmInst / StoreKind
// / shift once per vector type: v16i8, v8i16, v8f16, v4i32, v4f32, v2i64 and
// v2f64. The same instruction is therefore used regardless of element type.
multiclass MVE_unpred_vector_offset_store<Instruction RegImmInst, PatFrag StoreKind,
int shift> {
def : MVE_unpred_vector_offset_store_typed<v16i8, RegImmInst, StoreKind, shift>;
def : MVE_unpred_vector_offset_store_typed<v8i16, RegImmInst, StoreKind, shift>;
def : MVE_unpred_vector_offset_store_typed<v8f16, RegImmInst, StoreKind, shift>;
def : MVE_unpred_vector_offset_store_typed<v4i32, RegImmInst, StoreKind, shift>;
def : MVE_unpred_vector_offset_store_typed<v4f32, RegImmInst, StoreKind, shift>;
def : MVE_unpred_vector_offset_store_typed<v2i64, RegImmInst, StoreKind, shift>;
def : MVE_unpred_vector_offset_store_typed<v2f64, RegImmInst, StoreKind, shift>;
}
// pre_store restricted to stores whose alignment is at least 4 bytes.
def aligned32_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(pre_store node:$val, node:$ptr, node:$offset), [{
return cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
// post_store restricted to stores whose alignment is at least 4 bytes.
def aligned32_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
(post_store node:$val, node:$ptr, node:$offset), [{
return cast<StoreSDNode>(N)->getAlignment() >= 4;
}]>;
// pre_store restricted to stores whose alignment is at least 2 bytes.
//
// The check is ">= 2" rather than "== 2": getMVEIndexedAddressParts accepts a
// halfword-scaled offset for any store with Align >= 2, so a more strongly
// aligned store whose offset is not a multiple of 4 must still be able to
// match the halfword pattern. This also mirrors the ">= 4" test used by
// aligned32_pre_store.
def aligned16_pre_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
                                  (pre_store node:$val, node:$ptr, node:$offset), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 2;
}]>;
// post_store restricted to stores whose alignment is at least 2 bytes.
//
// The check is ">= 2" rather than "== 2": getMVEIndexedAddressParts accepts a
// halfword-scaled offset for any store with Align >= 2, so a more strongly
// aligned store whose offset is not a multiple of 4 must still be able to
// match the halfword pattern. This also mirrors the ">= 4" test used by
// aligned32_post_store.
def aligned16_post_store : PatFrag<(ops node:$val, node:$ptr, node:$offset),
                                   (post_store node:$val, node:$ptr, node:$offset), [{
  return cast<StoreSDNode>(N)->getAlignment() >= 2;
}]>;
let Predicates = [HasMVEInt, IsLE] in {
defm : MVE_unpred_vector_store<MVE_VSTRBU8, byte_alignedstore, 0>;
defm : MVE_unpred_vector_store<MVE_VSTRHU16, hword_alignedstore, 1>;
@ -4820,6 +4854,13 @@ let Predicates = [HasMVEInt, IsLE] in {
defm : MVE_unpred_vector_load<MVE_VLDRBU8, byte_alignedload, 0>;
defm : MVE_unpred_vector_load<MVE_VLDRHU16, hword_alignedload, 1>;
defm : MVE_unpred_vector_load<MVE_VLDRWU32, alignedload32, 2>;
defm : MVE_unpred_vector_offset_store<MVE_VSTRBU8_pre, pre_store, 0>;
defm : MVE_unpred_vector_offset_store<MVE_VSTRBU8_post, post_store, 0>;
defm : MVE_unpred_vector_offset_store<MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
defm : MVE_unpred_vector_offset_store<MVE_VSTRHU16_post, aligned16_post_store, 1>;
defm : MVE_unpred_vector_offset_store<MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
defm : MVE_unpred_vector_offset_store<MVE_VSTRWU32_post, aligned32_post_store, 2>;
}
let Predicates = [HasMVEInt, IsBE] in {
@ -4860,6 +4901,17 @@ let Predicates = [HasMVEInt, IsBE] in {
(MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
def : Pat<(store (v8f16 MQPR:$val), t2addrmode_imm7<0>:$addr),
(MVE_VSTRBU8 (MVE_VREV16_8 MQPR:$val), t2addrmode_imm7<0>:$addr)>;
def : MVE_unpred_vector_offset_store_typed<v16i8, MVE_VSTRBU8_pre, pre_store, 0>;
def : MVE_unpred_vector_offset_store_typed<v16i8, MVE_VSTRBU8_post, post_store, 0>;
def : MVE_unpred_vector_offset_store_typed<v8i16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
def : MVE_unpred_vector_offset_store_typed<v8i16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
def : MVE_unpred_vector_offset_store_typed<v8f16, MVE_VSTRHU16_pre, aligned16_pre_store, 1>;
def : MVE_unpred_vector_offset_store_typed<v8f16, MVE_VSTRHU16_post, aligned16_post_store, 1>;
def : MVE_unpred_vector_offset_store_typed<v4i32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
def : MVE_unpred_vector_offset_store_typed<v4i32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
def : MVE_unpred_vector_offset_store_typed<v4f32, MVE_VSTRWU32_pre, aligned32_pre_store, 2>;
def : MVE_unpred_vector_offset_store_typed<v4f32, MVE_VSTRWU32_post, aligned32_post_store, 2>;
}
let Predicates = [HasMVEInt] in {
@ -4877,6 +4929,10 @@ let Predicates = [HasMVEInt] in {
// Variants of the i16 truncating vector store fragments (unindexed,
// post-indexed and pre-indexed) that additionally carry MinAlignment = 2.
// They are used by the patterns that select the halfword VSTRH32 forms.
let MinAlignment = 2 in {
def truncstorevi16_align2 : PatFrag<(ops node:$val, node:$ptr),
(truncstorevi16 node:$val, node:$ptr)>;
def post_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset),
(post_truncstvi16 node:$val, node:$base, node:$offset)>;
def pre_truncstvi16_align2 : PatFrag<(ops node:$val, node:$base, node:$offset),
(pre_truncstvi16 node:$val, node:$base, node:$offset)>;
}
let Predicates = [HasMVEInt] in {
@ -4886,6 +4942,20 @@ let Predicates = [HasMVEInt] in {
(MVE_VSTRB32 MQPR:$val, t2addrmode_imm7<0>:$addr)>;
def : Pat<(truncstorevi16_align2 (v4i32 MQPR:$val), t2addrmode_imm7<1>:$addr),
(MVE_VSTRH32 MQPR:$val, t2addrmode_imm7<1>:$addr)>;
def : Pat<(post_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
(MVE_VSTRB16_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
def : Pat<(post_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
(MVE_VSTRB32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
def : Pat<(post_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
(MVE_VSTRH32_post MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
def : Pat<(pre_truncstvi8 (v8i16 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
(MVE_VSTRB16_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
def : Pat<(pre_truncstvi8 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<0>:$addr),
(MVE_VSTRB32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<0>:$addr)>;
def : Pat<(pre_truncstvi16_align2 (v4i32 MQPR:$Rt), tGPR:$Rn, t2am_imm7_offset<1>:$addr),
(MVE_VSTRH32_pre MQPR:$Rt, tGPR:$Rn, t2am_imm7_offset<1>:$addr)>;
}

View File

@ -4,8 +4,7 @@
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrw.u32 q0, [r0], #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -20,8 +19,7 @@ entry:
define i8* @ldrwu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0], #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -36,8 +34,7 @@ entry:
define i8* @ldrwu32_m4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: subs r0, #4
; CHECK-NEXT: vldrw.u32 q0, [r0], #-4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -52,8 +49,7 @@ entry:
define i8* @ldrwu32_508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: add.w r0, r0, #508
; CHECK-NEXT: vldrw.u32 q0, [r0], #508
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -84,8 +80,7 @@ entry:
define i8* @ldrwu32_m508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: sub.w r0, r0, #508
; CHECK-NEXT: vldrw.u32 q0, [r0], #-508
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -117,8 +112,7 @@ entry:
define i8* @ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrh.u32 q0, [r0], #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -151,8 +145,7 @@ entry:
define i8* @ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.u32 q0, [r0], #2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -168,8 +161,7 @@ entry:
define i8* @ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.u32 q0, [r0], #254
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -203,8 +195,7 @@ entry:
define i8* @ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrh.s32 q0, [r0], #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -237,8 +228,7 @@ entry:
define i8* @ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.s32 q0, [r0], #2
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -254,8 +244,7 @@ entry:
define i8* @ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.s32 q0, [r0], #254
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -289,8 +278,7 @@ entry:
define i8* @ldrhu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrh.u16 q0, [r0], #4
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -305,8 +293,7 @@ entry:
define i8* @ldrhu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0], #3
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -321,8 +308,7 @@ entry:
define i8* @ldrhu16_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.u16 q0, [r0], #2
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -337,8 +323,7 @@ entry:
define i8* @ldrhu16_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.u16 q0, [r0], #254
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -370,8 +355,7 @@ entry:
define i8* @ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.u32 q0, [r0], #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -387,8 +371,7 @@ entry:
define i8* @ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u32 q0, [r0], #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -404,8 +387,7 @@ entry:
define i8* @ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u32 q0, [r0], #127
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -439,8 +421,7 @@ entry:
define i8* @ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.s32 q0, [r0], #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -456,8 +437,7 @@ entry:
define i8* @ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.s32 q0, [r0], #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -473,8 +453,7 @@ entry:
define i8* @ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.s32 q0, [r0], #127
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -508,8 +487,7 @@ entry:
define i8* @ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.u16 q0, [r0], #4
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -525,8 +503,7 @@ entry:
define i8* @ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u16 q0, [r0], #3
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -542,8 +519,7 @@ entry:
define i8* @ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u16 q0, [r0], #127
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -577,8 +553,7 @@ entry:
define i8* @ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.s16 q0, [r0], #4
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -594,8 +569,7 @@ entry:
define i8* @ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.s16 q0, [r0], #3
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -611,8 +585,7 @@ entry:
define i8* @ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.s16 q0, [r0], #127
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -646,8 +619,7 @@ entry:
define i8* @ldrbu8_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.u8 q0, [r0], #4
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -662,8 +634,7 @@ entry:
define i8* @ldrbu8_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0], #3
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -678,8 +649,7 @@ entry:
define i8* @ldrbu8_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u8 q0, [r0], #127
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -710,8 +680,7 @@ entry:
define i8* @ldrwf32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrw.u32 q0, [r0], #4
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -726,8 +695,7 @@ entry:
define i8* @ldrwf16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrh.u16 q0, [r0], #4
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -742,8 +710,7 @@ entry:
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwi32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0], #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -758,8 +725,7 @@ entry:
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi16_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0], #3
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -798,8 +764,7 @@ entry:
define i8* @ldrf32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrf32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0], #3
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -814,8 +779,7 @@ entry:
define i8* @ldrf16_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrf16_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0], #3
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -835,8 +799,7 @@ define i8* @strw32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0], #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -851,8 +814,7 @@ define i8* @strw32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -867,8 +829,7 @@ define i8* @strw32_m4(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: subs r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0], #-4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -4
@ -883,8 +844,7 @@ define i8* @strw32_508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: add.w r0, r0, #508
; CHECK-NEXT: vstrw.32 q0, [r0], #508
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 508
@ -915,8 +875,7 @@ define i8* @strw32_m508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: sub.w r0, r0, #508
; CHECK-NEXT: vstrw.32 q0, [r0], #-508
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -508
@ -948,8 +907,7 @@ define i8* @strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrh.32 q0, [r0], #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -980,8 +938,7 @@ define i8* @strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vstrh.32 q0, [r0], #2
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
@ -996,8 +953,7 @@ define i8* @strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vstrh.32 q0, [r0], #254
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 254
@ -1029,8 +985,7 @@ define i8* @strh16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0], #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1045,8 +1000,7 @@ define i8* @strh16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1061,8 +1015,7 @@ define i8* @strh16_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vstrb.8 q0, [r0], #2
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
@ -1077,8 +1030,7 @@ define i8* @strh16_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vstrh.16 q0, [r0], #254
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 254
@ -1110,8 +1062,7 @@ define i8* @strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.32 q0, [r0], #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1126,8 +1077,7 @@ define i8* @strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.32 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1142,8 +1092,7 @@ define i8* @strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vstrb.32 q0, [r0], #127
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1175,8 +1124,7 @@ define i8* @strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.16 q0, [r0], #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1191,8 +1139,7 @@ define i8* @strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.16 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1207,8 +1154,7 @@ define i8* @strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vstrb.16 q0, [r0], #127
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1240,8 +1186,7 @@ define i8* @strb8_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0], #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1256,8 +1201,7 @@ define i8* @strb8_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1272,8 +1216,7 @@ define i8* @strb8_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vstrb.8 q0, [r0], #127
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1304,8 +1247,7 @@ define i8* @strf32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strf32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0], #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1320,8 +1262,7 @@ define i8* @strf16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strf16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0], #4
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1336,8 +1277,7 @@ define i8* @strwi32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strwi32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1352,8 +1292,7 @@ define i8* @strhi16_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strhi16_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1392,8 +1331,7 @@ define i8* @strf32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strf32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1408,8 +1346,7 @@ define i8* @strf16_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strf16_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0], #3
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3

View File

@ -4,8 +4,7 @@
define i8* @ldrwu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -20,8 +19,7 @@ entry:
define i8* @ldrwu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -36,8 +34,7 @@ entry:
define i8* @ldrwu32_m4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]
; CHECK-NEXT: subs r0, #4
; CHECK-NEXT: vldrw.u32 q0, [r0, #-4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -52,8 +49,7 @@ entry:
define i8* @ldrwu32_508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r0, r0, #508
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r0, #508]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -84,8 +80,7 @@ entry:
define i8* @ldrwu32_m508(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwu32_m508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: sub.w r0, r0, #508
; CHECK-NEXT: vldrw.u32 q0, [r0]
; CHECK-NEXT: vldrw.u32 q0, [r0, #-508]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -117,8 +112,7 @@ entry:
define i8* @ldrhu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrh.u32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -151,8 +145,7 @@ entry:
define i8* @ldrhu32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r0, #2]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.u32 q0, [r0, #2]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -168,8 +161,7 @@ entry:
define i8* @ldrhu32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.u32 q0, [r0]
; CHECK-NEXT: vldrh.u32 q0, [r0, #254]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -203,8 +195,7 @@ entry:
define i8* @ldrhs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrh.s32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -237,8 +228,7 @@ entry:
define i8* @ldrhs32_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.s32 q0, [r0, #2]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.s32 q0, [r0, #2]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -254,8 +244,7 @@ entry:
define i8* @ldrhs32_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhs32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.s32 q0, [r0]
; CHECK-NEXT: vldrh.s32 q0, [r0, #254]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -289,8 +278,7 @@ entry:
define i8* @ldrhu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrh.u16 q0, [r0, #4]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -305,8 +293,7 @@ entry:
define i8* @ldrhu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -321,8 +308,7 @@ entry:
define i8* @ldrhu16_2(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0, #2]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vldrh.u16 q0, [r0, #2]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -337,8 +323,7 @@ entry:
define i8* @ldrhu16_254(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhu16_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: vldrh.u16 q0, [r0, #254]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -370,8 +355,7 @@ entry:
define i8* @ldrbu32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.u32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -387,8 +371,7 @@ entry:
define i8* @ldrbu32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u32 q0, [r0, #3]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -404,8 +387,7 @@ entry:
define i8* @ldrbu32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u32 q0, [r0]
; CHECK-NEXT: vldrb.u32 q0, [r0, #127]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -439,8 +421,7 @@ entry:
define i8* @ldrbs32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.s32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -456,8 +437,7 @@ entry:
define i8* @ldrbs32_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s32 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.s32 q0, [r0, #3]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -473,8 +453,7 @@ entry:
define i8* @ldrbs32_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.s32 q0, [r0]
; CHECK-NEXT: vldrb.s32 q0, [r0, #127]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -508,8 +487,7 @@ entry:
define i8* @ldrbu16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.u16 q0, [r0, #4]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -525,8 +503,7 @@ entry:
define i8* @ldrbu16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u16 q0, [r0, #3]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -542,8 +519,7 @@ entry:
define i8* @ldrbu16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u16 q0, [r0]
; CHECK-NEXT: vldrb.u16 q0, [r0, #127]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -577,8 +553,7 @@ entry:
define i8* @ldrbs16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.s16 q0, [r0, #4]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -594,8 +569,7 @@ entry:
define i8* @ldrbs16_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.s16 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.s16 q0, [r0, #3]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -611,8 +585,7 @@ entry:
define i8* @ldrbs16_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbs16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.s16 q0, [r0]
; CHECK-NEXT: vldrb.s16 q0, [r0, #127]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -646,8 +619,7 @@ entry:
define i8* @ldrbu8_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrb.u8 q0, [r0, #4]!
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -662,8 +634,7 @@ entry:
define i8* @ldrbu8_3(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -678,8 +649,7 @@ entry:
define i8* @ldrbu8_127(i8* %x, i8* %y) {
; CHECK-LABEL: ldrbu8_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u8 q0, [r0]
; CHECK-NEXT: vldrb.u8 q0, [r0, #127]!
; CHECK-NEXT: vstrb.8 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -710,8 +680,7 @@ entry:
define i8* @ldrwf32_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrw.u32 q0, [r0, #4]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -726,8 +695,7 @@ entry:
define i8* @ldrwf16_4(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwf16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vldrh.u16 q0, [r0, #4]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -742,8 +710,7 @@ entry:
define i8* @ldrwi32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrwi32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -758,8 +725,7 @@ entry:
define i8* @ldrhi16_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrhi16_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -798,8 +764,7 @@ entry:
define i8* @ldrf32_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrf32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT: vstrw.32 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -814,8 +779,7 @@ entry:
define i8* @ldrf16_align1(i8* %x, i8* %y) {
; CHECK-LABEL: ldrf16_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrb.u8 q0, [r0, #3]!
; CHECK-NEXT: vstrh.16 q0, [r1]
; CHECK-NEXT: bx lr
entry:
@ -835,8 +799,7 @@ define i8* @strw32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -850,9 +813,8 @@ entry:
define i8* @strw32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -867,8 +829,7 @@ define i8* @strw32_m4(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #-4]
; CHECK-NEXT: subs r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0, #-4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -4
@ -882,9 +843,8 @@ entry:
define i8* @strw32_508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: add.w r0, r0, #508
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r0, #508]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 508
@ -914,9 +874,8 @@ entry:
define i8* @strw32_m508(i8* %y, i8* %x) {
; CHECK-LABEL: strw32_m508:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: sub.w r0, r0, #508
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0]
; CHECK-NEXT: vstrw.32 q0, [r0, #-508]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 -508
@ -948,8 +907,7 @@ define i8* @strh32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrh.32 q0, [r0, #4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -980,8 +938,7 @@ define i8* @strh32_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0, #2]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vstrh.32 q0, [r0, #2]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
@ -995,9 +952,8 @@ entry:
define i8* @strh32_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh32_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.u32 q0, [r1]
; CHECK-NEXT: vstrh.32 q0, [r0]
; CHECK-NEXT: vstrh.32 q0, [r0, #254]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 254
@ -1029,8 +985,7 @@ define i8* @strh16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1044,9 +999,8 @@ entry:
define i8* @strh16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1061,8 +1015,7 @@ define i8* @strh16_2(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_2:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0, #2]
; CHECK-NEXT: adds r0, #2
; CHECK-NEXT: vstrb.8 q0, [r0, #2]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 2
@ -1076,9 +1029,8 @@ entry:
define i8* @strh16_254(i8* %y, i8* %x) {
; CHECK-LABEL: strh16_254:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #254
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r0, #254]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 254
@ -1110,8 +1062,7 @@ define i8* @strb32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.32 q0, [r0, #4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1126,8 +1077,7 @@ define i8* @strb32_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.32 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1141,9 +1091,8 @@ entry:
define i8* @strb32_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb32_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u32 q0, [r1]
; CHECK-NEXT: vstrb.32 q0, [r0]
; CHECK-NEXT: vstrb.32 q0, [r0, #127]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1175,8 +1124,7 @@ define i8* @strb16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.16 q0, [r0, #4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1191,8 +1139,7 @@ define i8* @strb16_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.16 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1206,9 +1153,8 @@ entry:
define i8* @strb16_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb16_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u16 q0, [r1]
; CHECK-NEXT: vstrb.16 q0, [r0]
; CHECK-NEXT: vstrb.16 q0, [r0, #127]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1240,8 +1186,7 @@ define i8* @strb8_4(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1256,8 +1201,7 @@ define i8* @strb8_3(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_3:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1271,9 +1215,8 @@ entry:
define i8* @strb8_127(i8* %y, i8* %x) {
; CHECK-LABEL: strb8_127:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: adds r0, #127
; CHECK-NEXT: vldrb.u8 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0]
; CHECK-NEXT: vstrb.8 q0, [r0, #127]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 127
@ -1304,8 +1247,7 @@ define i8* @strf32_4(i8* %y, i8* %x) {
; CHECK-LABEL: strf32_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrw.32 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1320,8 +1262,7 @@ define i8* @strf16_4(i8* %y, i8* %x) {
; CHECK-LABEL: strf16_4:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrh.16 q0, [r0, #4]
; CHECK-NEXT: adds r0, #4
; CHECK-NEXT: vstrb.8 q0, [r0, #4]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 4
@ -1336,8 +1277,7 @@ define i8* @strwi32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strwi32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1352,8 +1292,7 @@ define i8* @strhi16_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strhi16_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1391,8 +1330,7 @@ define i8* @strf32_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strf32_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrw.u32 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3
@ -1407,8 +1345,7 @@ define i8* @strf16_align1(i8* %y, i8* %x) {
; CHECK-LABEL: strf16_align1:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r1]
; CHECK-NEXT: vstrb.8 q0, [r0, #3]
; CHECK-NEXT: adds r0, #3
; CHECK-NEXT: vstrb.8 q0, [r0, #3]!
; CHECK-NEXT: bx lr
entry:
%z = getelementptr inbounds i8, i8* %y, i32 3

View File

@ -55,8 +55,8 @@ entry:
define hidden void @fwd_float16_t(%struct.s_float16_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: fwd_float16_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r0, #16]
; CHECK-NEXT: vldrh.u16 q0, [r0], #16
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: bx lr
entry:
%arrayidx3 = getelementptr inbounds %struct.s_float16_t, %struct.s_float16_t* %v, i32 0, i32 1, i32 0
@ -130,8 +130,8 @@ for.end:
define hidden void @bwd_float16_t(%struct.s_float16_t* noalias nocapture %v) local_unnamed_addr #0 {
; CHECK-LABEL: bwd_float16_t:
; CHECK: @ %bb.0: @ %for.end
; CHECK-NEXT: vldrh.u16 q0, [r0]
; CHECK-NEXT: vstrh.16 q0, [r0, #-16]
; CHECK-NEXT: vldrh.u16 q0, [r0], #-16
; CHECK-NEXT: vstrh.16 q0, [r0]
; CHECK-NEXT: bx lr
for.end:
%0 = bitcast %struct.s_float16_t* %v to <8 x half>*