From 2d0165e75c00d995b9a89ffa042689b7fc99be78 Mon Sep 17 00:00:00 2001
From: James Molloy
Date: Fri, 15 Jul 2016 08:03:56 +0000
Subject: [PATCH] [Thumb-1] Select post-increment load and store where possible

Thumb-1 doesn't have post-inc or pre-inc load or store instructions. However
the LDM/STM instructions with writeback can function as post-inc load/store:

    ldm r0!, {r1}     @ load from r0 into r1 and increment r0 by 4

Obviously, this only works if the post increment is 4.

llvm-svn: 275540
---
 lib/Target/ARM/ARMISelDAGToDAG.cpp    | 29 ++++++++++
 lib/Target/ARM/ARMISelLowering.cpp    | 40 +++++++++++--
 lib/Target/ARM/ARMInstrThumb.td       | 18 ++++++
 test/CodeGen/Thumb/ldm-stm-postinc.ll | 81 +++++++++++++++++++++++++++
 4 files changed, 164 insertions(+), 4 deletions(-)
 create mode 100644 test/CodeGen/Thumb/ldm-stm-postinc.ll

diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1b1b7751c6c..c5e1d976bbb 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -195,6 +195,7 @@ public:
 private:
   /// Indexed (pre/post inc/dec) load matching code for ARM.
   bool tryARMIndexedLoad(SDNode *N);
+  bool tryT1IndexedLoad(SDNode *N);
   bool tryT2IndexedLoad(SDNode *N);
 
   /// SelectVLD - Select NEON load intrinsics. NumVecs should be
@@ -1543,6 +1544,31 @@ bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
   return false;
 }
 
+bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
+  LoadSDNode *LD = cast<LoadSDNode>(N);
+  EVT LoadedVT = LD->getMemoryVT();
+  ISD::MemIndexedMode AM = LD->getAddressingMode();
+  if (AM == ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD ||
+      AM != ISD::POST_INC || LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
+    return false;
+
+  auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
+  if (!COffs || COffs->getZExtValue() != 4)
+    return false;
+
+  // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
+  // The encoding of LDM is not how the rest of ISel expects a post-inc load to
+  // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
+  // ISel.
+  SDValue Chain = LD->getChain();
+  SDValue Base = LD->getBasePtr();
+  SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
+                   CurDAG->getRegister(0, MVT::i32), Chain };
+  ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32,
+                                        MVT::Other, Ops));
+  return true;
+}
+
 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
   LoadSDNode *LD = cast<LoadSDNode>(N);
   ISD::MemIndexedMode AM = LD->getAddressingMode();
@@ -3015,6 +3041,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
       if (tryT2IndexedLoad(N))
         return;
+    } else if (Subtarget->isThumb()) {
+      if (tryT1IndexedLoad(N))
+        return;
     } else if (tryARMIndexedLoad(N))
       return;
     // Other cases are autogenerated.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 12ed20c2ea4..1d07e773f9d 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -715,6 +715,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
       setIndexedStoreAction(im, MVT::i16, Legal);
       setIndexedStoreAction(im, MVT::i32, Legal);
     }
+  } else {
+    // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
+    setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
+    setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
   }
 
   setOperationAction(ISD::SADDO, MVT::i32, Custom);
@@ -8247,6 +8251,19 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
     MI.dump();
     llvm_unreachable("Unexpected instr type to insert");
   }
+
+  // Thumb1 post-indexed loads are really just single-register LDMs.
+  case ARM::tLDR_postidx: {
+    BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
+      .addOperand(MI.getOperand(1))  // Rn_wb
+      .addOperand(MI.getOperand(2))  // Rn
+      .addOperand(MI.getOperand(3))  // PredImm
+      .addOperand(MI.getOperand(4))  // PredReg
+      .addOperand(MI.getOperand(0)); // Rt
+    MI.eraseFromParent();
+    return BB;
+  }
+
   // The Thumb2 pre-indexed stores have the same MI operands, they just
   // define them differently in the .td files from the isel patterns, so
   // they need pseudos.
@@ -11596,22 +11613,37 @@ bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
                                                    SDValue &Offset,
                                                    ISD::MemIndexedMode &AM,
                                                    SelectionDAG &DAG) const {
-  if (Subtarget->isThumb1Only())
-    return false;
-
   EVT VT;
   SDValue Ptr;
-  bool isSEXTLoad = false;
+  bool isSEXTLoad = false, isNonExt;
   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
     VT = LD->getMemoryVT();
     Ptr = LD->getBasePtr();
     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
+    isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
     VT = ST->getMemoryVT();
     Ptr = ST->getBasePtr();
+    isNonExt = !ST->isTruncatingStore();
   } else
     return false;
 
+  if (Subtarget->isThumb1Only()) {
+    // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
+    // must be non-extending/truncating, i32, with an offset of 4.
+    assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
+    if (Op->getOpcode() != ISD::ADD || !isNonExt)
+      return false;
+    auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
+    if (!RHS || RHS->getZExtValue() != 4)
+      return false;
+
+    Offset = Op->getOperand(1);
+    Base = Op->getOperand(0);
+    AM = ISD::POST_INC;
+    return true;
+  }
+
   bool isInc;
   bool isLegal = false;
   if (Subtarget->isThumb2())
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index bec1ea8763d..93a174f3678 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1451,6 +1451,24 @@ def : T1Pat<(extloadi8 t_addrmode_rr:$addr), (tLDRBr t_addrmode_rr:$addr)>;
 def : T1Pat<(extloadi16 t_addrmode_is2:$addr), (tLDRHi t_addrmode_is2:$addr)>;
 def : T1Pat<(extloadi16 t_addrmode_rr:$addr), (tLDRHr t_addrmode_rr:$addr)>;
 
+// post-inc loads and stores
+
+// post-inc LDR -> LDM r0!, {r1}. The way operands are laid out in LDMs is
+// different to how ISel expects them for a post-inc load, so use a pseudo
+// and expand it just after ISel.
+let usesCustomInserter = 1,
+    Constraints = "$Rn = $Rn_wb,@earlyclobber $Rn_wb" in
+ def tLDR_postidx: tPseudoInst<(outs rGPR:$Rt, rGPR:$Rn_wb),
+                               (ins rGPR:$Rn, pred:$p),
+                               4, IIC_iStore_ru,
+                               []>;
+
+// post-inc STR -> STM r0!, {r1}. The layout of this (because it doesn't def
+// multiple registers) is the same in ISel as MachineInstr, so there's no need
+// for a pseudo.
+def : T1Pat<(post_store rGPR:$Rt, rGPR:$Rn, 4),
+            (tSTMIA_UPD rGPR:$Rn, rGPR:$Rt)>;
+
 // If it's impossible to use [r,r] address mode for sextload, select to
 // ldr{b|h} + sxt{b|h} instead.
 def : T1Pat<(sextloadi8 t_addrmode_is1:$addr),
diff --git a/test/CodeGen/Thumb/ldm-stm-postinc.ll b/test/CodeGen/Thumb/ldm-stm-postinc.ll
new file mode 100644
index 00000000000..f2e222bd5b9
--- /dev/null
+++ b/test/CodeGen/Thumb/ldm-stm-postinc.ll
@@ -0,0 +1,81 @@
+; RUN: llc -mtriple=thumbv7 -mcpu=cortex-m0 < %s -disable-lsr | FileCheck %s
+; FIXME: LSR mangles the last two testcases pretty badly. When this is fixed, remove
+; the -disable-lsr above.
+
+; CHECK-LABEL: @f
+; CHECK: ldm {{r[0-9]}}!, {r{{[0-9]}}}
+define i32 @f(i32* readonly %a, i32* readnone %b) {
+  %1 = icmp eq i32* %a, %b
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph, %0
+  %i.02 = phi i32 [ %3, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi i32* [ %4, %.lr.ph ], [ %a, %0 ]
+  %2 = load i32, i32* %.01, align 4
+  %3 = add nsw i32 %2, %i.02
+  %4 = getelementptr inbounds i32, i32* %.01, i32 1
+  %5 = icmp eq i32* %4, %b
+  br i1 %5, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %i.0.lcssa = phi i32 [ 0, %0 ], [ %3, %.lr.ph ]
+  ret i32 %i.0.lcssa
+}
+
+; CHECK-LABEL: @g
+; CHECK-NOT: ldm
+define i32 @g(i32* readonly %a, i32* readnone %b) {
+  %1 = icmp eq i32* %a, %b
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph, %0
+  %i.02 = phi i32 [ %3, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi i32* [ %4, %.lr.ph ], [ %a, %0 ]
+  %2 = load i32, i32* %.01, align 4
+  %3 = add nsw i32 %2, %i.02
+  %4 = getelementptr inbounds i32, i32* %.01, i32 2
+  %5 = icmp eq i32* %4, %b
+  br i1 %5, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  %i.0.lcssa = phi i32 [ 0, %0 ], [ %3, %.lr.ph ]
+  ret i32 %i.0.lcssa
+}
+
+; CHECK-LABEL: @h
+; CHECK: stm {{r[0-9]}}!, {r{{[0-9]}}}
+define void @h(i32* %a, i32* readnone %b) {
+  %1 = icmp eq i32* %a, %b
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph, %0
+  %i.02 = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi i32* [ %3, %.lr.ph ], [ %a, %0 ]
+  %2 = add nsw i32 %i.02, 1
+  store i32 %i.02, i32* %.01, align 4
+  %3 = getelementptr inbounds i32, i32* %.01, i32 1
+  %4 = icmp eq i32* %3, %b
+  br i1 %4, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}
+
+; CHECK-LABEL: @j
+; CHECK-NOT: stm
+define void @j(i32* %a, i32* readnone %b) {
+  %1 = icmp eq i32* %a, %b
+  br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph:                                           ; preds = %.lr.ph, %0
+  %i.02 = phi i32 [ %2, %.lr.ph ], [ 0, %0 ]
+  %.01 = phi i32* [ %3, %.lr.ph ], [ %a, %0 ]
+  %2 = add nsw i32 %i.02, 1
+  store i32 %i.02, i32* %.01, align 4
+  %3 = getelementptr inbounds i32, i32* %.01, i32 2
+  %4 = icmp eq i32* %3, %b
+  br i1 %4, label %._crit_edge, label %.lr.ph
+
+._crit_edge:                                      ; preds = %.lr.ph, %0
+  ret void
+}