mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
[ARM] Eliminate redundant "mov rN, sp" instructions in Thumb1.
This takes sequences like "mov r4, sp; str r0, [r4]", and optimizes them to something like "str r0, [sp]".

For regular stack variables, this optimization was already implemented: we lower loads and stores using frame indexes, which are expanded later. However, when constructing a call frame for a call with more than four arguments, the existing optimization doesn't apply. We need to use stores which are actually relative to the current value of sp, and don't have an associated frame index.

This patch adds a special case to handle that construct. At the DAG level, this is an ISD::STORE where the address is a CopyFromReg from SP (plus a small constant offset).

This applies only to Thumb1: in Thumb2 or ARM mode, a regular store instruction can access SP directly, so the COPY gets eliminated by existing code.

The change to ARMDAGToDAGISel::SelectThumbAddrModeSP is a related cleanup: we shouldn't pretend that it can select anything other than frame indexes.

Differential Revision: https://reviews.llvm.org/D59568

llvm-svn: 356601
This commit is contained in:
parent
086f86be13
commit
e844a79a1e
@ -1141,23 +1141,19 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
|
||||
if (!CurDAG->isBaseWithConstantOffset(N))
|
||||
return false;
|
||||
|
||||
RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
|
||||
if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
|
||||
(LHSR && LHSR->getReg() == ARM::SP)) {
|
||||
if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
|
||||
// If the RHS is + imm8 * scale, fold into addr mode.
|
||||
int RHSC;
|
||||
if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
|
||||
Base = N.getOperand(0);
|
||||
if (Base.getOpcode() == ISD::FrameIndex) {
|
||||
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
|
||||
// For LHS+RHS to result in an offset that's a multiple of 4 the object
|
||||
// indexed by the LHS must be 4-byte aligned.
|
||||
MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
if (MFI.getObjectAlignment(FI) < 4)
|
||||
MFI.setObjectAlignment(FI, 4);
|
||||
Base = CurDAG->getTargetFrameIndex(
|
||||
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
|
||||
}
|
||||
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
|
||||
// For LHS+RHS to result in an offset that's a multiple of 4 the object
|
||||
// indexed by the LHS must be 4-byte aligned.
|
||||
MachineFrameInfo &MFI = MF->getFrameInfo();
|
||||
if (MFI.getObjectAlignment(FI) < 4)
|
||||
MFI.setObjectAlignment(FI, 4);
|
||||
Base = CurDAG->getTargetFrameIndex(
|
||||
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
|
||||
OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
|
||||
return true;
|
||||
}
|
||||
@ -2601,6 +2597,44 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
|
||||
|
||||
switch (N->getOpcode()) {
|
||||
default: break;
|
||||
case ISD::STORE: {
|
||||
// For Thumb1, match an sp-relative store in C++. This is a little
|
||||
// unfortunate, but I don't think I can make the chain check work
|
||||
// otherwise. (The chain of the store has to be the same as the chain
|
||||
// of the CopyFromReg, or else we can't replace the CopyFromReg with
|
||||
// a direct reference to "SP".)
|
||||
//
|
||||
// This is only necessary on Thumb1 because Thumb1 sp-relative stores use
|
||||
// a different addressing mode from other four-byte stores.
|
||||
//
|
||||
// This pattern usually comes up with call arguments.
|
||||
StoreSDNode *ST = cast<StoreSDNode>(N);
|
||||
SDValue Ptr = ST->getBasePtr();
|
||||
if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
|
||||
int RHSC = 0;
|
||||
if (Ptr.getOpcode() == ISD::ADD &&
|
||||
isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
|
||||
Ptr = Ptr.getOperand(0);
|
||||
|
||||
if (Ptr.getOpcode() == ISD::CopyFromReg &&
|
||||
cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
|
||||
Ptr.getOperand(0) == ST->getChain()) {
|
||||
SDValue Ops[] = {ST->getValue(),
|
||||
CurDAG->getRegister(ARM::SP, MVT::i32),
|
||||
CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
|
||||
getAL(CurDAG, dl),
|
||||
CurDAG->getRegister(0, MVT::i32),
|
||||
ST->getChain()};
|
||||
MachineSDNode *ResNode =
|
||||
CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
|
||||
MachineMemOperand *MemOp = ST->getMemOperand();
|
||||
CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
|
||||
ReplaceNode(N, ResNode);
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case ISD::WRITE_REGISTER:
|
||||
if (tryWriteRegister(N))
|
||||
return;
|
||||
|
@ -254,8 +254,8 @@ declare void @_ZSt9terminatev()
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_offset r6, -12
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_offset r5, -16
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_offset r4, -20
|
||||
; CHECK-THUMB-FP-ELIM: sub sp, #60
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 80
|
||||
; CHECK-THUMB-FP-ELIM: sub sp, #52
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_def_cfa_offset 72
|
||||
; CHECK-THUMB-FP-ELIM: .cfi_endproc
|
||||
|
||||
; CHECK-THUMB-V7-FP-LABEL: _Z4testiiiiiddddd:
|
||||
|
@ -36,11 +36,10 @@ entry:
|
||||
}
|
||||
; CHECK-LABEL: test_args_sp
|
||||
; Load `e`
|
||||
; CHECK: ldr r0, [sp, #40]
|
||||
; CHECK-NEXT: mov r5, sp
|
||||
; CHECK-NEXT: str r3, [r5]
|
||||
; CHECK: ldr r0, [sp, #32]
|
||||
; CHECK-NEXT: str r3, [sp]
|
||||
; Pass `e` on stack
|
||||
; CHECK-NEXT: str r0, [r5, #4]
|
||||
; CHECK-NEXT: str r0, [sp, #4]
|
||||
; CHECK: bl g
|
||||
|
||||
; int test_varargs_sp(int a, ...) {
|
||||
@ -92,10 +91,9 @@ entry:
|
||||
; CHECK-NEXT: mov sp, r4
|
||||
; Load `e` via FP
|
||||
; CHECK: ldr r0, [r7, #8]
|
||||
; CHECK-NEXT: mov r5, sp
|
||||
; CHECK-NEXT: str r3, [r5]
|
||||
; CHECK-NEXT: str r3, [sp]
|
||||
; Pass `e` as argument
|
||||
; CHECK-NEXT: str r0, [r5, #4]
|
||||
; CHECK-NEXT: str r0, [sp, #4]
|
||||
; CHECK: bl g
|
||||
|
||||
; int test_varargs_realign(int a, ...) {
|
||||
@ -147,9 +145,9 @@ entry:
|
||||
; CHECK: sub sp, #4
|
||||
; Load `e` via FP
|
||||
; CHECK: ldr r5, [r7, #8]
|
||||
; CHECK-NEXT: mov r0, sp
|
||||
; Pass `d` and `e` as arguments
|
||||
; CHECK-NEXT: stm r0!, {r3, r5}
|
||||
; CHECK-NEXT: str r3, [sp]
|
||||
; CHECK-NEXT: str r5, [sp, #4]
|
||||
; CHECK: bl g
|
||||
|
||||
; int test_varargs_vla(int a, ...) {
|
||||
|
@ -45,13 +45,12 @@ entry:
|
||||
; CHECK: adcs r3, r4
|
||||
; CHECK: adds r4, r2, r7
|
||||
; CHECK: adcs r1, r6
|
||||
; CHECK: mov r2, sp
|
||||
; CHECK: str r4, [r2]
|
||||
; CHECK: str r1, [r2, #4]
|
||||
; CHECK: ldr r6, [r0, #16]
|
||||
; CHECK: ldr r7, [r0, #24]
|
||||
; CHECK: adcs r7, r6
|
||||
; CHECK: str r7, [r2, #8]
|
||||
; CHECK: ldr r6, [r0, #20]
|
||||
; CHECK: str r4, [sp]
|
||||
; CHECK: str r1, [sp, #4]
|
||||
; CHECK: ldr r2, [r0, #16]
|
||||
; CHECK: ldr r6, [r0, #24]
|
||||
; CHECK: adcs r6, r2
|
||||
; CHECK: str r6, [sp, #8]
|
||||
; CHECK: ldr r2, [r0, #20]
|
||||
; CHECK: ldr r0, [r0, #28]
|
||||
; CHECK: adcs r0, r6
|
||||
; CHECK: adcs r0, r2
|
||||
|
@ -501,10 +501,9 @@ if.end: ; preds = %for.body, %if.else
|
||||
; DISABLE-NEXT: beq [[ELSE_LABEL:LBB[0-9_]+]]
|
||||
;
|
||||
; Setup of the varargs.
|
||||
; CHECK: mov [[TMP_SP:r[0-9]+]], sp
|
||||
; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]]]
|
||||
; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #4]
|
||||
; CHECK-NEXT: str r1, {{\[}}[[TMP_SP]], #8]
|
||||
; CHECK: str r1, [sp]
|
||||
; CHECK-NEXT: str r1, [sp, #4]
|
||||
; CHECK-NEXT: str r1, [sp, #8]
|
||||
; CHECK: movs r0, r1
|
||||
; CHECK-NEXT: movs r2, r1
|
||||
; CHECK-NEXT: movs r3, r1
|
||||
|
@ -12,15 +12,14 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
|
||||
; THUMBV6-NEXT: mov r7, r2
|
||||
; THUMBV6-NEXT: mov r4, r0
|
||||
; THUMBV6-NEXT: movs r5, #0
|
||||
; THUMBV6-NEXT: mov r0, sp
|
||||
; THUMBV6-NEXT: str r5, [r0, #12]
|
||||
; THUMBV6-NEXT: str r5, [r0, #8]
|
||||
; THUMBV6-NEXT: ldr r1, [sp, #116]
|
||||
; THUMBV6-NEXT: str r1, [sp, #72] @ 4-byte Spill
|
||||
; THUMBV6-NEXT: str r1, [r0, #4]
|
||||
; THUMBV6-NEXT: ldr r1, [sp, #112]
|
||||
; THUMBV6-NEXT: str r1, [sp, #44] @ 4-byte Spill
|
||||
; THUMBV6-NEXT: str r1, [r0]
|
||||
; THUMBV6-NEXT: str r5, [sp, #12]
|
||||
; THUMBV6-NEXT: str r5, [sp, #8]
|
||||
; THUMBV6-NEXT: ldr r0, [sp, #116]
|
||||
; THUMBV6-NEXT: str r0, [sp, #72] @ 4-byte Spill
|
||||
; THUMBV6-NEXT: str r0, [sp, #4]
|
||||
; THUMBV6-NEXT: ldr r0, [sp, #112]
|
||||
; THUMBV6-NEXT: str r0, [sp, #44] @ 4-byte Spill
|
||||
; THUMBV6-NEXT: str r0, [sp]
|
||||
; THUMBV6-NEXT: mov r0, r2
|
||||
; THUMBV6-NEXT: mov r1, r3
|
||||
; THUMBV6-NEXT: mov r2, r5
|
||||
|
Loading…
x
Reference in New Issue
Block a user