From 1a5573f2577133030073d54cfc0df7b3ed6dc75a Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Sun, 22 Oct 2017 11:56:35 +0000 Subject: [PATCH] [ARM] Dynamic stack alignment for 16-bit Thumb This patch implements dynamic stack (re-)alignment for 16-bit Thumb. When targeting processors, which support only the 16-bit Thumb instruction set the compiler ignores the alignment attributes of automatic variables and may silently generate incorrect code. Differential revision: https://reviews.llvm.org/D38143 llvm-svn: 316289 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 6 +--- lib/Target/ARM/ARMFrameLowering.cpp | 14 ++++----- lib/Target/ARM/Thumb1FrameLowering.cpp | 34 +++++++++++++++++++--- test/CodeGen/ARM/thumb1_return_sequence.ll | 10 +++++-- test/CodeGen/Thumb/large-stack.ll | 4 +-- test/CodeGen/Thumb/long.ll | 9 ++++-- 6 files changed, 53 insertions(+), 24 deletions(-) diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index bf39aebaf44..63b14ee98d7 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -391,16 +391,12 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const ARMFunctionInfo *AFI = MF.getInfo(); const ARMFrameLowering *TFI = getFrameLowering(MF); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, - // 2. This is a Thumb1 function (it's not useful, so we don't bother), or - // 3. There are VLAs in the function and the base pointer is disabled. + // 2. There are VLAs in the function and the base pointer is disabled. if (!TargetRegisterInfo::canRealignStack(MF)) return false; - if (AFI->isThumb1OnlyFunction()) - return false; // Stack realignment requires a frame pointer. If we already started // register allocation with frame pointer elimination, it is too late now. if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget()))) diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 4af744f5ec3..ce4add974d6 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -1610,14 +1610,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, if (AFI->getArgRegsSaveSize() > 0) SavedRegs.set(ARM::LR); - // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know - // for sure what the stack size will be, but for this, an estimate is good - // enough. If there anything changes it, it'll be a spill, which implies - // we've used all the registers and so R4 is already used, so not marking - // it here will be OK. + // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function + // requires stack alignment. We don't know for sure what the stack size + // will be, but for this, an estimate is good enough. If there anything + // changes it, it'll be a spill, which implies we've used all the registers + // and so R4 is already used, so not marking it here will be OK. // FIXME: It will be better just to find spare register here. - unsigned StackSize = MFI.estimateStackSize(MF); - if (MFI.hasVarSizedObjects() || StackSize > 508) + if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) || + MFI.estimateStackSize(MF) > 508) SavedRegs.set(ARM::R4); } diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index 13068992e8f..4f330e3a884 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -352,10 +352,36 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - // Thumb1 does not currently support dynamic stack realignment. Report a - // fatal error rather then silently generate bad code. - if (RegInfo->needsStackRealignment(MF)) - report_fatal_error("Dynamic stack realignment not supported for thumb1."); + if (RegInfo->needsStackRealignment(MF)) { + const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment()); + // Emit the following sequence, using R4 as a temporary, since we cannot use + // SP as a source or destination register for the shifts: + // mov r4, sp + // lsrs r4, r4, #NrBitsToZero + // lsls r4, r4, #NrBitsToZero + // mov sp, r4 + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) + .addReg(ARM::SP, RegState::Kill) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4) + .addDef(ARM::CPSR) + .addReg(ARM::R4, RegState::Kill) + .addImm(NrBitsToZero) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4) + .addDef(ARM::CPSR) + .addReg(ARM::R4, RegState::Kill) + .addImm(NrBitsToZero) + .add(predOps(ARMCC::AL)); + + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) + .addReg(ARM::R4, RegState::Kill) + .add(predOps(ARMCC::AL)); + + AFI->setShouldRestoreSPFromFP(true); + } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects diff --git a/test/CodeGen/ARM/thumb1_return_sequence.ll b/test/CodeGen/ARM/thumb1_return_sequence.ll index 67d1cad2cf6..c54712efb39 100644 --- a/test/CodeGen/ARM/thumb1_return_sequence.ll +++ b/test/CodeGen/ARM/thumb1_return_sequence.ll @@ -9,6 +9,8 @@ entry: ; -------- ; CHECK-V4T: push {[[SAVED:(r[4567](, )?)+]], lr} ; CHECK-V4T: sub sp, +; Stack is realigned because of the <6 x i32> type +; CHECK-V4T: mov sp, r4 ; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr} %b = alloca <6 x i32>, align 16 @@ -21,7 +23,8 @@ entry: ; Epilogue ; -------- -; CHECK-V4T: add sp, +; Stack realignment means sp is restored from frame pointer +; CHECK-V4T: mov sp ; CHECK-V4T-NEXT: pop {[[SAVED]]} ; The ISA for v4 does not support pop pc, so make sure we do not emit ; one even when we do not need to update SP. @@ -70,8 +73,9 @@ entry: ; CHECK-V4T-NEXT: mov lr, [[POP_REG]] ; CHECK-V4T-NEXT: mov [[POP_REG]], r12 ; CHECK-V4T: bx lr -; CHECK-V5T: add sp, -; CHECK-V5T-NEXT: pop {[[SAVED]]} +; CHECK-V5T: lsls r4 +; CHECK-V5T-NEXT: mov sp, r4 +; CHECK-V5T: pop {[[SAVED]]} ; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]] ; CHECK-V5T-NEXT: pop {[[POP_REG]]} ; CHECK-V5T-NEXT: add sp, diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index b0152ddc4d3..f35bffba5ca 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -75,7 +75,7 @@ define i32 @test3() { ; CHECK: add sp, [[TEMP3]] %retval = alloca i32, align 4 %tmp = alloca i32, align 4 - %a = alloca [805306369 x i8], align 16 + %a = alloca [805306369 x i8], align 4 store i32 0, i32* %tmp %tmp1 = load i32, i32* %tmp ret i32 %tmp1 @@ -91,7 +91,7 @@ define i32 @test3_nofpelim() "no-frame-pointer-elim"="true" { ; CHECK: mov sp, r4 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 - %a = alloca [805306369 x i8], align 16 + %a = alloca [805306369 x i8], align 8 store i32 0, i32* %tmp %tmp1 = load i32, i32* %tmp ret i32 %tmp1 diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll index 13951ef4354..7fc46ffb362 100644 --- a/test/CodeGen/Thumb/long.ll +++ b/test/CodeGen/Thumb/long.ll @@ -1,4 +1,5 @@ -; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s +; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | \ +; RUN: FileCheck %s -check-prefix CHECK --check-prefix CHECK-EABI ; RUN: llc -mtriple=thumb-apple-darwin %s -verify-machineinstrs -o - | \ ; RUN: FileCheck %s -check-prefix CHECK -check-prefix CHECK-DARWIN @@ -172,10 +173,12 @@ entry: %retval = load i64, i64* %a ; [#uses=1] ret i64 %retval ; CHECK-LABEL: f10: -; CHECK: sub sp, #8 +; CHECK-EABI: sub sp, #8 +; CHECK-DARWIN: add r7, sp, #4 ; CHECK: ldr r0, [sp] ; CHECK: ldr r1, [sp, #4] -; CHECK: add sp, #8 +; CHECK-EABI: add sp, #8 +; CHECK-DARWIN: mov sp, r4 } define i64 @f11(i64 %x, i64 %y) {