mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[ARM] Dynamic stack alignment for 16-bit Thumb
This patch implements dynamic stack (re-)alignment for 16-bit Thumb. When targeting processors, which support only the 16-bit Thumb instruction set the compiler ignores the alignment attributes of automatic variables and may silently generate incorrect code. Differential revision: https://reviews.llvm.org/D38143 llvm-svn: 316289
This commit is contained in:
parent
7954e305df
commit
1a5573f257
@ -391,16 +391,12 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
|
||||
|
||||
bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
|
||||
const MachineRegisterInfo *MRI = &MF.getRegInfo();
|
||||
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
|
||||
const ARMFrameLowering *TFI = getFrameLowering(MF);
|
||||
// We can't realign the stack if:
|
||||
// 1. Dynamic stack realignment is explicitly disabled,
|
||||
// 2. This is a Thumb1 function (it's not useful, so we don't bother), or
|
||||
// 3. There are VLAs in the function and the base pointer is disabled.
|
||||
// 2. There are VLAs in the function and the base pointer is disabled.
|
||||
if (!TargetRegisterInfo::canRealignStack(MF))
|
||||
return false;
|
||||
if (AFI->isThumb1OnlyFunction())
|
||||
return false;
|
||||
// Stack realignment requires a frame pointer. If we already started
|
||||
// register allocation with frame pointer elimination, it is too late now.
|
||||
if (!MRI->canReserveReg(getFramePointerReg(MF.getSubtarget<ARMSubtarget>())))
|
||||
|
@ -1610,14 +1610,14 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
if (AFI->getArgRegsSaveSize() > 0)
|
||||
SavedRegs.set(ARM::LR);
|
||||
|
||||
// Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
|
||||
// for sure what the stack size will be, but for this, an estimate is good
|
||||
// enough. If there anything changes it, it'll be a spill, which implies
|
||||
// we've used all the registers and so R4 is already used, so not marking
|
||||
// it here will be OK.
|
||||
// Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
|
||||
// requires stack alignment. We don't know for sure what the stack size
|
||||
// will be, but for this, an estimate is good enough. If there anything
|
||||
// changes it, it'll be a spill, which implies we've used all the registers
|
||||
// and so R4 is already used, so not marking it here will be OK.
|
||||
// FIXME: It will be better just to find spare register here.
|
||||
unsigned StackSize = MFI.estimateStackSize(MF);
|
||||
if (MFI.hasVarSizedObjects() || StackSize > 508)
|
||||
if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
|
||||
MFI.estimateStackSize(MF) > 508)
|
||||
SavedRegs.set(ARM::R4);
|
||||
}
|
||||
|
||||
|
@ -352,10 +352,36 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
|
||||
AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
|
||||
|
||||
// Thumb1 does not currently support dynamic stack realignment. Report a
|
||||
// fatal error rather then silently generate bad code.
|
||||
if (RegInfo->needsStackRealignment(MF))
|
||||
report_fatal_error("Dynamic stack realignment not supported for thumb1.");
|
||||
if (RegInfo->needsStackRealignment(MF)) {
|
||||
const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment());
|
||||
// Emit the following sequence, using R4 as a temporary, since we cannot use
|
||||
// SP as a source or destination register for the shifts:
|
||||
// mov r4, sp
|
||||
// lsrs r4, r4, #NrBitsToZero
|
||||
// lsls r4, r4, #NrBitsToZero
|
||||
// mov sp, r4
|
||||
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
|
||||
.addReg(ARM::SP, RegState::Kill)
|
||||
.add(predOps(ARMCC::AL));
|
||||
|
||||
BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4)
|
||||
.addDef(ARM::CPSR)
|
||||
.addReg(ARM::R4, RegState::Kill)
|
||||
.addImm(NrBitsToZero)
|
||||
.add(predOps(ARMCC::AL));
|
||||
|
||||
BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4)
|
||||
.addDef(ARM::CPSR)
|
||||
.addReg(ARM::R4, RegState::Kill)
|
||||
.addImm(NrBitsToZero)
|
||||
.add(predOps(ARMCC::AL));
|
||||
|
||||
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
|
||||
.addReg(ARM::R4, RegState::Kill)
|
||||
.add(predOps(ARMCC::AL));
|
||||
|
||||
AFI->setShouldRestoreSPFromFP(true);
|
||||
}
|
||||
|
||||
// If we need a base pointer, set it up here. It's whatever the value
|
||||
// of the stack pointer is at this point. Any variable size objects
|
||||
|
@ -9,6 +9,8 @@ entry:
|
||||
; --------
|
||||
; CHECK-V4T: push {[[SAVED:(r[4567](, )?)+]], lr}
|
||||
; CHECK-V4T: sub sp,
|
||||
; Stack is realigned because of the <6 x i32> type
|
||||
; CHECK-V4T: mov sp, r4
|
||||
; CHECK-V5T: push {[[SAVED:(r[4567](, )?)+]], lr}
|
||||
|
||||
%b = alloca <6 x i32>, align 16
|
||||
@ -21,7 +23,8 @@ entry:
|
||||
|
||||
; Epilogue
|
||||
; --------
|
||||
; CHECK-V4T: add sp,
|
||||
; Stack realignment means sp is restored from frame pointer
|
||||
; CHECK-V4T: mov sp
|
||||
; CHECK-V4T-NEXT: pop {[[SAVED]]}
|
||||
; The ISA for v4 does not support pop pc, so make sure we do not emit
|
||||
; one even when we do not need to update SP.
|
||||
@ -70,8 +73,9 @@ entry:
|
||||
; CHECK-V4T-NEXT: mov lr, [[POP_REG]]
|
||||
; CHECK-V4T-NEXT: mov [[POP_REG]], r12
|
||||
; CHECK-V4T: bx lr
|
||||
; CHECK-V5T: add sp,
|
||||
; CHECK-V5T-NEXT: pop {[[SAVED]]}
|
||||
; CHECK-V5T: lsls r4
|
||||
; CHECK-V5T-NEXT: mov sp, r4
|
||||
; CHECK-V5T: pop {[[SAVED]]}
|
||||
; CHECK-V5T-NEXT: mov r12, [[POP_REG:r[0-7]]]
|
||||
; CHECK-V5T-NEXT: pop {[[POP_REG]]}
|
||||
; CHECK-V5T-NEXT: add sp,
|
||||
|
@ -75,7 +75,7 @@ define i32 @test3() {
|
||||
; CHECK: add sp, [[TEMP3]]
|
||||
%retval = alloca i32, align 4
|
||||
%tmp = alloca i32, align 4
|
||||
%a = alloca [805306369 x i8], align 16
|
||||
%a = alloca [805306369 x i8], align 4
|
||||
store i32 0, i32* %tmp
|
||||
%tmp1 = load i32, i32* %tmp
|
||||
ret i32 %tmp1
|
||||
@ -91,7 +91,7 @@ define i32 @test3_nofpelim() "no-frame-pointer-elim"="true" {
|
||||
; CHECK: mov sp, r4
|
||||
%retval = alloca i32, align 4
|
||||
%tmp = alloca i32, align 4
|
||||
%a = alloca [805306369 x i8], align 16
|
||||
%a = alloca [805306369 x i8], align 8
|
||||
store i32 0, i32* %tmp
|
||||
%tmp1 = load i32, i32* %tmp
|
||||
ret i32 %tmp1
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s
|
||||
; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | \
|
||||
; RUN: FileCheck %s -check-prefix CHECK --check-prefix CHECK-EABI
|
||||
; RUN: llc -mtriple=thumb-apple-darwin %s -verify-machineinstrs -o - | \
|
||||
; RUN: FileCheck %s -check-prefix CHECK -check-prefix CHECK-DARWIN
|
||||
|
||||
@ -172,10 +173,12 @@ entry:
|
||||
%retval = load i64, i64* %a ; <i64> [#uses=1]
|
||||
ret i64 %retval
|
||||
; CHECK-LABEL: f10:
|
||||
; CHECK: sub sp, #8
|
||||
; CHECK-EABI: sub sp, #8
|
||||
; CHECK-DARWIN: add r7, sp, #4
|
||||
; CHECK: ldr r0, [sp]
|
||||
; CHECK: ldr r1, [sp, #4]
|
||||
; CHECK: add sp, #8
|
||||
; CHECK-EABI: add sp, #8
|
||||
; CHECK-DARWIN: mov sp, r4
|
||||
}
|
||||
|
||||
define i64 @f11(i64 %x, i64 %y) {
|
||||
|
Loading…
Reference in New Issue
Block a user