mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[PPC] Fix two bugs in frame lowering.
1. The available program storage region of the red zone to compilers is 288 bytes rather than 224 bytes. 2. The formula for negative number alignment calculation should be y = x & ~(n-1) rather than y = (x + (n-1)) & ~(n-1). Differential Revision: https://reviews.llvm.org/D34337 llvm-svn: 307672
This commit is contained in:
parent
6f20b0e83b
commit
2a8d3d1229
@ -435,22 +435,19 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
|
||||
|
||||
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
|
||||
|
||||
// If we are a leaf function, and use up to 224 bytes of stack space,
|
||||
// don't have a frame pointer, calls, or dynamic alloca then we do not need
|
||||
// to adjust the stack pointer (we fit in the Red Zone).
|
||||
// The 32-bit SVR4 ABI has no Red Zone. However, it can still generate
|
||||
// stackless code if all local vars are reg-allocated.
|
||||
bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
|
||||
unsigned LR = RegInfo->getRARegister();
|
||||
if (!DisableRedZone &&
|
||||
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
|
||||
!Subtarget.isSVR4ABI() || // allocated locals.
|
||||
FrameSize == 0) &&
|
||||
FrameSize <= 224 && // Fits in red zone.
|
||||
!MFI.hasVarSizedObjects() && // No dynamic alloca.
|
||||
!MFI.adjustsStack() && // No calls.
|
||||
!MustSaveLR(MF, LR) &&
|
||||
!RegInfo->hasBasePointer(MF)) { // No special alignment.
|
||||
bool DisableRedZone = MF.getFunction()->hasFnAttribute(Attribute::NoRedZone);
|
||||
bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
|
||||
!MFI.adjustsStack() && // No calls.
|
||||
!MustSaveLR(MF, LR) && // No need to save LR.
|
||||
!RegInfo->hasBasePointer(MF); // No special alignment.
|
||||
|
||||
// Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
|
||||
// code if all local vars are reg-allocated.
|
||||
bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
|
||||
|
||||
// Check whether we can skip adjusting the stack pointer (by using red zone)
|
||||
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
|
||||
// No need for frame
|
||||
if (UpdateMF)
|
||||
MFI.setStackSize(0);
|
||||
@ -1869,8 +1866,13 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
|
||||
}
|
||||
|
||||
if (HasVRSaveArea) {
|
||||
// Insert alignment padding, we need 16-byte alignment.
|
||||
LowerBound = (LowerBound - 15) & ~(15);
|
||||
// Insert alignment padding, we need 16-byte alignment. Note: for a positive
|
||||
// number the alignment formula is: y = (x + (n-1)) & (~(n-1)). But since
|
||||
// we are using a negative number here (the stack grows downward), we should
|
||||
// use the formula: y = x & (~(n-1)), where x is the size before aligning, n
|
||||
// is the alignment size (n = 16 here) and y is the size after aligning.
|
||||
assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
|
||||
LowerBound &= ~(15);
|
||||
|
||||
for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
|
||||
int FI = VRegs[i].getFrameIdx();
|
||||
|
@ -272,6 +272,13 @@ public:
|
||||
|
||||
return 16;
|
||||
}
|
||||
|
||||
// DarwinABI has a 224-byte red zone. PPC32 SVR4ABI(Non-DarwinABI) has no
|
||||
// red zone and PPC64 SVR4ABI has a 288-byte red zone.
|
||||
unsigned getRedZoneSize() const {
|
||||
return isDarwinABI() ? 224 : (isPPC64() ? 288 : 0);
|
||||
}
|
||||
|
||||
bool hasHTM() const { return HasHTM; }
|
||||
bool hasFusion() const { return HasFusion; }
|
||||
bool hasFloat128() const { return HasFloat128; }
|
||||
|
32
test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll
Normal file
32
test/CodeGen/PowerPC/ppc-redzone-alignment-bug.ll
Normal file
@ -0,0 +1,32 @@
|
||||
; Note the formula for negative number alignment calculation should be y = x & ~(n-1) rather than y = (x + (n-1)) & ~(n-1).
|
||||
; after patch https://reviews.llvm.org/D34337, we could save 16 bytes in the best case.
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-BE
|
||||
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=CHECK-LE
|
||||
|
||||
define signext i32 @bar(i32 signext %ii) {
|
||||
entry:
|
||||
%0 = tail call i32 asm sideeffect "add $0, $1, $2\0A", "=r,r,r,~{f14},~{r15},~{v20}"(i32 %ii, i32 10)
|
||||
ret i32 %0
|
||||
; Before the fix by patch D34337:
|
||||
; stdu 1, -544(1)
|
||||
; std 15, 264(1)
|
||||
; stfd 14, 400(1)
|
||||
; stdu 1, -560(1)
|
||||
; std 15, 280(1)
|
||||
; stfd 14, 416(1)
|
||||
|
||||
; After the fix by patch D34337:
|
||||
; CHECK-LE: stdu 1, -528(1)
|
||||
; CHECK-LE:std 15, 248(1)
|
||||
; CHECK-LE:stfd 14, 384(1)
|
||||
; CHECK-BE: stdu 1, -544(1)
|
||||
; CHECK-BE:std 15, 264(1)
|
||||
; CHECK-BE:stfd 14, 400(1)
|
||||
}
|
||||
|
||||
define signext i32 @foo() {
|
||||
entry:
|
||||
%call = tail call signext i32 @bar(i32 signext 5)
|
||||
ret i32 %call
|
||||
}
|
||||
|
@ -29,11 +29,11 @@ entry:
|
||||
|
||||
define i8* @bigstack() nounwind {
|
||||
entry:
|
||||
%0 = alloca i8, i32 230
|
||||
%0 = alloca i8, i32 290
|
||||
ret i8* %0
|
||||
}
|
||||
; PPC32-LABEL: bigstack:
|
||||
; PPC32: stwu 1, -240(1)
|
||||
; PPC32: stwu 1, -304(1)
|
||||
|
||||
; PPC64-LABEL: bigstack:
|
||||
; PPC64: stdu 1, -288(1)
|
||||
; PPC64: stdu 1, -352(1)
|
||||
|
@ -1,4 +1,5 @@
|
||||
; RUN: llc -relocation-model=static -verify-machineinstrs < %s -march=ppc64 -tailcallopt | grep TC_RETURNd8
|
||||
; RUN: llc -relocation-model=static -verify-machineinstrs -march=ppc64 < %s | FileCheck %s
|
||||
define fastcc i32 @tailcallee(i32 %a1, i32 %a2, i32 %a3, i32 %a4) {
|
||||
entry:
|
||||
ret i32 %a3
|
||||
@ -6,6 +7,9 @@ entry:
|
||||
|
||||
define fastcc i32 @tailcaller(i32 %in1, i32 %in2) {
|
||||
entry:
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 ) ; <i32> [#uses=1]
|
||||
%tmp11 = tail call fastcc i32 @tailcallee( i32 %in1, i32 %in2, i32 %in1, i32 %in2 )
|
||||
ret i32 %tmp11
|
||||
; CHECK-LABEL: tailcaller
|
||||
; CHECK-NOT: stdu
|
||||
; CHECK: b tailcallee
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user