mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
This CL changes the function prologue and epilogue emitted on X86 when stack needs realignment.
It is intended to fix PR11468.

The old prologue and epilogue looked like this:

    push %rbp
    mov %rsp, %rbp
    and $alignment, %rsp
    push %r14
    push %r15
    ...
    pop %r15
    pop %r14
    mov %rbp, %rsp
    pop %rbp

The problem was referencing the locations of callee-saved registers in exception handling: because the registers were pushed after the stack was realigned, their locations had to be recalculated to account for the stack alignment operation. It would take some effort to implement this in LLVM, as MachineLocation can currently only have the form "Register + Offset".

The function prologue and epilogue are now changed to:

    push %rbp
    mov %rsp, %rbp
    push %r14
    push %r15
    and $alignment, %rsp
    ...
    lea -$size_of_saved_registers(%rbp), %rsp
    pop %r15
    pop %r14
    pop %rbp

Reviewed by Chad Rosier.

llvm-svn: 160248
This commit is contained in:
parent
0e3ed1b1f3
commit
c68bb48704
@ -722,10 +722,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
|
|||||||
if (HasFP) {
|
if (HasFP) {
|
||||||
// Calculate required stack adjustment.
|
// Calculate required stack adjustment.
|
||||||
uint64_t FrameSize = StackSize - SlotSize;
|
uint64_t FrameSize = StackSize - SlotSize;
|
||||||
if (RegInfo->needsStackRealignment(MF))
|
if (RegInfo->needsStackRealignment(MF)) {
|
||||||
FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
|
// Callee-saved registers are pushed on stack before the stack
|
||||||
|
// is realigned.
|
||||||
NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
|
FrameSize -= X86FI->getCalleeSavedFrameSize();
|
||||||
|
NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
|
||||||
|
} else {
|
||||||
|
NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
|
||||||
|
}
|
||||||
|
|
||||||
// Get the offset of the stack slot for the EBP register, which is
|
// Get the offset of the stack slot for the EBP register, which is
|
||||||
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
|
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
|
||||||
@ -782,19 +786,6 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
|
|||||||
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
|
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
|
||||||
I != E; ++I)
|
I != E; ++I)
|
||||||
I->addLiveIn(FramePtr);
|
I->addLiveIn(FramePtr);
|
||||||
|
|
||||||
// Realign stack
|
|
||||||
if (RegInfo->needsStackRealignment(MF)) {
|
|
||||||
MachineInstr *MI =
|
|
||||||
BuildMI(MBB, MBBI, DL,
|
|
||||||
TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
|
|
||||||
.addReg(StackPtr)
|
|
||||||
.addImm(-MaxAlign)
|
|
||||||
.setMIFlag(MachineInstr::FrameSetup);
|
|
||||||
|
|
||||||
// The EFLAGS implicit def is dead.
|
|
||||||
MI->getOperand(3).setIsDead();
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
|
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
|
||||||
}
|
}
|
||||||
@ -824,6 +815,27 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Realign stack after we pushed callee-saved registers (so that we'll be
|
||||||
|
// able to calculate their offsets from the frame pointer).
|
||||||
|
|
||||||
|
// NOTE: We push the registers before realigning the stack, so
|
||||||
|
// vector callee-saved (xmm) registers may be saved w/o proper
|
||||||
|
// alignment in this way. However, currently these regs are saved in
|
||||||
|
// stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so
|
||||||
|
// this shouldn't be a problem.
|
||||||
|
if (RegInfo->needsStackRealignment(MF)) {
|
||||||
|
assert(HasFP && "There should be a frame pointer if stack is realigned.");
|
||||||
|
MachineInstr *MI =
|
||||||
|
BuildMI(MBB, MBBI, DL,
|
||||||
|
TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
|
||||||
|
.addReg(StackPtr)
|
||||||
|
.addImm(-MaxAlign)
|
||||||
|
.setMIFlag(MachineInstr::FrameSetup);
|
||||||
|
|
||||||
|
// The EFLAGS implicit def is dead.
|
||||||
|
MI->getOperand(3).setIsDead();
|
||||||
|
}
|
||||||
|
|
||||||
DL = MBB.findDebugLoc(MBBI);
|
DL = MBB.findDebugLoc(MBBI);
|
||||||
|
|
||||||
// If there is an SUB32ri of ESP immediately before this instruction, merge
|
// If there is an SUB32ri of ESP immediately before this instruction, merge
|
||||||
@ -975,7 +987,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
unsigned SlotSize = RegInfo->getSlotSize();
|
unsigned SlotSize = RegInfo->getSlotSize();
|
||||||
unsigned FramePtr = RegInfo->getFrameRegister(MF);
|
unsigned FramePtr = RegInfo->getFrameRegister(MF);
|
||||||
unsigned StackPtr = RegInfo->getStackRegister();
|
unsigned StackPtr = RegInfo->getStackRegister();
|
||||||
unsigned BasePtr = RegInfo->getBaseRegister();
|
|
||||||
|
|
||||||
switch (RetOpcode) {
|
switch (RetOpcode) {
|
||||||
default:
|
default:
|
||||||
@ -1013,10 +1024,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
if (hasFP(MF)) {
|
if (hasFP(MF)) {
|
||||||
// Calculate required stack adjustment.
|
// Calculate required stack adjustment.
|
||||||
uint64_t FrameSize = StackSize - SlotSize;
|
uint64_t FrameSize = StackSize - SlotSize;
|
||||||
if (RegInfo->needsStackRealignment(MF))
|
if (RegInfo->needsStackRealignment(MF)) {
|
||||||
FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;
|
// Callee-saved registers were pushed on stack before the stack
|
||||||
|
// was realigned.
|
||||||
NumBytes = FrameSize - CSSize;
|
FrameSize -= CSSize;
|
||||||
|
NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
|
||||||
|
} else {
|
||||||
|
NumBytes = FrameSize - CSSize;
|
||||||
|
}
|
||||||
|
|
||||||
// Pop EBP.
|
// Pop EBP.
|
||||||
BuildMI(MBB, MBBI, DL,
|
BuildMI(MBB, MBBI, DL,
|
||||||
@ -1026,7 +1041,6 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Skip the callee-saved pop instructions.
|
// Skip the callee-saved pop instructions.
|
||||||
MachineBasicBlock::iterator LastCSPop = MBBI;
|
|
||||||
while (MBBI != MBB.begin()) {
|
while (MBBI != MBB.begin()) {
|
||||||
MachineBasicBlock::iterator PI = prior(MBBI);
|
MachineBasicBlock::iterator PI = prior(MBBI);
|
||||||
unsigned Opc = PI->getOpcode();
|
unsigned Opc = PI->getOpcode();
|
||||||
@ -1037,6 +1051,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
|
|
||||||
--MBBI;
|
--MBBI;
|
||||||
}
|
}
|
||||||
|
MachineBasicBlock::iterator FirstCSPop = MBBI;
|
||||||
|
|
||||||
DL = MBBI->getDebugLoc();
|
DL = MBBI->getDebugLoc();
|
||||||
|
|
||||||
@ -1045,40 +1060,19 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
|
|||||||
if (NumBytes || MFI->hasVarSizedObjects())
|
if (NumBytes || MFI->hasVarSizedObjects())
|
||||||
mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
|
mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
|
||||||
|
|
||||||
// Restore the SP from the BP, if necessary.
|
|
||||||
if (RegInfo->hasBasePointer(MF)) {
|
|
||||||
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
|
|
||||||
StackPtr).addReg(BasePtr);
|
|
||||||
|
|
||||||
// When restoring from the BP we must use a cached SP adjustment.
|
|
||||||
NumBytes = X86FI->getBasePtrStackAdjustment();
|
|
||||||
}
|
|
||||||
|
|
||||||
// If dynamic alloca is used, then reset esp to point to the last callee-saved
|
// If dynamic alloca is used, then reset esp to point to the last callee-saved
|
||||||
// slot before popping them off! Same applies for the case, when stack was
|
// slot before popping them off! Same applies for the case, when stack was
|
||||||
// realigned.
|
// realigned.
|
||||||
if (RegInfo->needsStackRealignment(MF)) {
|
if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
|
||||||
// We cannot use LEA here, because stack pointer was realigned. We need to
|
if (RegInfo->needsStackRealignment(MF))
|
||||||
// deallocate local frame back.
|
MBBI = FirstCSPop;
|
||||||
if (CSSize) {
|
if (CSSize != 0) {
|
||||||
emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, UseLEA, TII,
|
unsigned Opc = getLEArOpcode(Is64Bit);
|
||||||
*RegInfo);
|
addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
|
||||||
MBBI = prior(LastCSPop);
|
FramePtr, false, -CSSize);
|
||||||
}
|
|
||||||
|
|
||||||
BuildMI(MBB, MBBI, DL,
|
|
||||||
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
|
|
||||||
StackPtr).addReg(FramePtr);
|
|
||||||
} else if (MFI->hasVarSizedObjects()) {
|
|
||||||
if (CSSize) {
|
|
||||||
unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
|
|
||||||
MachineInstr *MI =
|
|
||||||
addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
|
|
||||||
FramePtr, false, -CSSize);
|
|
||||||
MBB.insert(MBBI, MI);
|
|
||||||
} else {
|
} else {
|
||||||
BuildMI(MBB, MBBI, DL,
|
unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
|
||||||
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
|
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
|
||||||
.addReg(FramePtr);
|
.addReg(FramePtr);
|
||||||
}
|
}
|
||||||
} else if (NumBytes) {
|
} else if (NumBytes) {
|
||||||
|
@ -85,20 +85,19 @@ entry:
|
|||||||
; CHECK: _t4
|
; CHECK: _t4
|
||||||
; CHECK: pushq %rbp
|
; CHECK: pushq %rbp
|
||||||
; CHECK: movq %rsp, %rbp
|
; CHECK: movq %rsp, %rbp
|
||||||
; CHECK: andq $-32, %rsp
|
|
||||||
; CHECK: pushq %r14
|
; CHECK: pushq %r14
|
||||||
; CHECK: pushq %rbx
|
; CHECK: pushq %rbx
|
||||||
; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
|
; CHECK: andq $-32, %rsp
|
||||||
|
; CHECK: subq ${{[0-9]+}}, %rsp
|
||||||
; CHECK: movq %rsp, %rbx
|
; CHECK: movq %rsp, %rbx
|
||||||
;
|
;
|
||||||
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
|
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
|
||||||
; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
|
; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
|
||||||
; CHECK: callq _t4_helper
|
; CHECK: callq _t4_helper
|
||||||
;
|
;
|
||||||
; CHECK: addq $[[STACKADJ]], %rsp
|
; CHECK: leaq -16(%rbp), %rsp
|
||||||
; CHECK: popq %rbx
|
; CHECK: popq %rbx
|
||||||
; CHECK: popq %r14
|
; CHECK: popq %r14
|
||||||
; CHECK: movq %rbp, %rsp
|
|
||||||
; CHECK: popq %rbp
|
; CHECK: popq %rbp
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -176,19 +175,17 @@ entry:
|
|||||||
; CHECK: _t7
|
; CHECK: _t7
|
||||||
; CHECK: pushq %rbp
|
; CHECK: pushq %rbp
|
||||||
; CHECK: movq %rsp, %rbp
|
; CHECK: movq %rsp, %rbp
|
||||||
; CHECK: andq $-32, %rsp
|
|
||||||
; CHECK: pushq %rbx
|
; CHECK: pushq %rbx
|
||||||
; CHECK: subq $[[ADJ:[0-9]+]], %rsp
|
; CHECK: andq $-32, %rsp
|
||||||
|
; CHECK: subq ${{[0-9]+}}, %rsp
|
||||||
; CHECK: movq %rsp, %rbx
|
; CHECK: movq %rsp, %rbx
|
||||||
|
|
||||||
; Stack adjustment for byval
|
; Stack adjustment for byval
|
||||||
; CHECK: subq {{.*}}, %rsp
|
; CHECK: subq {{.*}}, %rsp
|
||||||
; CHECK: callq _bar
|
; CHECK: callq _bar
|
||||||
; CHECK-NOT: addq {{.*}}, %rsp
|
; CHECK-NOT: addq {{.*}}, %rsp
|
||||||
; CHECK: movq %rbx, %rsp
|
; CHECK: leaq -8(%rbp), %rsp
|
||||||
; CHECK: addq $[[ADJ]], %rsp
|
|
||||||
; CHECK: popq %rbx
|
; CHECK: popq %rbx
|
||||||
; CHECK: movq %rbp, %rsp
|
|
||||||
; CHECK: popq %rbp
|
; CHECK: popq %rbp
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -229,14 +226,12 @@ entry:
|
|||||||
; FORCE-ALIGN: _t9
|
; FORCE-ALIGN: _t9
|
||||||
; FORCE-ALIGN: pushq %rbp
|
; FORCE-ALIGN: pushq %rbp
|
||||||
; FORCE-ALIGN: movq %rsp, %rbp
|
; FORCE-ALIGN: movq %rsp, %rbp
|
||||||
; FORCE-ALIGN: andq $-32, %rsp
|
|
||||||
; FORCE-ALIGN: pushq %rbx
|
; FORCE-ALIGN: pushq %rbx
|
||||||
; FORCE-ALIGN: subq $24, %rsp
|
; FORCE-ALIGN: andq $-32, %rsp
|
||||||
|
; FORCE-ALIGN: subq $32, %rsp
|
||||||
; FORCE-ALIGN: movq %rsp, %rbx
|
; FORCE-ALIGN: movq %rsp, %rbx
|
||||||
|
|
||||||
; FORCE-ALIGN: movq %rbx, %rsp
|
; FORCE-ALIGN: leaq -8(%rbp), %rsp
|
||||||
; FORCE-ALIGN: addq $24, %rsp
|
|
||||||
; FORCE-ALIGN: popq %rbx
|
; FORCE-ALIGN: popq %rbx
|
||||||
; FORCE-ALIGN: movq %rbp, %rsp
|
|
||||||
; FORCE-ALIGN: popq %rbp
|
; FORCE-ALIGN: popq %rbp
|
||||||
}
|
}
|
||||||
|
@ -19,10 +19,10 @@ define i64 @g(i32 %i) nounwind {
|
|||||||
; CHECK: g:
|
; CHECK: g:
|
||||||
; CHECK: pushl %ebp
|
; CHECK: pushl %ebp
|
||||||
; CHECK-NEXT: movl %esp, %ebp
|
; CHECK-NEXT: movl %esp, %ebp
|
||||||
|
; CHECK-NEXT: pushl
|
||||||
|
; CHECK-NEXT: pushl
|
||||||
; CHECK-NEXT: andl $-32, %esp
|
; CHECK-NEXT: andl $-32, %esp
|
||||||
; CHECK-NEXT: pushl
|
; CHECK-NEXT: subl $32, %esp
|
||||||
; CHECK-NEXT: pushl
|
|
||||||
; CHECK-NEXT: subl $24, %esp
|
|
||||||
;
|
;
|
||||||
; Now setup the base pointer (%ebx).
|
; Now setup the base pointer (%ebx).
|
||||||
; CHECK-NEXT: movl %esp, %ebx
|
; CHECK-NEXT: movl %esp, %ebx
|
||||||
@ -46,17 +46,13 @@ define i64 @g(i32 %i) nounwind {
|
|||||||
; CHECK-NEXT: addl $32, %esp
|
; CHECK-NEXT: addl $32, %esp
|
||||||
; CHECK-NOT: {{[^ ,]*}}, %esp
|
; CHECK-NOT: {{[^ ,]*}}, %esp
|
||||||
;
|
;
|
||||||
; Restore %esp from %ebx (base pointer) so we can pop the callee-saved
|
; Restore %esp from %ebp (frame pointer) and subtract the size of
|
||||||
; registers. This is the state prior to the allocation of VLAs.
|
; zone with callee-saved registers to pop them.
|
||||||
|
; This is the state prior to stack realignment and the allocation of VLAs.
|
||||||
; CHECK-NOT: popl
|
; CHECK-NOT: popl
|
||||||
; CHECK: movl %ebx, %esp
|
; CHECK: leal -8(%ebp), %esp
|
||||||
; CHECK-NEXT: addl $24, %esp
|
|
||||||
; CHECK-NEXT: popl
|
; CHECK-NEXT: popl
|
||||||
; CHECK-NEXT: popl
|
; CHECK-NEXT: popl
|
||||||
;
|
|
||||||
; Finally we need to restore %esp from %ebp due to dynamic stack
|
|
||||||
; realignment.
|
|
||||||
; CHECK-NEXT: movl %ebp, %esp
|
|
||||||
; CHECK-NEXT: popl %ebp
|
; CHECK-NEXT: popl %ebp
|
||||||
; CHECK-NEXT: ret
|
; CHECK-NEXT: ret
|
||||||
|
|
||||||
|
33
test/CodeGen/X86/pr11468.ll
Normal file
33
test/CodeGen/X86/pr11468.ll
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
; RUN: llc < %s -force-align-stack -stack-alignment=32 -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
|
||||||
|
; PR11468
|
||||||
|
|
||||||
|
define void @f(i64 %sz) uwtable {
|
||||||
|
entry:
|
||||||
|
%a = alloca i32, align 32
|
||||||
|
store volatile i32 0, i32* %a, align 32
|
||||||
|
; force to push r14 on stack
|
||||||
|
call void asm sideeffect "nop", "~{r14},~{dirflag},~{fpsr},~{flags}"() nounwind, !srcloc !0
|
||||||
|
ret void
|
||||||
|
|
||||||
|
; CHECK: _f
|
||||||
|
; CHECK: pushq %rbp
|
||||||
|
; CHECK: .cfi_offset %rbp, -16
|
||||||
|
; CHECK: movq %rsp, %rbp
|
||||||
|
; CHECK: .cfi_def_cfa_register %rbp
|
||||||
|
|
||||||
|
; We first push register on stack, and then realign it, so that
|
||||||
|
; .cfi_offset value is correct
|
||||||
|
; CHECK: pushq %r14
|
||||||
|
; CHECK: andq $-32, %rsp
|
||||||
|
; CHECK: .cfi_offset %r14, -24
|
||||||
|
|
||||||
|
; Restore %rsp from %rbp and subtract the total size of saved registers.
|
||||||
|
; CHECK: leaq -8(%rbp), %rsp
|
||||||
|
|
||||||
|
; Pop saved registers.
|
||||||
|
; CHECK: popq %r14
|
||||||
|
; CHECK: popq %rbp
|
||||||
|
}
|
||||||
|
|
||||||
|
!0 = metadata !{i32 125}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user