mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
[X86] Fix stack probe issue on windows32.
Summary: On Windows, if the frame size exceeds 4096 bytes, the compiler needs to generate a call to _alloca_probe. The X86CallFrameOptimization pass changes the reserved stack size, which causes the stack probe call not to be inserted. This patch fixes the issue by checking the call frame size: if any call frame is at least as large as the stack probe size (4096 bytes by default), X86CallFrameOptimization is not applied to that function. Reviewers: craig.topper, wxiao3, annita.zhang, rnk, RKSimon Reviewed By: rnk Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65923 llvm-svn: 368503
This commit is contained in:
parent
f00f383150
commit
78c709910b
@ -155,12 +155,22 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
|
||||
// This is bad, and breaks SP adjustment.
|
||||
// So, check that all of the frames in the function are closed inside
|
||||
// the same block, and, for good measure, that there are no nested frames.
|
||||
//
|
||||
// If any call allocates more argument stack memory than the stack
|
||||
// probe size, don't do this optimization. Otherwise, this pass
|
||||
// would need to synthesize additional stack probe calls to allocate
|
||||
// memory for arguments.
|
||||
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
|
||||
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
|
||||
bool UseStackProbe =
|
||||
!STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
|
||||
unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
|
||||
for (MachineBasicBlock &BB : MF) {
|
||||
bool InsideFrameSequence = false;
|
||||
for (MachineInstr &MI : BB) {
|
||||
if (MI.getOpcode() == FrameSetupOpcode) {
|
||||
if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
|
||||
return false;
|
||||
if (InsideFrameSequence)
|
||||
return false;
|
||||
InsideFrameSequence = true;
|
||||
|
@ -1022,14 +1022,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
|
||||
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
|
||||
|
||||
bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
|
||||
|
||||
// The default stack probe size is 4096 if the function has no stackprobesize
|
||||
// attribute.
|
||||
unsigned StackProbeSize = 4096;
|
||||
if (Fn.hasFnAttribute("stack-probe-size"))
|
||||
Fn.getFnAttribute("stack-probe-size")
|
||||
.getValueAsString()
|
||||
.getAsInteger(0, StackProbeSize);
|
||||
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
|
||||
|
||||
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
|
||||
// used and an error code was pushed, since the x86-64 ABI requires a 16-byte
|
||||
|
@ -44970,3 +44970,16 @@ X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
|
||||
return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
|
||||
return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
|
||||
}
|
||||
|
||||
// Returns the stack probe size to use for the function wrapped by MF:
// the value of its "stack-probe-size" function attribute when present,
// otherwise the default of 4096.
unsigned
|
||||
X86TargetLowering::getStackProbeSize(MachineFunction &MF) const {
|
||||
// The default stack probe size is 4096 if the function has no
|
||||
// "stack-probe-size" attribute.
|
||||
unsigned StackProbeSize = 4096;
|
||||
const Function &Fn = MF.getFunction();
|
||||
if (Fn.hasFnAttribute("stack-probe-size"))
|
||||
Fn.getFnAttribute("stack-probe-size")
|
||||
.getValueAsString()
|
||||
.getAsInteger(0, StackProbeSize);
|
||||
return StackProbeSize;
|
||||
}
|
||||
|
@ -1207,6 +1207,8 @@ namespace llvm {
|
||||
|
||||
StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
|
||||
|
||||
unsigned getStackProbeSize(MachineFunction &MF) const;
|
||||
|
||||
bool hasVectorBlend() const override { return true; }
|
||||
|
||||
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
|
||||
|
59
test/CodeGen/X86/nomovtopush.ll
Normal file
59
test/CodeGen/X86/nomovtopush.ll
Normal file
@ -0,0 +1,59 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=i386-pc-windows-msvc | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
|
||||
target triple = "i386-pc-windows-msvc"
|
||||
|
||||
%struct._param_str = type { i32, i32, [4096 x i32], i32 }
|
||||
|
||||
@g_d = common dso_local local_unnamed_addr global i32 0, align 4
|
||||
@g_c = common dso_local local_unnamed_addr global i32 0, align 4
|
||||
@g_b = common dso_local local_unnamed_addr global i32 0, align 4
|
||||
@g_a = common dso_local local_unnamed_addr global i32 0, align 4
|
||||
@g_param = common dso_local global %struct._param_str zeroinitializer, align 4
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define dso_local i32 @test() local_unnamed_addr {
|
||||
; CHECK-LABEL: test:
|
||||
; CHECK: # %bb.0: # %entry
|
||||
; CHECK-NEXT: pushl %edi
|
||||
; CHECK-NEXT: pushl %esi
|
||||
; CHECK-NEXT: movl $16396, %eax # imm = 0x400C
|
||||
; CHECK-NEXT: calll __chkstk
|
||||
; CHECK-NEXT: movl _g_d, %eax
|
||||
; CHECK-NEXT: movl _g_c, %ecx
|
||||
; CHECK-NEXT: movl _g_b, %edx
|
||||
; CHECK-NEXT: movl _g_a, %esi
|
||||
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp)
|
||||
; CHECK-NEXT: movl %esi, (%esp)
|
||||
; CHECK-NEXT: calll _bar
|
||||
; CHECK-NEXT: movl $4099, %ecx # imm = 0x1003
|
||||
; CHECK-NEXT: movl %esp, %edi
|
||||
; CHECK-NEXT: movl $_g_param, %esi
|
||||
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
|
||||
; CHECK-NEXT: calll _foo
|
||||
; CHECK-NEXT: xorl %eax, %eax
|
||||
; CHECK-NEXT: addl $16396, %esp # imm = 0x400C
|
||||
; CHECK-NEXT: popl %esi
|
||||
; CHECK-NEXT: popl %edi
|
||||
; CHECK-NEXT: retl
|
||||
entry:
|
||||
%0 = load i32, i32* @g_d, align 4, !tbaa !3
|
||||
%1 = load i32, i32* @g_c, align 4, !tbaa !3
|
||||
%2 = load i32, i32* @g_b, align 4, !tbaa !3
|
||||
%3 = load i32, i32* @g_a, align 4, !tbaa !3
|
||||
%call = tail call i32 @bar(i32 %3, i32 %2, i32 %1, i32 %0) #2
|
||||
tail call void @foo(%struct._param_str* byval nonnull align 4 @g_param) #2
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
declare dso_local i32 @bar(i32, i32, i32, i32) local_unnamed_addr
|
||||
|
||||
declare dso_local void @foo(%struct._param_str* byval align 4) local_unnamed_addr
|
||||
|
||||
!3 = !{!4, !4, i64 0}
|
||||
!4 = !{!"int", !5, i64 0}
|
||||
!5 = !{!"omnipotent char", !6, i64 0}
|
||||
!6 = !{!"Simple C/C++ TBAA"}
|
Loading…
Reference in New Issue
Block a user