1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[X86] Fix stack probe issue on windows32.

Summary:
On Windows, if the frame size exceeds 4096 bytes, the compiler needs to
generate a call to _alloca_probe. The X86CallFrameOptimization pass
changes the reserved stack size, which causes the stack probe call
not to be inserted. This patch fixes the issue by checking each call
frame size: if any call frame is at least as large as the stack probe size
(4096 bytes by default), X86CallFrameOptimization is skipped for the function.

Reviewers: craig.topper, wxiao3, annita.zhang, rnk, RKSimon

Reviewed By: rnk

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D65923

llvm-svn: 368503
This commit is contained in:
Luo, Yuanke 2019-08-10 02:49:02 +00:00
parent f00f383150
commit 78c709910b
5 changed files with 85 additions and 8 deletions

View File

@ -155,12 +155,22 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// This is bad, and breaks SP adjustment.
// So, check that all of the frames in the function are closed inside
// the same block, and, for good measure, that there are no nested frames.
//
// If any call allocates more argument stack memory than the stack
// probe size, don't do this optimization. Otherwise, this pass
// would need to synthesize additional stack probe calls to allocate
// memory for arguments.
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode();
bool UseStackProbe =
!STI->getTargetLowering()->getStackProbeSymbolName(MF).empty();
unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF);
for (MachineBasicBlock &BB : MF) {
bool InsideFrameSequence = false;
for (MachineInstr &MI : BB) {
if (MI.getOpcode() == FrameSetupOpcode) {
if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe)
return false;
if (InsideFrameSequence)
return false;
InsideFrameSequence = true;

View File

@ -1022,14 +1022,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);
bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty();
// The default stack probe size is 4096 if the function has no stackprobesize
// attribute.
unsigned StackProbeSize = 4096;
if (Fn.hasFnAttribute("stack-probe-size"))
Fn.getFnAttribute("stack-probe-size")
.getValueAsString()
.getAsInteger(0, StackProbeSize);
unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
// Re-align the stack on 64-bit if the x86-interrupt calling convention is
// used and an error code was pushed, since the x86-64 ABI requires a 16-byte

View File

@ -44970,3 +44970,16 @@ X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
}
unsigned
X86TargetLowering::getStackProbeSize(MachineFunction &MF) const {
  // Fall back to 4096 bytes when the function carries no "stack-probe-size"
  // attribute.
  unsigned ProbeSize = 4096;
  const Function &F = MF.getFunction();
  if (!F.hasFnAttribute("stack-probe-size"))
    return ProbeSize;

  // Parse the attribute's string value into ProbeSize, passing 0 as the radix
  // argument. The status returned by the parse is not inspected here.
  const auto AttrText = F.getFnAttribute("stack-probe-size").getValueAsString();
  AttrText.getAsInteger(0, ProbeSize);
  return ProbeSize;
}

View File

@ -1207,6 +1207,8 @@ namespace llvm {
StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
/// Returns the stack probe size to use for \p MF. The value is read from the
/// function's "stack-probe-size" attribute and defaults to 4096 when the
/// function has no such attribute.
unsigned getStackProbeSize(MachineFunction &MF) const;
bool hasVectorBlend() const override { return true; }
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

View File

@ -0,0 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-pc-windows-msvc | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-msvc"
%struct._param_str = type { i32, i32, [4096 x i32], i32 }
@g_d = common dso_local local_unnamed_addr global i32 0, align 4
@g_c = common dso_local local_unnamed_addr global i32 0, align 4
@g_b = common dso_local local_unnamed_addr global i32 0, align 4
@g_a = common dso_local local_unnamed_addr global i32 0, align 4
@g_param = common dso_local global %struct._param_str zeroinitializer, align 4
; Function Attrs: nounwind
; Regression test for stack probing on 32-bit Windows. @foo takes a byval
; %struct._param_str, which holds 4099 i32 fields (16396 bytes), so the
; outgoing argument area is larger than the default 4096-byte probe size.
; The expected output allocates the whole 16396-byte frame through a call to
; __chkstk and passes @bar's four arguments with esp-relative stores.
define dso_local i32 @test() local_unnamed_addr {
; CHECK-LABEL: test:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: movl $16396, %eax # imm = 0x400C
; CHECK-NEXT: calll __chkstk
; CHECK-NEXT: movl _g_d, %eax
; CHECK-NEXT: movl _g_c, %ecx
; CHECK-NEXT: movl _g_b, %edx
; CHECK-NEXT: movl _g_a, %esi
; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp)
; CHECK-NEXT: movl %esi, (%esp)
; CHECK-NEXT: calll _bar
; CHECK-NEXT: movl $4099, %ecx # imm = 0x1003
; CHECK-NEXT: movl %esp, %edi
; CHECK-NEXT: movl $_g_param, %esi
; CHECK-NEXT: rep;movsl (%esi), %es:(%edi)
; CHECK-NEXT: calll _foo
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: addl $16396, %esp # imm = 0x400C
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: retl
entry:
%0 = load i32, i32* @g_d, align 4, !tbaa !3
%1 = load i32, i32* @g_c, align 4, !tbaa !3
%2 = load i32, i32* @g_b, align 4, !tbaa !3
%3 = load i32, i32* @g_a, align 4, !tbaa !3
%call = tail call i32 @bar(i32 %3, i32 %2, i32 %1, i32 %0) #2
tail call void @foo(%struct._param_str* byval nonnull align 4 @g_param) #2
ret i32 0
}
declare dso_local i32 @bar(i32, i32, i32, i32) local_unnamed_addr
declare dso_local void @foo(%struct._param_str* byval align 4) local_unnamed_addr
!3 = !{!4, !4, i64 0}
!4 = !{!"int", !5, i64 0}
!5 = !{!"omnipotent char", !6, i64 0}
!6 = !{!"Simple C/C++ TBAA"}