1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[ARM][FIX] Ran out of registers due tail recursion

Summary:
- pr42062
When compiling for MinSize,
ARMTargetLowering::LowerCall decides to indirect
multiple calls to a same function. However,
it disconsiders the limitation that thumb1
indirect calls require the callee to be in a
register from r0 to r3 (llvm limiation).
If all those registers are used by arguments, the
compiler dies with "error: run out of registers
during register allocation".
This patch tells the function
IsEligibleForTailCallOptimization if we intend to
perform indirect calls, as to avoid tail call
optimization.

Reviewers: dmgreen, efriedma

Reviewed By: efriedma

Subscribers: javed.absar, kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D62683

llvm-svn: 362366
This commit is contained in:
Diogo N. Sampaio 2019-06-03 08:58:05 +00:00
parent f289a718d2
commit 23ccb5de58
3 changed files with 82 additions and 49 deletions

View File

@ -1832,29 +1832,40 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool isVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
bool isSibCall = false;
bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
bool isThisReturn = false;
auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
bool PreferIndirect = false;
// Disable tail calls if they're not supported.
if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
isTailCall = false;
if (isa<GlobalAddressSDNode>(Callee)) {
// If we're optimizing for minimum size and the function is called three or
// more times in this block, we can improve codesize by calling indirectly
// as BLXr has a 16-bit encoding.
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
auto *BB = CLI.CS.getParent();
PreferIndirect =
Subtarget->isThumb() && Subtarget->hasMinSize() &&
count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
}
if (isTailCall) {
// Check if it's really possible to do a tail call.
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
Outs, OutVals, Ins, DAG);
isTailCall = IsEligibleForTailCallOptimization(
Callee, CallConv, isVarArg, isStructRet,
MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
PreferIndirect);
if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
// We don't support GuaranteedTailCallOpt for ARM, only automatically
// detected sibcalls.
if (isTailCall) {
if (isTailCall)
++NumTailCalls;
isSibCall = true;
}
}
// Analyze operands of the call, assigning locations to each operand.
@ -1866,14 +1877,14 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
// For tail calls, memory operands are available in our caller's stack.
if (isSibCall)
if (isTailCall) {
// For tail calls, memory operands are available in our caller's stack.
NumBytes = 0;
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!isSibCall)
} else {
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
}
SDValue StackPtr =
DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
@ -1995,7 +2006,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
Ops));
}
} else if (!isSibCall) {
} else if (!isTailCall) {
assert(VA.isMemLoc());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
@ -2067,17 +2078,6 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
}
} else if (isa<GlobalAddressSDNode>(Callee)) {
// If we're optimizing for minimum size and the function is called three or
// more times in this block, we can improve codesize by calling indirectly
// as BLXr has a 16-bit encoding.
auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
auto *BB = CLI.CS.getParent();
bool PreferIndirect =
Subtarget->isThumb() && Subtarget->hasMinSize() &&
count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
if (!PreferIndirect) {
isDirect = true;
bool isDef = GV->isStrongDefinitionForLinker();
@ -2309,28 +2309,25 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const {
bool ARMTargetLowering::IsEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
bool isCalleeStructRet, bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
const bool isIndirect) const {
MachineFunction &MF = DAG.getMachineFunction();
const Function &CallerF = MF.getFunction();
CallingConv::ID CallerCC = CallerF.getCallingConv();
assert(Subtarget->supportsTailCall());
// Tail calls to function pointers cannot be optimized for Thumb1 if the args
// Indirect tail calls cannot be optimized for Thumb1 if the args
// to the call take up r0-r3. The reason is that there are no legal registers
// left to hold the pointer to the function to be called.
if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
!isa<GlobalAddressSDNode>(Callee.getNode()))
return false;
(!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect))
return false;
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.

View File

@ -763,15 +763,13 @@ class VectorType;
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool IsEligibleForTailCallOptimization(SDValue Callee,
CallingConv::ID CalleeCC,
bool isVarArg,
bool isCalleeStructRet,
bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
SelectionDAG& DAG) const;
bool IsEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
bool isCalleeStructRet, bool isCallerStructRet,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG,
const bool isIndirect) const;
bool CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,

View File

@ -0,0 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - %s 2>&1 | FileCheck %s --implicit-check-not=error
target triple = "thumbv8m.base-arm-none-eabi"
@foo = external global i8
declare i32 @bar(i8* nocapture, i32, i32, i8* nocapture)
define void @food(i8* %a) #0 {
; CHECK-LABEL: food:
; CHECK: mov [[ARG0:r[4-7]]], r0
; CHECK-NEXT: movs r1, #8
; CHECK-NEXT: movs r2, #1
; CHECK-NEXT: ldr [[FOO_R:r[4-7]]], [[FOO_ADDR:\..*]]
; CHECK-NEXT: ldr [[BAR_R:r[4-7]]], [[BAR_ADDR:\..*]]
; CHECK-NEXT: mov r3, [[FOO_R]]
; CHECK-NEXT: blx [[BAR_R]]
; CHECK-NEXT: movs r1, #9
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: mov r0, [[ARG0]]
; CHECK-NEXT: mov r3, [[FOO_R]]
; CHECK-NEXT: blx [[BAR_R]]
; CHECK-NEXT: movs r1, #7
; CHECK-NEXT: movs r2, #2
; CHECK-NEXT: mov r0, [[ARG0]]
; CHECK-NEXT: mov r3, [[FOO_R]]
; CHECK-NEXT: blx [[BAR_R]]
; CHECK-NEXT: pop {r4, r5, r6, pc}
; CHECK: [[FOO_ADDR]]:
; CHECK-NEXT: .long foo
; CHECK: [[BAR_ADDR]]:
; CHECK-NEXT: .long bar
entry:
%0 = tail call i32 @bar(i8* %a, i32 8, i32 1, i8* nonnull @foo)
%1 = tail call i32 @bar(i8* %a, i32 9, i32 0, i8* nonnull @foo)
%2 = tail call i32 @bar(i8* %a, i32 7, i32 2, i8* nonnull @foo)
ret void
}
attributes #0 = { minsize "target-cpu"="cortex-m23" }