1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00

[PowerPC][Future] Implement PC Relative Tail Calls

Tail Calls were initially disabled for PC Relative code because it was not safe
to make certain assumptions about the tail calls (namely that all compiled
functions no longer used the TOC pointer in R2). However, once all of the
TOC pointer references have been removed it is safe to tail call everything
that was tail called prior to the PC relative additions as well as a number of
new cases.
For example, it is now possible to tail call through function pointers
(indirect calls), as there is no need to save and restore the TOC pointer
around an indirect call if the caller is marked as possibly clobbering R2
(st_other=1). For the same reason, it is now also possible to tail call
external functions.

Differential Revision: https://reviews.llvm.org/D77788
This commit is contained in:
Stefan Pintilie 2020-04-23 21:04:11 -05:00
parent 6c31f9725f
commit 8344cff456
9 changed files with 341 additions and 152 deletions

View File

@ -1537,13 +1537,14 @@ void PPCLinuxAsmPrinter::emitFunctionBodyStart() {
// 3) A function does not use the TOC pointer R2 but does have calls.
// In this case st_other=1 since we do not know whether or not any
// of the callees clobber R2. This case is dealt with in this else if
// block.
// block. Tail calls are considered calls and the st_other should also
// be set to 1 in that case as well.
// 4) The function does not use the TOC pointer but R2 is used inside
// the function. In this case st_other=1 once again.
// 5) This function uses inline asm. We mark R2 as reserved if the function
// has inline asm so we have to assume that it may be used.
if (MF->getFrameInfo().hasCalls() || MF->hasInlineAsm() ||
(!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
// has inline asm as we have to assume that it may be used.
if (MF->getFrameInfo().hasCalls() || MF->getFrameInfo().hasTailCall() ||
MF->hasInlineAsm() || (!PPCFI->usesTOCBasePtr() && UsesX2OrR2)) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer->getTargetStreamer());
if (TS)

View File

@ -1674,13 +1674,25 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
DebugLoc dl = MBBI->getDebugLoc();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
// Create branch instruction for pseudo tail call return instruction
// Create branch instruction for pseudo tail call return instruction.
// The TCRETURNdi variants are direct calls. Valid targets for those are
// MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
// since we can tail call external functions with PC-Rel (i.e. we don't need
// to worry about different TOC pointers). Some of the external functions will
// be MO_GlobalAddress while others like memcpy for example, are going to
// be MO_ExternalSymbol.
unsigned RetOpcode = MBBI->getOpcode();
if (RetOpcode == PPC::TCRETURNdi) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
if (JumpTarget.isGlobal())
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
else if (JumpTarget.isSymbol())
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
addExternalSymbol(JumpTarget.getSymbolName());
else
llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri) {
MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
@ -1692,8 +1704,14 @@ void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
} else if (RetOpcode == PPC::TCRETURNdi8) {
MBBI = MBB.getLastNonDebugInstr();
MachineOperand &JumpTarget = MBBI->getOperand(0);
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
if (JumpTarget.isGlobal())
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
else if (JumpTarget.isSymbol())
BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
addExternalSymbol(JumpTarget.getSymbolName());
else
llvm_unreachable("Expecting Global or External Symbol");
} else if (RetOpcode == PPC::TCRETURNri8) {
MBBI = MBB.getLastNonDebugInstr();
assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");

View File

@ -4780,16 +4780,6 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
// FIXME: Tail calls are currently disabled when using PC Relative addressing.
// The issue is that PC Relative is only partially implemented and so there
// is currently a mix of functions that require the TOC and functions that do
// not require it. If we have A calls B calls C and both A and B require the
// TOC and C does not and is marked as clobbering R2 then it is not safe for
// B to tail call C. Since we do not have the information of whether or not
// a funciton needs to use the TOC here in this function we need to be
// conservatively safe and disable all tail calls for now.
if (Subtarget.isUsingPCRelativeCalls()) return false;
if (DisableSCO && !TailCallOpt) return false;
// Variadic argument functions are not supported.
@ -4829,15 +4819,22 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
needStackSlotPassParameters(Subtarget, Outs))
return false;
// No TCO/SCO on indirect call because Caller have to restore its TOC
if (!isFunctionGlobalAddress(Callee) &&
!isa<ExternalSymbolSDNode>(Callee))
// All variants of 64-bit ELF ABIs without PC-Relative addressing require that
// the caller and callee share the same TOC for TCO/SCO. If the caller and
// callee potentially have different TOC bases then we cannot tail call since
// we need to restore the TOC pointer after the call.
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
// We cannot guarantee this for indirect calls or calls to external functions.
// When PC-Relative addressing is used, the concept of the TOC is no longer
// applicable so this check is not required.
// Check first for indirect calls.
if (!Subtarget.isUsingPCRelativeCalls() &&
!isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
return false;
// If the caller and callee potentially have different TOC bases then we
// cannot tail call since we need to restore the TOC pointer after the call.
// ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
// Check if we share the TOC base.
if (!Subtarget.isUsingPCRelativeCalls() &&
!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
return false;
// TCO allows altering callee ABI, so we don't have to check further.
@ -4849,11 +4846,14 @@ bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
// If callee use the same argument list that caller is using, then we can
// apply SCO on this case. If it is not, then we need to check if callee needs
// stack for passing arguments.
assert(CB && "Expected to have a CallBase!");
if (!hasSameArgumentList(&Caller, *CB) &&
needStackSlotPassParameters(Subtarget, Outs)) {
// PC Relative tail calls may not have a CallBase.
// If there is no CallBase we cannot verify if we have the same argument
// list so assume that we don't have the same argument list.
if (CB && !hasSameArgumentList(&Caller, *CB) &&
needStackSlotPassParameters(Subtarget, Outs))
return false;
else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
return false;
}
return true;
}
@ -5534,13 +5534,18 @@ SDValue PPCTargetLowering::FinishCall(
// Emit tail call.
if (CFlags.IsTailCall) {
// Indirect tail call when using PC Relative calls do not have the same
// constraints.
assert(((Callee.getOpcode() == ISD::Register &&
cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress ||
isa<ConstantSDNode>(Callee)) &&
"Expecting a global address, external symbol, absolute value or "
"register");
isa<ConstantSDNode>(Callee) ||
(CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
"Expecting a global address, external symbol, absolute value, "
"register or an indirect tail call when PC Relative calls are "
"used.");
// PC Relative calls also use TC_RETURN as the way to mark tail calls.
assert(CallOpc == PPCISD::TC_RETURN &&
"Unexpected call opcode for a tail call.");
DAG.getMachineFunction().getFrameInfo().setHasTailCall();
@ -5598,17 +5603,19 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (!getTargetMachine().Options.GuaranteedTailCallOpt)
++NumSiblingCalls;
assert(isa<GlobalAddressSDNode>(Callee) &&
// PC Relative calls no longer guarantee that the callee is a Global
// Address Node. The callee could be an indirect tail call in which
// case the SDValue for the callee could be a load (to load the address
// of a function pointer) or it may be a register copy (to move the
// address of the callee from a function parameter into a virtual
// register). It may also be an ExternalSymbolSDNode (ex memcopy).
assert((Subtarget.isUsingPCRelativeCalls() ||
isa<GlobalAddressSDNode>(Callee)) &&
"Callee should be an llvm::Function object.");
LLVM_DEBUG(
const GlobalValue *GV =
cast<GlobalAddressSDNode>(Callee)->getGlobal();
const unsigned Width =
80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
dbgs() << "TCO caller: "
<< left_justify(DAG.getMachineFunction().getName(), Width)
<< ", callee linkage: " << GV->getVisibility() << ", "
<< GV->getLinkage() << "\n");
LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
<< "\nTCO callee: ");
LLVM_DEBUG(Callee.dump());
}
}

View File

@ -86,14 +86,22 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL;
const MachineInstr *MI = MO.getParent();
if (MI->getOpcode() == PPC::BL8_NOTOC)
RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
const MachineFunction *MF = MI->getMF();
const Module *M = MF->getFunction().getParent();
const PPCSubtarget *Subtarget = &(MF->getSubtarget<PPCSubtarget>());
const TargetMachine &TM = Printer.TM;
unsigned MIOpcode = MI->getOpcode();
assert((Subtarget->isUsingPCRelativeCalls() || MIOpcode != PPC::BL8_NOTOC) &&
"BL8_NOTOC is only valid when using PC Relative Calls.");
if (Subtarget->isUsingPCRelativeCalls()) {
if (MIOpcode == PPC::TAILB || MIOpcode == PPC::TAILB8 ||
MIOpcode == PPC::TCRETURNdi || MIOpcode == PPC::TCRETURNdi8 ||
MIOpcode == PPC::BL8_NOTOC) {
RefKind = MCSymbolRefExpr::VK_PPC_NOTOC;
}
}
const MCExpr *Expr = MCSymbolRefExpr::create(Symbol, RefKind, Ctx);
// If -msecure-plt -fPIC, add 32768 to symbol.
if (Subtarget->isSecurePlt() && TM.isPositionIndependent() &&

View File

@ -7,13 +7,11 @@
; CHECK-S-LABEL: caller
; CHECK-S: bl callee@notoc
; CHECK-S: blr
; CHECK-S: b callee@notoc
; CHECK-O-LABEL: caller
; CHECK-O: bl
; CHECK-O: b
; CHECK-O-NEXT: R_PPC64_REL24_NOTOC callee
; CHECK-O: blr
define dso_local signext i32 @caller() local_unnamed_addr {
entry:
%call = tail call signext i32 bitcast (i32 (...)* @callee to i32 ()*)()
@ -25,13 +23,11 @@ declare signext i32 @callee(...) local_unnamed_addr
; Some calls can be considered External Symbols.
; CHECK-S-LABEL: ExternalSymbol
; CHECK-S: bl memcpy@notoc
; CHECK-S: blr
; CHECK-S: b memcpy@notoc
; CHECK-O-LABEL: ExternalSymbol
; CHECK-O: bl
; CHECK-O: b
; CHECK-O-NEXT: R_PPC64_REL24_NOTOC memcpy
; CHECK-O: blr
define dso_local void @ExternalSymbol(i8* nocapture %out, i8* nocapture readonly %in, i64 %num) local_unnamed_addr {
entry:
tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %out, i8* align 1 %in, i64 %num, i1 false)

View File

@ -193,19 +193,10 @@ define dso_local signext i32 @TailCallLocal1(i32 signext %a) local_unnamed_addr
; CHECK-ALL-LABEL: TailCallLocal1:
; CHECK-S: .localentry TailCallLocal1
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1
; CHECK-S: plwz r4, globalVar@PCREL(0), 1
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl localCall@notoc
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-S-NEXT: b localCall@notoc
entry:
%0 = load i32, i32* @globalVar, align 4
%add = add nsw i32 %0, %a
@ -217,20 +208,11 @@ define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr
; CHECK-ALL-LABEL: TailCallLocal2:
; CHECK-S: .localentry TailCallLocal2
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1
; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl localCall@notoc
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-S-NEXT: b localCall@notoc
entry:
%0 = load i32, i32* @externGlobalVar, align 4
%add = add nsw i32 %0, %a
@ -243,16 +225,7 @@ define dso_local signext i32 @TailCallLocalNoGlobal(i32 signext %a) local_unname
; CHECK-S: .localentry TailCallLocalNoGlobal, 1
; CHECK-P9: .localentry TailCallLocalNoGlobal, .Lfunc_lep9-.Lfunc_gep9
; CHECK-ALL: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: bl localCall@notoc
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-S: b localCall@notoc
entry:
%call = tail call signext i32 @localCall(i32 signext %a)
ret i32 %call
@ -262,19 +235,10 @@ define dso_local signext i32 @TailCallExtern1(i32 signext %a) local_unnamed_addr
; CHECK-ALL-LABEL: TailCallExtern1:
; CHECK-S: .localentry TailCallExtern1
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1
; CHECK-S: plwz r4, globalVar@PCREL(0), 1
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl externCall@notoc
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-S-NEXT: b externCall@notoc
entry:
%0 = load i32, i32* @globalVar, align 4
%add = add nsw i32 %0, %a
@ -286,20 +250,11 @@ define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr
; CHECK-ALL-LABEL: TailCallExtern2:
; CHECK-S: .localentry TailCallExtern2
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1
; CHECK-S: pld r4, externGlobalVar@got@pcrel(0), 1
; CHECK-S-NEXT: lwz r4, 0(r4)
; CHECK-S-NEXT: add r3, r4, r3
; CHECK-S-NEXT: extsw r3, r3
; CHECK-S-NEXT: bl externCall@notoc
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-S-NEXT: b externCall@notoc
entry:
%0 = load i32, i32* @externGlobalVar, align 4
%add = add nsw i32 %0, %a
@ -311,16 +266,8 @@ define dso_local signext i32 @TailCallExternNoGlobal(i32 signext %a) local_unnam
; CHECK-ALL-LABEL: TailCallExternNoGlobal:
; CHECK-S: .localentry TailCallExternNoGlobal, 1
; CHECK-S-NEXT: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: bl externCall@notoc
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-S-NEXT: b externCall@notoc
; CHECK-S-NEXT: #TC_RETURNd8 externCall@notoc
entry:
%call = tail call signext i32 @externCall(i32 signext %a)
ret i32 %call
@ -443,18 +390,10 @@ entry:
define dso_local signext i32 @IndirectCallOnly(i32 signext %a, i32 (i32)* nocapture %call_param) local_unnamed_addr {
; CHECK-ALL-LABEL: IndirectCallOnly:
; CHECK-S: # %bb.0: # %entry
; CHECK-S-NEXT: mflr r0
; CHECK-S-NEXT: std r0, 16(r1)
; CHECK-S-NEXT: stdu r1, -32(r1)
; CHECK-S-NEXT: .cfi_def_cfa_offset 32
; CHECK-S-NEXT: .cfi_offset lr, 16
; CHECK-S-NEXT: mtctr r4
; CHECK-S-NEXT: mr r12, r4
; CHECK-S-NEXT: bctrl
; CHECK-S-NEXT: addi r1, r1, 32
; CHECK-S-NEXT: ld r0, 16(r1)
; CHECK-S-NEXT: mtlr r0
; CHECK-S-NEXT: blr
; CHECK-S-NEXT: bctr
; CHECK-S-NEXT: #TC_RETURNr8 ctr
entry:
%call = tail call signext i32 %call_param(i32 signext %a)
ret i32 %call

View File

@ -215,20 +215,13 @@ entry:
define dso_local void @ReadFuncPtr() local_unnamed_addr {
; CHECK-LABEL: ReadFuncPtr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK: .localentry ReadFuncPtr, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: pld r3, ptrfunc@got@pcrel(0), 1
; CHECK-NEXT: ld r12, 0(r3)
; CHECK-NEXT: mtctr r12
; CHECK-NEXT: bctrl
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
; CHECK-NEXT: bctr
; CHECK-NEXT: #TC_RETURNr8 ctr 0
entry:
%0 = load void ()*, void ()** bitcast (void (...)** @ptrfunc to void ()**), align 8
tail call void %0()

View File

@ -9,20 +9,10 @@
define dso_local void @IndirectCallExternFuncPtr(void ()* nocapture %ptrfunc) {
; CHECK-LABEL: IndirectCallExternFuncPtr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: mr r12, r3
; CHECK-NEXT: bctrl
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
; CHECK-NEXT: bctr
; CHECK-NEXT: #TC_RETURNr8 ctr
entry:
tail call void %ptrfunc()
ret void

View File

@ -0,0 +1,237 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=future -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s
; The tests check the behaviour of PC Relative tail calls. When using
; PC Relative we are able to do more tail calling than we have done in
; the past as we no longer need to restore the TOC pointer into R2 after
; most calls.
@Func = external local_unnamed_addr global i32 (...)*, align 8
@FuncLocal = common dso_local local_unnamed_addr global i32 (...)* null, align 8
; No calls in this function but we assign the function pointers.
; The address of @Function is stored into both the external global @Func and
; the dso_local global @FuncLocal. The expected output obtains the addresses
; of @Func and @Function through the GOT (`@got@pcrel`), writes @FuncLocal
; directly with a PC-relative `pstd`, and returns with a plain `blr`.
define dso_local void @AssignFuncPtr() local_unnamed_addr {
; CHECK-LABEL: AssignFuncPtr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1
; CHECK-NEXT: pld r4, Function@got@pcrel(0), 1
; CHECK-NEXT: std r4, 0(r3)
; CHECK-NEXT: pstd r4, FuncLocal@PCREL(0), 1
; CHECK-NEXT: blr
entry:
store i32 (...)* @Function, i32 (...)** @Func, align 8
store i32 (...)* @Function, i32 (...)** @FuncLocal, align 8
ret void
}
declare signext i32 @Function(...)
; Indirect tail call through the function pointer stored in the dso_local
; global @FuncLocal. The pointer is expected to be loaded PC-relatively into
; r12, moved to CTR, and branched to with `bctr` (no link), with no stack
; frame set up around the call. The function is also expected to carry
; `.localentry TailCallLocalFuncPtr, 1`.
define dso_local void @TailCallLocalFuncPtr() local_unnamed_addr {
; CHECK-LABEL: TailCallLocalFuncPtr:
; CHECK: .localentry TailCallLocalFuncPtr, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: pld r12, FuncLocal@PCREL(0), 1
; CHECK-NEXT: mtctr r12
; CHECK-NEXT: bctr
; CHECK-NEXT: #TC_RETURNr8 ctr 0
entry:
%0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @FuncLocal to i32 ()**), align 8
%call = tail call signext i32 %0()
ret void
}
; Indirect tail call through the function pointer stored in the external
; global @Func. Unlike the dso_local case, the address of @Func is expected
; to come from the GOT (`@got@pcrel`) first; the pointer is then loaded into
; r12, moved to CTR, and branched to with `bctr` (no link).
define dso_local void @TailCallExtrnFuncPtr() local_unnamed_addr {
; CHECK-LABEL: TailCallExtrnFuncPtr:
; CHECK: .localentry TailCallExtrnFuncPtr, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: pld r3, Func@got@pcrel(0), 1
; CHECK-NEXT: ld r12, 0(r3)
; CHECK-NEXT: mtctr r12
; CHECK-NEXT: bctr
; CHECK-NEXT: #TC_RETURNr8 ctr 0
entry:
%0 = load i32 ()*, i32 ()** bitcast (i32 (...)** @Func to i32 ()**), align 8
%call = tail call signext i32 %0()
ret void
}
; Indirect tail call through a function pointer received as the first
; parameter. The call result is returned unchanged, so the call is in tail
; position. The expected output copies r3 into both CTR and r12 and then
; branches with `bctr` (no link), without creating a stack frame.
define dso_local signext i32 @TailCallParamFuncPtr(i32 (...)* nocapture %passedfunc) local_unnamed_addr {
; CHECK-LABEL: TailCallParamFuncPtr:
; CHECK: .localentry TailCallParamFuncPtr, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: mr r12, r3
; CHECK-NEXT: bctr
; CHECK-NEXT: #TC_RETURNr8 ctr 0
entry:
%callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()*
%call = tail call signext i32 %callee.knr.cast()
ret i32 %call
}
; Negative test for indirect calls: the result of the call is added to %a
; before being returned, so the call is not in tail position even though it
; carries the `tail` marker. The expected output is a regular call: the link
; register and r30 are saved, a 48-byte frame is created, %a is kept in r30
; across the call, and the call itself is emitted as `bctrl` (with link).
define dso_local signext i32 @NoTailIndirectCall(i32 (...)* nocapture %passedfunc, i32 signext %a) local_unnamed_addr {
; CHECK-LABEL: NoTailIndirectCall:
; CHECK: .localentry NoTailIndirectCall, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: mr r12, r3
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: bctrl
; CHECK-NEXT: add r3, r3, r30
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
%callee.knr.cast = bitcast i32 (...)* %passedfunc to i32 ()*
%call = tail call signext i32 %callee.knr.cast()
%add = add nsw i32 %call, %a
ret i32 %add
}
; Direct tail call to @Function, which is only declared (not defined) in
; this file. The call result is returned unchanged, and the expected output
; is a single `b Function@notoc` branch with no stack frame.
define dso_local signext i32 @TailCallDirect() local_unnamed_addr {
; CHECK-LABEL: TailCallDirect:
; CHECK: .localentry TailCallDirect, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: b Function@notoc
; CHECK-NEXT: #TC_RETURNd8 Function@notoc 0
entry:
%call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)()
ret i32 %call
}
; Negative test for direct calls to the declared-only @Function: the call
; result is added to %a before returning, so the call is not in tail
; position. The expected output keeps %a in r30 across the call, sets up a
; 48-byte frame, and uses `bl Function@notoc` (with link) followed by the
; usual epilogue and `blr`.
define dso_local signext i32 @NoTailCallDirect(i32 signext %a) local_unnamed_addr {
; CHECK-LABEL: NoTailCallDirect:
; CHECK: .localentry NoTailCallDirect, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: bl Function@notoc
; CHECK-NEXT: add r3, r3, r30
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
%call = tail call signext i32 bitcast (i32 (...)* @Function to i32 ()*)()
%add = add nsw i32 %call, %a
ret i32 %add
}
; Direct tail call to @LocalFunction, an internal fastcc function defined
; later in this file. The call result is returned unchanged, and the
; expected output is a single `b LocalFunction@notoc` branch with no stack
; frame.
define dso_local signext i32 @TailCallDirectLocal() local_unnamed_addr {
; CHECK-LABEL: TailCallDirectLocal:
; CHECK: .localentry TailCallDirectLocal, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: b LocalFunction@notoc
; CHECK-NEXT: #TC_RETURNd8 LocalFunction@notoc 0
entry:
%call = tail call fastcc signext i32 @LocalFunction()
ret i32 %call
}
; Negative test for direct calls to the internal @LocalFunction: the call
; result is added to %a before returning, so the call is not in tail
; position. The expected output keeps %a in r30 across the call, sets up a
; 48-byte frame, and uses `bl LocalFunction@notoc` (with link) followed by
; the usual epilogue and `blr`.
define dso_local signext i32 @NoTailCallDirectLocal(i32 signext %a) local_unnamed_addr {
; CHECK-LABEL: NoTailCallDirectLocal:
; CHECK: .localentry NoTailCallDirectLocal, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: bl LocalFunction@notoc
; CHECK-NEXT: add r3, r3, r30
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
%call = tail call fastcc signext i32 @LocalFunction()
%add = add nsw i32 %call, %a
ret i32 %add
}
; Tail call to the absolute address 400 (an `inttoptr` constant callee).
; The expected output materializes 400 with `li` into r3 (moved to CTR) and
; into r12, then branches with `bctr` (no link) without creating a stack
; frame.
define dso_local signext i32 @TailCallAbs() local_unnamed_addr {
; CHECK-LABEL: TailCallAbs:
; CHECK: .localentry TailCallAbs, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: li r3, 400
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: li r12, 400
; CHECK-NEXT: bctr
; CHECK-NEXT: #TC_RETURNr8 ctr 0
entry:
%call = tail call signext i32 inttoptr (i64 400 to i32 ()*)()
ret i32 %call
}
; Negative test for calls to the absolute address 400: the call result is
; added to %a before returning, so the call is not in tail position. The
; expected output keeps %a in r30 across the call, sets up a 48-byte frame,
; materializes 400 into CTR and r12, and calls with `bctrl` (with link)
; followed by the usual epilogue and `blr`.
define dso_local signext i32 @NoTailCallAbs(i32 signext %a) local_unnamed_addr {
; CHECK-LABEL: NoTailCallAbs:
; CHECK: .localentry NoTailCallAbs, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: mr r30, r3
; CHECK-NEXT: li r3, 400
; CHECK-NEXT: mtctr r3
; CHECK-NEXT: li r12, 400
; CHECK-NEXT: bctrl
; CHECK-NEXT: add r3, r3, r30
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
entry:
%call = tail call signext i32 inttoptr (i64 400 to i32 ()*)()
%add = add nsw i32 %call, %a
ret i32 %add
}
; Function Attrs: noinline
; This function should be tail called and not inlined.
; It is the internal fastcc callee used by @TailCallDirectLocal and
; @NoTailCallDirectLocal. Its body is a single inline asm statement
; (`li $0, 42`) whose result is returned; the expected output wraps that
; instruction in #APP/#NO_APP, sign-extends r3 with `extsw`, and returns
; with `blr`.
define internal fastcc signext i32 @LocalFunction() unnamed_addr #0 {
; CHECK-LABEL: LocalFunction:
; CHECK: .localentry LocalFunction, 1
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: #APP
; CHECK-NEXT: li r3, 42
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: extsw r3, r3
; CHECK-NEXT: blr
entry:
%0 = tail call i32 asm "li $0, 42", "=&r"()
ret i32 %0
}
attributes #0 = { noinline }