1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[Hexagon] Implement noreturn optimization

Eliminate the stack frame in functions with the noreturn nounwind
attributes, and when the noreturn-stack-elim target feature is
enabled. This reduces the code and stack space needed for noreturn
functions.

Differential Revision: https://reviews.llvm.org/D54210

llvm-svn: 346532
This commit is contained in:
Brendon Cahoon 2018-11-09 18:16:24 +00:00
parent 9ebbccbe64
commit 1faabfff72
5 changed files with 133 additions and 1 deletions

View File

@ -60,6 +60,9 @@ def FeatureDuplex: SubtargetFeature<"duplex", "EnableDuplex", "true",
"Enable generation of duplex instruction">;
def FeatureReservedR19: SubtargetFeature<"reserved-r19", "ReservedR19",
"true", "Reserve register R19">;
// Opt-in subtarget feature named "noreturn-stack-elim". Enabling it sets the
// NoreturnStackElim subtarget field to "true".
def FeatureNoreturnStackElim: SubtargetFeature<"noreturn-stack-elim",
"NoreturnStackElim", "true",
"Eliminate stack allocation in a noreturn function when possible">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.

View File

@ -550,6 +550,36 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
}
}
/// Returns true if the target can safely skip saving callee-saved registers
/// for noreturn nounwind functions.
///
/// Precondition (asserted): the function is noreturn and nounwind, and has no
/// uwtable attribute. Given that, the answer depends only on whether the
/// noreturn-stack-elim subtarget feature is enabled.
bool HexagonFrameLowering::enableCalleeSaveSkip(
    const MachineFunction &MF) const {
  const auto &F = MF.getFunction();
  assert(F.hasFnAttribute(Attribute::NoReturn) &&
         F.hasFnAttribute(Attribute::NoUnwind) &&
         !F.hasFnAttribute(Attribute::UWTable) &&
         "expected a noreturn nounwind function without uwtable");
  // F is referenced only by the assert above; this keeps release (NDEBUG)
  // builds free of an unused-variable warning.
  (void)F;
  // No need to save callee saved registers if the function does not return.
  return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
}
// Decides whether the stack frame (allocframe) can be dropped for a function.
// This is the case when the function is noreturn and nounwind, carries no
// uwtable attribute, the noreturn-stack-elim feature is enabled, and the
// frame info reports a stack size of zero. Under those conditions an FP may
// not be needed even if the function makes a call. It is very similar to
// enableCalleeSaveSkip, but it is used to check whether the allocframe can be
// eliminated as well.
static bool enableAllocFrameElim(const MachineFunction &MF) {
  const auto &Subtarget = MF.getSubtarget<HexagonSubtarget>();
  const auto &FrameInfo = MF.getFrameInfo();
  const auto &Fn = MF.getFunction();

  // Callers must already have ruled out variable-sized objects and stack
  // realignment before asking this question.
  assert(!FrameInfo.hasVarSizedObjects() &&
         !Subtarget.getRegisterInfo()->needsStackRealignment(MF));

  // Attribute requirements on the function itself.
  if (!Fn.hasFnAttribute(Attribute::NoReturn))
    return false;
  if (!Fn.hasFnAttribute(Attribute::NoUnwind))
    return false;
  if (Fn.hasFnAttribute(Attribute::UWTable))
    return false;

  // The optimization is opt-in via the subtarget feature.
  if (!Subtarget.noreturnStackElim())
    return false;

  // Only eliminate the frame when no stack space is needed.
  return FrameInfo.getStackSize() == 0;
}
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
bool PrologueStubs) const {
MachineFunction &MF = *MBB.getParent();
@ -994,7 +1024,7 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
}
const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
if (MFI.hasCalls() || HMFI.hasClobberLR())
if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
return true;
return false;

View File

@ -41,6 +41,8 @@ public:
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
override {}
bool enableCalleeSaveSkip(const MachineFunction &MF) const override;
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override {

View File

@ -56,6 +56,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool HasMemNoShuf = false;
bool EnableDuplex = false;
bool ReservedR19 = false;
bool NoreturnStackElim = false;
public:
Hexagon::ArchEnum HexagonArchVersion;
@ -168,6 +169,8 @@ public:
bool hasReservedR19() const { return ReservedR19; }
bool usePredicatedCalls() const;
/// Returns the NoreturnStackElim subtarget flag.
bool noreturnStackElim() const { return NoreturnStackElim; }
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;

View File

@ -0,0 +1,94 @@
; RUN: llc -mtriple=hexagon-unknown--elf -hexagon-initial-cfg-cleanup=false < %s | FileCheck %s
; RUN: llc -mtriple=hexagon-unknown--elf -hexagon-initial-cfg-cleanup=false -mattr=+noreturn-stack-elim < %s | FileCheck %s --check-prefix=CHECK-FLAG
; Test the noreturn stack elimination feature. We've added a new flag/feature
; that attempts to eliminate the local stack for noreturn nounwind functions.
; The optimization eliminates the need to save callee saved registers, and
; eliminates the allocframe, when no local stack space is needed.
%struct.A = type { i32, i32 }
; Test the case when noreturn-stack-elim determines that the callee-saved
; registers do not need to be saved, and the allocframe can be eliminated.
; CHECK-LABEL: test1
; CHECK: memd(r29+#-16) = r17:16
; CHECK: allocframe
; CHECK-FLAG-LABEL: test1
; CHECK-FLAG-NOT: memd(r29+#-16) = r17:16
; CHECK-FLAG-NOT: allocframe
; @test1 uses attribute group #0 (noreturn nounwind).
define dso_local void @test1(i32 %a, %struct.A* %b) local_unnamed_addr #0 {
entry:
; Store %a into the first field of the %struct.A that %b points to.
%n = getelementptr inbounds %struct.A, %struct.A* %b, i32 0, i32 0
store i32 %a, i32* %n, align 4
; %b is used again after the call to @f1: it is passed to @nrf1.
tail call void @f1() #3
tail call void @nrf1(%struct.A* %b) #4
unreachable
}
; Test that noreturn-stack-elim doesn't eliminate the local stack, when
; a function needs to allocate a local variable.
; CHECK-LABEL: test2
; CHECK: allocframe
; CHECK-FLAG-LABEL: test2
; CHECK-FLAG: allocframe
; @test2 uses attribute group #0 (noreturn nounwind).
define dso_local void @test2() local_unnamed_addr #0 {
entry:
; Fixed-size i32 local; its address is handed to @f3 below.
%a = alloca i32, align 4
%0 = bitcast i32* %a to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #4
call void @f3(i32* nonnull %a) #4
unreachable
}
; Test that noreturn-stack-elim can eliminate the allocframe when no locals
; are allocated on the stack.
; CHECK-LABEL: test3
; CHECK: allocframe
; CHECK-FLAG-LABEL: test3
; CHECK-FLAG-NOT: allocframe
; @test3 uses attribute group #0 (noreturn nounwind) and contains no alloca.
define dso_local void @test3(i32 %a) local_unnamed_addr #0 {
entry:
; Compute %a + 5 and pass it to @f2; the function never returns afterwards.
%add = add nsw i32 %a, 5
call void @f2(i32 %add)
unreachable
}
; Test that nothing is optimized when an alloca is needed for local stack.
; CHECK-LABEL: test4
; CHECK: allocframe
; CHECK-FLAG-LABEL: test4
; CHECK-FLAG: allocframe
; @test4 uses attribute group #0 (noreturn nounwind).
define dso_local void @test4(i32 %n) local_unnamed_addr #0 {
entry:
; Dynamically sized alloca: %n elements of i32, 8-byte aligned.
%vla = alloca i32, i32 %n, align 8
call void @f3(i32* nonnull %vla) #4
unreachable
}
declare dso_local void @f1() local_unnamed_addr
declare dso_local void @f2(i32) local_unnamed_addr
declare dso_local void @f3(i32*) local_unnamed_addr
declare dso_local void @nrf1(%struct.A*) local_unnamed_addr #2
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #5
attributes #0 = { noreturn nounwind }
attributes #2 = { noreturn }
attributes #3 = { nounwind }
attributes #4 = { noreturn nounwind }
attributes #5 = { argmemonly nounwind }