1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 12:41:49 +01:00

[Scheduling][ARM] Consistently enable PostRA Machine scheduling

In the ARM backend, for historical reasons we have only some targets
using Machine Scheduling. The rest use the old list scheduler as they
are using itineraries and the list scheduler seems to produce better code
(and not crash running out of registers on v6m code). So whether to use
the MIScheduler or not is checked at runtime from the subtarget
features.

This is fine, except for post-ra scheduling. Whether to use the old
post-ra list scheduler or the post-ra machine scheduler is decided as the
pass manager is set up, in ARM's case from a newly constructed subtarget.
Under some situations, like LTO, this won't include the correct CPU, so
it can pick the wrong option. This can have a surprising effect on
performance.

To fix that, this patch overrides targetSchedulesPostRAScheduling and
addPreSched2 in the ARM backend, adding _both_ post-ra schedulers and
picking at runtime which to execute. To pick between the two I've had to
add an enablePostRAMachineScheduler() method that normally returns
enableMachineScheduler() && enablePostRAScheduler(), which can be
overridden to enable just one of PostRAMachineScheduler vs
PostRAScheduler.

Thanks to David Penry for identifying this problem.

Differential Revision: https://reviews.llvm.org/D69775
This commit is contained in:
David Green 2019-11-05 09:10:58 +00:00
parent 75ed81b55e
commit 1643bee451
17 changed files with 72 additions and 18 deletions

View File

@ -206,6 +206,10 @@ public:
/// which is the preferred way to influence this.
virtual bool enablePostRAScheduler() const;
/// True if the subtarget should run a machine scheduler after register
/// allocation. The default implementation returns
/// enableMachineScheduler() && enablePostRAScheduler(); a target may override
/// this to choose between the post-RA machine scheduler and the post-RA list
/// scheduler independently.
virtual bool enablePostRAMachineScheduler() const;
/// True if the subtarget should run the atomic expansion pass.
virtual bool enableAtomicExpand() const;

View File

@ -402,7 +402,7 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
if (EnablePostRAMachineSched.getNumOccurrences()) {
if (!EnablePostRAMachineSched)
return false;
} else if (!mf.getSubtarget().enablePostRAScheduler()) {
} else if (!mf.getSubtarget().enablePostRAMachineScheduler()) {
LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
return false;
}

View File

@ -54,6 +54,10 @@ bool TargetSubtargetInfo::enablePostRAScheduler() const {
return getSchedModel().PostRAScheduler;
}
bool TargetSubtargetInfo::enablePostRAMachineScheduler() const {
  // Default policy: the post-RA machine scheduler runs only when the machine
  // scheduler and post-RA scheduling are both enabled for this subtarget.
  if (!enableMachineScheduler())
    return false;
  return enablePostRAScheduler();
}
// Base implementation of the alias-analysis hook: always reports false, so
// alias analysis is only used when a subtarget overrides this.
bool TargetSubtargetInfo::useAA() const {
return false;
}

View File

@ -381,9 +381,19 @@ bool ARMSubtarget::enableMachineScheduler() const {
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
  // This hook is off whenever the MachineScheduler is in use, and whenever
  // post-RA scheduling has been disabled for this subtarget.
  if (enableMachineScheduler() || disablePostRAScheduler())
    return false;

  // Thumb1 cores will generally not benefit from post-ra scheduling
  const bool Thumb1 = isThumb1Only();
  return !Thumb1;
}
bool ARMSubtarget::enablePostRAMachineScheduler() const {
  // Requires the MachineScheduler to be enabled and post-RA scheduling not to
  // be disabled. Thumb1-only subtargets are excluded as well (original note:
  // "Don't reschedule potential IT blocks.").
  return enableMachineScheduler() && !disablePostRAScheduler() &&
         !isThumb1Only();
}

View File

@ -806,6 +806,9 @@ public:
/// True for some subtargets at > -O0. Returns false whenever the
/// MachineScheduler is enabled, when post-RA scheduling has been disabled,
/// and on Thumb1-only subtargets.
bool enablePostRAScheduler() const override;
/// True for some subtargets at > -O0: requires the MachineScheduler to be
/// enabled, post-RA scheduling not to be disabled, and the subtarget not to
/// be Thumb1-only.
bool enablePostRAMachineScheduler() const override;
/// Enable use of alias analysis during code generation (during MI
/// scheduling, DAGCombine, etc.).
bool useAA() const override { return UseAA; }

View File

@ -322,14 +322,7 @@ namespace {
class ARMPassConfig : public TargetPassConfig {
public:
ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
if (TM.getOptLevel() != CodeGenOpt::None) {
ARMGenSubtargetInfo STI(TM.getTargetTriple(), TM.getTargetCPU(),
TM.getTargetFeatureString());
if (STI.hasFeature(ARM::FeatureUseMISched))
substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
}
}
: TargetPassConfig(TM, PM) {}
ARMBaseTargetMachine &getARMTargetMachine() const {
return getTM<ARMBaseTargetMachine>();
@ -523,6 +516,13 @@ void ARMPassConfig::addPreSched2() {
}
addPass(createMVEVPTBlockPass());
addPass(createThumb2ITBlockPass());
// Add both scheduling passes to give the subtarget an opportunity to pick
// between them.
if (getOptLevel() != CodeGenOpt::None) {
addPass(&PostMachineSchedulerID);
addPass(&PostRASchedulerID);
}
}
void ARMPassConfig::addPreEmitPass() {

View File

@ -70,6 +70,8 @@ public:
TargetTriple.isOSWindows() ||
TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
/// The ARM pass pipeline adds its post-RA scheduling passes itself (see
/// ARMPassConfig::addPreSched2), so report that post-RA scheduling is
/// handled by the target.
bool targetSchedulesPostRAScheduling() const override { return true; }
};
/// ARM/Thumb little endian target machine.

View File

@ -141,6 +141,7 @@
; CHECK-NEXT: Thumb IT blocks insertion pass
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: PostRA Machine Instruction Scheduler
; CHECK-NEXT: Post RA top-down list latency scheduler
; CHECK-NEXT: Analyze Machine Code For Garbage Collection
; CHECK-NEXT: Machine Block Frequency Analysis

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
;
@a = global i32 0, align 4

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; CHECK: ********** MI Scheduling **********
; We need second, post-ra scheduling to have LDM instruction combined from single-loads

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; N=3 STMIA_UPD should have latency 2cyc and writeback latency 1cyc
; CHECK: ********** MI Scheduling **********

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; N=3 STMIB should have latency 2cyc
; CHECK: ********** MI Scheduling **********

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
;
@a = global double 0.0, align 4

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; CHECK: ********** MI Scheduling **********
; We need second, post-ra scheduling to have VLDM instruction combined from single-loads

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; CHECK: ********** MI Scheduling **********
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores

View File

@ -1,5 +1,5 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; CHECK: ********** MI Scheduling **********
; We need second, post-ra scheduling to have VSTM instruction combined from single-stores

View File

@ -0,0 +1,30 @@
; REQUIRES: asserts
; RUN: llc < %s -mtriple=thumbv8m.main-none-eabi -debug-only=machine-scheduler,post-RA-sched -print-before=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
; CHECK-LABEL: test_misched
; Pre and post ra machine scheduling. The CPU is supplied only through the
; function's "target-cpu" attribute (cortex-m33), not on the llc command line,
; so the scheduler choice has to be made per function.
; CHECK: ********** MI Scheduling **********
; CHECK: t2LDRi12
; CHECK: Latency : 2
; CHECK: ********** MI Scheduling **********
; CHECK: t2LDRi12
; CHECK: Latency : 2
define i32 @test_misched(i32* %ptr) "target-cpu"="cortex-m33" {
entry:
%l = load i32, i32* %ptr
store i32 0, i32* %ptr
ret i32 %l
}
; CHECK-LABEL: test_rasched
; No "target-cpu" attribute on this function: the post-ra machine scheduler is
; expected to bail out and the post-ra list scheduler to run instead.
; CHECK: Subtarget disables post-MI-sched.
; CHECK: ********** List Scheduling **********
define i32 @test_rasched(i32* %ptr) {
entry:
%l = load i32, i32* %ptr
store i32 0, i32* %ptr
ret i32 %l
}