1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

- Allow target to specify when register pressure is "too high". In most cases,
  it's too late to start backing off aggressive latency scheduling when most
  of the registers are in use so the threshold should be a bit tighter.
- Correctly handle live-outs, extract_subreg, etc.
- Enable register pressure aware scheduling by default for hybrid scheduler.
  For ARM, this is almost always a win on # of instructions. It's runtime
  neutral for most of the tests. But for some kernels with high register
  pressure it can be a huge win. e.g. 464.h264ref reduced number of spills by
  54 and sped up by 20%.

llvm-svn: 109279
This commit is contained in:
Evan Cheng 2010-07-23 22:39:59 +00:00
parent 06fcdd6563
commit f215e55d5f
5 changed files with 164 additions and 64 deletions

View File

@ -186,6 +186,14 @@ public:
return RepRegClassCostForVT[VT.getSimpleVT().SimpleTy];
}
/// getRegPressureLimit - Report the register pressure threshold (the "high
/// water mark") for register class RC in function MF. Once pressure for that
/// class goes over this value the scheduler treats the class as being in
/// high register pressure mode. This default implementation ignores both
/// arguments and always reports 0; being virtual, it can be overridden by a
/// target to supply real per-class limits.
virtual unsigned getRegPressureLimit(const TargetRegisterClass *RC,
                                     MachineFunction &MF) const {
  const unsigned DefaultLimit = 0;
  return DefaultLimit;
}
/// isTypeLegal - Return true if the target has native support for the
/// specified value type. This means that it has a register that directly
/// holds it without promotions or expansions.

View File

@ -28,16 +28,12 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
static cl::opt<bool> RegPressureAware("reg-pressure-aware-sched",
cl::init(false), cl::Hidden);
STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
@ -1075,7 +1071,7 @@ namespace {
std::fill(RegPressure.begin(), RegPressure.end(), 0);
for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
E = TRI->regclass_end(); I != E; ++I)
RegLimit[(*I)->getID()] = tri->getAllocatableSet(MF, *I).count() - 1;
RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
}
}
@ -1172,10 +1168,12 @@ namespace {
SU->NodeQueueId = 0;
}
bool HighRegPressure(const SUnit *SU) const {
bool HighRegPressure(const SUnit *SU, unsigned &Excess) const {
if (!TLI)
return false;
bool High = false;
Excess = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
I != E; ++I) {
if (I->isCtrl())
@ -1183,12 +1181,41 @@ namespace {
SUnit *PredSU = I->getSUnit();
const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyToReg) {
EVT VT = PN->getOperand(1).getValueType();
if (PN->getOpcode() == ISD::CopyFromReg) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT);
if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
return true;
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
High = true;
Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
}
}
continue;
}
unsigned POpc = PN->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF)
continue;
if (POpc == TargetOpcode::EXTRACT_SUBREG) {
EVT VT = PN->getOperand(0).getValueType();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT);
// Check if this increases register pressure of the specific register
// class to the point where it would cause spills.
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
High = true;
Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
}
continue;
} else if (POpc == TargetOpcode::INSERT_SUBREG ||
POpc == TargetOpcode::SUBREG_TO_REG) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT);
// Check if this increases register pressure of the specific register
// class to the point where it would cause spills.
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
High = true;
Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
}
continue;
}
@ -1201,12 +1228,14 @@ namespace {
unsigned Cost = TLI->getRepRegClassCostFor(VT);
// Check if this increases register pressure of the specific register
// class to the point where it would cause spills.
if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
return true;
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) {
High = true;
Excess += (RegPressure[RCId] + Cost) - RegLimit[RCId];
}
}
}
return false;
return High;
}
void ScheduledNode(SUnit *SU) {
@ -1214,13 +1243,18 @@ namespace {
return;
const SDNode *N = SU->getNode();
if (!N->isMachineOpcode())
return;
unsigned Opc = N->getMachineOpcode();
if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
Opc == TargetOpcode::REG_SEQUENCE ||
Opc == TargetOpcode::IMPLICIT_DEF)
return;
if (!N->isMachineOpcode()) {
if (N->getOpcode() != ISD::CopyToReg)
return;
} else {
unsigned Opc = N->getMachineOpcode();
if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG ||
Opc == TargetOpcode::REG_SEQUENCE ||
Opc == TargetOpcode::IMPLICIT_DEF)
return;
}
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
@ -1231,8 +1265,8 @@ namespace {
continue;
const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyToReg) {
EVT VT = PN->getOperand(1).getValueType();
if (PN->getOpcode() == ISD::CopyFromReg) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
@ -1241,6 +1275,18 @@ namespace {
unsigned POpc = PN->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF)
continue;
if (POpc == TargetOpcode::EXTRACT_SUBREG) {
EVT VT = PN->getOperand(0).getValueType();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
} else if (POpc == TargetOpcode::INSERT_SUBREG ||
POpc == TargetOpcode::SUBREG_TO_REG) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
}
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = PN->getValueType(i);
@ -1251,19 +1297,19 @@ namespace {
}
}
if (!SU->NumSuccs)
return;
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = N->getValueType(i);
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
// Register pressure tracking is imprecise. This can happen.
RegPressure[RCId] = 0;
else
RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
if (SU->NumSuccs) {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = N->getValueType(i);
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
// Register pressure tracking is imprecise. This can happen.
RegPressure[RCId] = 0;
else
RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
}
}
dumpRegPressure();
@ -1274,10 +1320,14 @@ namespace {
return;
const SDNode *N = SU->getNode();
if (!N->isMachineOpcode())
return;
if (!N->isMachineOpcode()) {
if (N->getOpcode() != ISD::CopyToReg)
return;
}
unsigned Opc = N->getMachineOpcode();
if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
if (Opc == TargetOpcode::EXTRACT_SUBREG ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG ||
Opc == TargetOpcode::REG_SEQUENCE ||
Opc == TargetOpcode::IMPLICIT_DEF)
return;
@ -1291,8 +1341,8 @@ namespace {
continue;
const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyToReg) {
EVT VT = PN->getOperand(1).getValueType();
if (PN->getOpcode() == ISD::CopyFromReg) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
@ -1301,6 +1351,18 @@ namespace {
unsigned POpc = PN->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF)
continue;
if (POpc == TargetOpcode::EXTRACT_SUBREG) {
EVT VT = PN->getOperand(0).getValueType();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
} else if (POpc == TargetOpcode::INSERT_SUBREG ||
POpc == TargetOpcode::SUBREG_TO_REG) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
continue;
}
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = PN->getValueType(i);
@ -1315,17 +1377,17 @@ namespace {
}
}
if (!SU->NumSuccs)
return;
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Flag || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
if (SU->NumSuccs) {
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
if (VT == MVT::Flag || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
}
dumpRegPressure();
@ -1464,13 +1526,20 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
}
bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
unsigned LExcess, RExcess;
bool LHigh = SPQ->HighRegPressure(left, LExcess);
bool RHigh = SPQ->HighRegPressure(right, RExcess);
if (LHigh && !RHigh)
return true;
else if (!LHigh && RHigh)
return false;
else if (!LHigh && !RHigh) {
else if (LHigh && RHigh) {
if (LExcess > RExcess)
return true;
else if (LExcess < RExcess)
return false;
// Otherwise schedule for register pressure reduction.
} else {
// Low register pressure situation, schedule for latency if possible.
bool LStall = left->SchedulingPref == Sched::Latency &&
SPQ->getCurCycle() < left->getHeight();
@ -1889,8 +1958,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
const TargetLowering *TLI = &IS->getTargetLowering();
HybridBURRPriorityQueue *PQ =
new HybridBURRPriorityQueue(*IS->MF, RegPressureAware, TII, TRI,
(RegPressureAware ? TLI : 0));
new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
PQ->setScheduleDAG(SD);
return SD;

View File

@ -166,6 +166,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
: TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
RegInfo = TM.getRegisterInfo();
if (Subtarget->isTargetDarwin()) {
// Uses VFP for Thumb libfuncs if available.
@ -729,6 +730,23 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
return Sched::RegPressure;
}
/// getRegPressureLimit - ARM implementation of the register pressure "high
/// water mark" query. Returns a fixed budget per register class, reduced by
/// one when RegInfo->hasFP(MF) is true and, for the GPR class only, by one
/// more when the subtarget reports R9 as reserved. Register classes not
/// listed here get a limit of 0.
unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
                                       MachineFunction &MF) const {
  // Queried up front for every class, exactly once per call.
  const unsigned FramePtrCost = RegInfo->hasFP(MF) ? 1 : 0;
  const unsigned ClassID = RC->getID();

  if (ClassID == ARM::tGPRRegClassID)
    return 5 - FramePtrCost;

  if (ClassID == ARM::GPRRegClassID) {
    const unsigned R9Cost = Subtarget->isR9Reserved() ? 1 : 0;
    return 10 - FramePtrCost - R9Cost;
  }

  // SPR is currently not used as 'rep' register class; it shares the DPR
  // budget.
  if (ClassID == ARM::SPRRegClassID || ClassID == ARM::DPRRegClassID)
    return 32 - 10;

  // Any other register class: no limit supplied.
  return 0;
}
//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

View File

@ -17,6 +17,7 @@
#include "ARMSubtarget.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"
@ -268,6 +269,9 @@ namespace llvm {
Sched::Preference getSchedulingPreference(SDNode *N) const;
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
bool isShuffleMaskLegal(const SmallVectorImpl<int> &M, EVT VT) const;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
@ -285,6 +289,8 @@ namespace llvm {
/// make the right decision when generating code for different targets.
const ARMSubtarget *Subtarget;
const TargetRegisterInfo *RegInfo;
/// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
///
unsigned ARMPCLabelIndex;

View File

@ -4,14 +4,14 @@
; constant offset addressing, so that each of the following stores
; uses the same register.
; CHECK: vstr.32 s0, [r{{.*}}, #-128]
; CHECK: vstr.32 s0, [r{{.*}}, #-96]
; CHECK: vstr.32 s0, [r{{.*}}, #-64]
; CHECK: vstr.32 s0, [r{{.*}}, #-32]
; CHECK: vstr.32 s0, [r{{.*}}]
; CHECK: vstr.32 s0, [r{{.*}}, #32]
; CHECK: vstr.32 s0, [r{{.*}}, #64]
; CHECK: vstr.32 s0, [r{{.*}}, #96]
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"