mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-19 11:02:59 +02:00
SLPVectorizer: limit the scheduling region size per basic block.
Usually large blocks are not a problem. But if a large block (> 10k instructions) contains many (potential) chains of vector instructions, and those chains are spread over a wide range of instructions, then scheduling becomes a compile-time problem. This change introduces a limit for the accumulated scheduling region size of a block. For real-world functions this limit will never be exceeded (it's about 10x larger than the maximum value seen in the test-suite and external test suite). llvm-svn: 248917
This commit is contained in:
parent
6c3b87438e
commit
4c2c900c73
@ -73,6 +73,14 @@ static cl::opt<int>
|
||||
MaxVectorRegSizeOption("slp-max-reg-size", cl::init(128), cl::Hidden,
|
||||
cl::desc("Attempt to vectorize for this register size in bits"));
|
||||
|
||||
/// Limits the size of scheduling regions in a block.
|
||||
/// It avoids long compile times for _very_ large blocks where vector
|
||||
/// instructions are spread over a wide range.
|
||||
/// This limit is way higher than needed by real-world functions.
|
||||
static cl::opt<int>
|
||||
ScheduleRegionSizeBudget("slp-schedule-budget", cl::init(100000), cl::Hidden,
|
||||
cl::desc("Limit the size of the SLP scheduling region per block"));
|
||||
|
||||
namespace {
|
||||
|
||||
// FIXME: Set this via cl::opt to allow overriding.
|
||||
@ -89,6 +97,10 @@ static const unsigned AliasedCheckLimit = 10;
|
||||
// This limit is useful for very large basic blocks.
|
||||
static const unsigned MaxMemDepDistance = 160;
|
||||
|
||||
/// If the ScheduleRegionSizeBudget is exhausted, we allow small scheduling
|
||||
/// regions to be handled.
|
||||
static const int MinScheduleRegionSize = 16;
|
||||
|
||||
/// \brief Predicate for the element types that the SLP vectorizer supports.
|
||||
///
|
||||
/// The most important thing to filter here are types which are invalid in LLVM
|
||||
@ -720,6 +732,8 @@ private:
|
||||
: BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize),
|
||||
ScheduleStart(nullptr), ScheduleEnd(nullptr),
|
||||
FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr),
|
||||
ScheduleRegionSize(0),
|
||||
ScheduleRegionSizeLimit(ScheduleRegionSizeBudget),
|
||||
// Make sure that the initial SchedulingRegionID is greater than the
|
||||
// initial SchedulingRegionID in ScheduleData (which is 0).
|
||||
SchedulingRegionID(1) {}
|
||||
@ -731,6 +745,13 @@ private:
|
||||
FirstLoadStoreInRegion = nullptr;
|
||||
LastLoadStoreInRegion = nullptr;
|
||||
|
||||
// Reduce the maximum schedule region size by the size of the
|
||||
// previous scheduling run.
|
||||
ScheduleRegionSizeLimit -= ScheduleRegionSize;
|
||||
if (ScheduleRegionSizeLimit < MinScheduleRegionSize)
|
||||
ScheduleRegionSizeLimit = MinScheduleRegionSize;
|
||||
ScheduleRegionSize = 0;
|
||||
|
||||
// Make a new scheduling region, i.e. all existing ScheduleData is not
|
||||
// in the new region yet.
|
||||
++SchedulingRegionID;
|
||||
@ -807,7 +828,8 @@ private:
|
||||
void cancelScheduling(ArrayRef<Value *> VL);
|
||||
|
||||
/// Extends the scheduling region so that V is inside the region.
|
||||
void extendSchedulingRegion(Value *V);
|
||||
/// \returns true if the region size is within the limit.
|
||||
bool extendSchedulingRegion(Value *V);
|
||||
|
||||
/// Initialize the ScheduleData structures for new instructions in the
|
||||
/// scheduling region.
|
||||
@ -861,6 +883,12 @@ private:
|
||||
/// (can be null).
|
||||
ScheduleData *LastLoadStoreInRegion;
|
||||
|
||||
/// The current size of the scheduling region.
|
||||
int ScheduleRegionSize;
|
||||
|
||||
/// The maximum size allowed for the scheduling region.
|
||||
int ScheduleRegionSizeLimit;
|
||||
|
||||
/// The ID of the scheduling region. For a new vectorization iteration this
|
||||
/// is incremented which "removes" all ScheduleData from the region.
|
||||
int SchedulingRegionID;
|
||||
@ -1080,7 +1108,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
|
||||
if (!BS.tryScheduleBundle(VL, this)) {
|
||||
DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
|
||||
BS.cancelScheduling(VL);
|
||||
assert((!BS.getScheduleData(VL[0]) ||
|
||||
!BS.getScheduleData(VL[0])->isPartOfBundle()) &&
|
||||
"tryScheduleBundle should cancelScheduling on failure");
|
||||
newTreeEntry(VL, false);
|
||||
return;
|
||||
}
|
||||
@ -2686,8 +2716,15 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
|
||||
ScheduleData *Bundle = nullptr;
|
||||
bool ReSchedule = false;
|
||||
DEBUG(dbgs() << "SLP: bundle: " << *VL[0] << "\n");
|
||||
|
||||
// Make sure that the scheduling region contains all
|
||||
// instructions of the bundle.
|
||||
for (Value *V : VL) {
|
||||
if (!extendSchedulingRegion(V))
|
||||
return false;
|
||||
}
|
||||
|
||||
for (Value *V : VL) {
|
||||
extendSchedulingRegion(V);
|
||||
ScheduleData *BundleMember = getScheduleData(V);
|
||||
assert(BundleMember &&
|
||||
"no ScheduleData for bundle member (maybe not in same basic block)");
|
||||
@ -2748,7 +2785,11 @@ bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
|
||||
schedule(pickedSD, ReadyInsts);
|
||||
}
|
||||
}
|
||||
return Bundle->isReady();
|
||||
if (!Bundle->isReady()) {
|
||||
cancelScheduling(VL);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
|
||||
@ -2777,9 +2818,9 @@ void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL) {
|
||||
}
|
||||
}
|
||||
|
||||
void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
|
||||
bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
|
||||
if (getScheduleData(V))
|
||||
return;
|
||||
return true;
|
||||
Instruction *I = dyn_cast<Instruction>(V);
|
||||
assert(I && "bundle member must be an instruction");
|
||||
assert(!isa<PHINode>(I) && "phi nodes don't need to be scheduled");
|
||||
@ -2790,7 +2831,7 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
|
||||
ScheduleEnd = I->getNextNode();
|
||||
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
|
||||
DEBUG(dbgs() << "SLP: initialize schedule region to " << *I << "\n");
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
// Search up and down at the same time, because we don't know if the new
|
||||
// instruction is above or below the existing scheduling region.
|
||||
@ -2799,12 +2840,17 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
|
||||
BasicBlock::iterator DownIter(ScheduleEnd);
|
||||
BasicBlock::iterator LowerEnd = BB->end();
|
||||
for (;;) {
|
||||
if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
|
||||
DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (UpIter != UpperEnd) {
|
||||
if (&*UpIter == I) {
|
||||
initScheduleData(I, ScheduleStart, nullptr, FirstLoadStoreInRegion);
|
||||
ScheduleStart = I;
|
||||
DEBUG(dbgs() << "SLP: extend schedule region start to " << *I << "\n");
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
UpIter++;
|
||||
}
|
||||
@ -2815,13 +2861,14 @@ void BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
|
||||
ScheduleEnd = I->getNextNode();
|
||||
assert(ScheduleEnd && "tried to vectorize a TerminatorInst?");
|
||||
DEBUG(dbgs() << "SLP: extend schedule region end to " << *I << "\n");
|
||||
return;
|
||||
return true;
|
||||
}
|
||||
DownIter++;
|
||||
}
|
||||
assert((UpIter != UpperEnd || DownIter != LowerEnd) &&
|
||||
"instruction not found in block");
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
|
||||
|
66
test/Transforms/SLPVectorizer/X86/schedule_budget.ll
Normal file
66
test/Transforms/SLPVectorizer/X86/schedule_budget.ll
Normal file
@ -0,0 +1,66 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -S -slp-schedule-budget=16 -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.9.0"
|
||||
|
||||
; Test if the budget for the scheduling region size works.
|
||||
; We test with a reduced budget of 16 which should prevent vectorizing the loads.
|
||||
|
||||
declare void @unknown()
|
||||
|
||||
; CHECK-LABEL: @test
|
||||
; CHECK: load float
|
||||
; CHECK: load float
|
||||
; CHECK: load float
|
||||
; CHECK: load float
|
||||
; CHECK: call void @unknown
|
||||
define void @test(float * %a, float * %b) {
|
||||
entry:
|
||||
%l0 = load float, float* %a
|
||||
%a1 = getelementptr inbounds float, float* %a, i64 1
|
||||
%l1 = load float, float* %a1
|
||||
%a2 = getelementptr inbounds float, float* %a, i64 2
|
||||
%l2 = load float, float* %a2
|
||||
%a3 = getelementptr inbounds float, float* %a, i64 3
|
||||
%l3 = load float, float* %a3
|
||||
|
||||
; some unrelated instructions in between to enlarge the scheduling region
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
call void @unknown()
|
||||
|
||||
store float %l0, float* %b
|
||||
%b1 = getelementptr inbounds float, float* %b, i64 1
|
||||
store float %l1, float* %b1
|
||||
%b2 = getelementptr inbounds float, float* %b, i64 2
|
||||
store float %l2, float* %b2
|
||||
%b3 = getelementptr inbounds float, float* %b, i64 3
|
||||
store float %l3, float* %b3
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user