1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 19:12:56 +02:00

[SystemZ] Implementation of getUnrollingPreferences().

This commit enables more unrolling for SystemZ by implementing the
SystemZTargetTransformInfo::getUnrollingPreferences() method.

Moderate unrolling has been found to work best, so DefaultUnrollRuntimeCount
has been moved into UnrollingPreferences, which lets SystemZ lower it from
the default of 8 to 4.

Reviewers: Evgeny Stupachenko, Ulrich Weigand.
https://reviews.llvm.org/D24451

llvm-svn: 282570
This commit is contained in:
Jonas Paulsson 2016-09-28 09:41:38 +00:00
parent 8fe0e6eb01
commit 3c5fa71cd5
4 changed files with 64 additions and 6 deletions

View File

@ -264,6 +264,8 @@ public:
/// transformation will select an unrolling factor based on the current cost
/// threshold and other factors.
unsigned Count;
/// Default unroll count for loops with run-time trip count.
unsigned DefaultUnrollRuntimeCount;
// Set the maximum unrolling factor. The unrolling factor may be selected
// using the appropriate cost threshold, but may not exceed this number
// (set to UINT_MAX to disable). This does not apply in cases where the

View File

@ -238,6 +238,63 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
/// Fill in the SystemZ-specific loop unrolling preferences for \p L.
///
/// The loop body is scanned once to learn whether it contains a call and how
/// many stores it performs. The store count bounds the unroll factor; loops
/// with calls are limited to full unrolling, while call-free loops also get
/// partial and runtime unrolling enabled.
///
/// \param L  The loop being considered for unrolling.
/// \param UP Preferences to update in place.
void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
                                             TTI::UnrollingPreferences &UP) {
  bool SawCall = false;
  unsigned StoreCount = 0;

  for (auto &Block : L->blocks()) {
    for (auto &Inst : *Block) {
      if (isa<CallInst>(&Inst) || isa<InvokeInst>(&Inst)) {
        ImmutableCallSite Site(&Inst);
        const Function *Callee = Site.getCalledFunction();
        if (!Callee) {
          // No statically known callee: this is an indirect call.
          SawCall = true;
        } else {
          if (isLoweredToCall(Callee))
            SawCall = true;
          // memcpy and memset intrinsics are counted as one store each.
          const auto IID = Callee->getIntrinsicID();
          if (IID == Intrinsic::memcpy || IID == Intrinsic::memset)
            ++StoreCount;
        }
      }

      if (isa<StoreInst>(&Inst)) {
        // Operand 0 of a store is the value being written.
        Type *StoredTy = Inst.getOperand(0)->getType();
        const bool IsScalarIntOrFP =
            StoredTy->isIntegerTy() || StoredTy->isFloatingPointTy();
        const bool GetsSplit =
            IsScalarIntOrFP &&
            getDataLayout().getTypeSizeInBits(StoredTy) == 128;
        // 128 bit fp/int stores get split, so they count twice.
        StoreCount += GetsSplit ? 2 : 1;
      }
    }
  }

  // The z13 processor will run out of store tags if too many stores are fed
  // into it too quickly. Cap the unroll factor so that the unrolled loop
  // holds at most 12 stores; a loop without stores is left uncapped.
  unsigned StoreLimitedMax = UINT_MAX;
  if (StoreCount != 0)
    StoreLimitedMax = 12 / StoreCount;

  if (SawCall) {
    // With a call in the loop, only full unrolling is permitted.
    UP.FullUnrollMaxCount = StoreLimitedMax;
    UP.MaxCount = 1;
    return;
  }

  UP.MaxCount = StoreLimitedMax;
  if (UP.MaxCount <= 1)
    return; // The store cap leaves no room for unrolling.

  // Allow partial and runtime trip count unrolling.
  UP.Partial = true;
  UP.Runtime = true;
  UP.PartialThreshold = 75;
  UP.DefaultUnrollRuntimeCount = 4;

  // Allow expensive instructions in the pre-header of the loop.
  UP.AllowExpensiveTripCount = true;

  UP.Force = true;
}
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't

View File

@ -50,6 +50,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
/// @}
/// \name Vector TTI Implementations

View File

@ -102,10 +102,6 @@ static cl::opt<unsigned> PragmaUnrollThreshold(
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
/// Default unroll count for loops with run-time trip count if
/// -unroll-count is not set
static const unsigned DefaultUnrollRuntimeCount = 8;
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
@ -122,6 +118,7 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.PartialThreshold = UP.Threshold;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = UINT_MAX;
UP.FullUnrollMaxCount = UINT_MAX;
UP.Partial = false;
@ -803,7 +800,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
// largest power-of-two factor that satisfies the threshold limit.
// As we'll create fixup loop, do the type of unrolling only if
// remainder loop is allowed.
UP.Count = DefaultUnrollRuntimeCount;
UP.Count = UP.DefaultUnrollRuntimeCount;
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
UP.Count >>= 1;
@ -852,7 +849,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
return false;
}
if (UP.Count == 0)
UP.Count = DefaultUnrollRuntimeCount;
UP.Count = UP.DefaultUnrollRuntimeCount;
UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
// Reduce unroll count to be the largest power-of-two factor of