|
|
|
@ -37,8 +37,8 @@
|
|
|
|
|
//
|
|
|
|
|
// TODO: Handle multiple loops at a time.
|
|
|
|
|
//
|
|
|
|
|
// TODO: Should TargetLowering::AddrMode::BaseGV be changed to a ConstantExpr
|
|
|
|
|
// instead of a GlobalValue?
|
|
|
|
|
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
|
|
|
|
|
// of a GlobalValue?
|
|
|
|
|
//
|
|
|
|
|
// TODO: When truncation is free, truncate ICmp users' operands to make it a
|
|
|
|
|
// smaller encoding (on x86 at least).
|
|
|
|
@ -63,6 +63,7 @@
|
|
|
|
|
#include "llvm/Analysis/IVUsers.h"
|
|
|
|
|
#include "llvm/Analysis/LoopPass.h"
|
|
|
|
|
#include "llvm/Analysis/ScalarEvolutionExpander.h"
|
|
|
|
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
|
|
|
|
#include "llvm/Assembly/Writer.h"
|
|
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
@ -72,7 +73,6 @@
|
|
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
|
#include "llvm/Support/ValueHandle.h"
|
|
|
|
|
#include "llvm/Support/raw_ostream.h"
|
|
|
|
|
#include "llvm/Target/TargetLowering.h"
|
|
|
|
|
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
|
|
|
|
#include "llvm/Transforms/Utils/Local.h"
|
|
|
|
|
#include <algorithm>
|
|
|
|
@ -1270,46 +1270,42 @@ void LSRUse::dump() const {
|
|
|
|
|
/// isLegalUse - Test whether the use described by AM is "legal", meaning it can
|
|
|
|
|
/// be completely folded into the user instruction at isel time. This includes
|
|
|
|
|
/// address-mode folding and special icmp tricks.
|
|
|
|
|
static bool isLegalUse(const AddrMode &AM,
|
|
|
|
|
LSRUse::KindType Kind, Type *AccessTy,
|
|
|
|
|
const TargetLowering *TLI) {
|
|
|
|
|
static bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind,
|
|
|
|
|
Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset,
|
|
|
|
|
bool HasBaseReg, int64_t Scale) {
|
|
|
|
|
switch (Kind) {
|
|
|
|
|
case LSRUse::Address:
|
|
|
|
|
// If we have low-level target information, ask the target if it can
|
|
|
|
|
// completely fold this address.
|
|
|
|
|
if (TLI) return TLI->isLegalAddressingMode(AM, AccessTy);
|
|
|
|
|
return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
|
|
|
|
|
|
|
|
|
|
// Otherwise, just guess that reg+reg addressing is legal.
|
|
|
|
|
return !AM.BaseGV && AM.BaseOffs == 0 && AM.Scale <= 1;
|
|
|
|
|
//return ;
|
|
|
|
|
|
|
|
|
|
case LSRUse::ICmpZero:
|
|
|
|
|
// There's not even a target hook for querying whether it would be legal to
|
|
|
|
|
// fold a GV into an ICmp.
|
|
|
|
|
if (AM.BaseGV)
|
|
|
|
|
if (BaseGV)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// ICmp only has two operands; don't allow more than two non-trivial parts.
|
|
|
|
|
if (AM.Scale != 0 && AM.HasBaseReg && AM.BaseOffs != 0)
|
|
|
|
|
if (Scale != 0 && HasBaseReg && BaseOffset != 0)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
|
|
|
|
|
// putting the scaled register in the other operand of the icmp.
|
|
|
|
|
if (AM.Scale != 0 && AM.Scale != -1)
|
|
|
|
|
if (Scale != 0 && Scale != -1)
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
// If we have low-level target information, ask the target if it can fold an
|
|
|
|
|
// integer immediate on an icmp.
|
|
|
|
|
if (AM.BaseOffs != 0) {
|
|
|
|
|
if (!TLI)
|
|
|
|
|
return false;
|
|
|
|
|
if (BaseOffset != 0) {
|
|
|
|
|
// We have one of:
|
|
|
|
|
// ICmpZero BaseReg + Offset => ICmp BaseReg, -Offset
|
|
|
|
|
// ICmpZero -1*ScaleReg + Offset => ICmp ScaleReg, Offset
|
|
|
|
|
// ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
|
|
|
|
|
// ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
|
|
|
|
|
// Offs is the ICmp immediate.
|
|
|
|
|
int64_t Offs = AM.BaseOffs;
|
|
|
|
|
if (AM.Scale == 0)
|
|
|
|
|
Offs = -(uint64_t)Offs; // The cast does the right thing with INT64_MIN.
|
|
|
|
|
return TLI->isLegalICmpImmediate(Offs);
|
|
|
|
|
if (Scale == 0)
|
|
|
|
|
// The cast does the right thing with INT64_MIN.
|
|
|
|
|
BaseOffset = -(uint64_t)BaseOffset;
|
|
|
|
|
return TTI.isLegalICmpImmediate(BaseOffset);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
|
|
|
|
@ -1317,92 +1313,87 @@ static bool isLegalUse(const AddrMode &AM,
|
|
|
|
|
|
|
|
|
|
case LSRUse::Basic:
|
|
|
|
|
// Only handle single-register values.
|
|
|
|
|
return !AM.BaseGV && AM.Scale == 0 && AM.BaseOffs == 0;
|
|
|
|
|
return !BaseGV && Scale == 0 && BaseOffset == 0;
|
|
|
|
|
|
|
|
|
|
case LSRUse::Special:
|
|
|
|
|
// Special case Basic to handle -1 scales.
|
|
|
|
|
return !AM.BaseGV && (AM.Scale == 0 || AM.Scale == -1) && AM.BaseOffs == 0;
|
|
|
|
|
return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
llvm_unreachable("Invalid LSRUse Kind!");
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool isLegalUse(AddrMode AM,
|
|
|
|
|
int64_t MinOffset, int64_t MaxOffset,
|
|
|
|
|
LSRUse::KindType Kind, Type *AccessTy,
|
|
|
|
|
const TargetLowering *TLI) {
|
|
|
|
|
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
|
|
|
|
|
int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
|
|
|
|
|
GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg,
|
|
|
|
|
int64_t Scale) {
|
|
|
|
|
// Check for overflow.
|
|
|
|
|
if (((int64_t)((uint64_t)AM.BaseOffs + MinOffset) > AM.BaseOffs) !=
|
|
|
|
|
if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) !=
|
|
|
|
|
(MinOffset > 0))
|
|
|
|
|
return false;
|
|
|
|
|
AM.BaseOffs = (uint64_t)AM.BaseOffs + MinOffset;
|
|
|
|
|
if (isLegalUse(AM, Kind, AccessTy, TLI)) {
|
|
|
|
|
AM.BaseOffs = (uint64_t)AM.BaseOffs - MinOffset;
|
|
|
|
|
// Check for overflow.
|
|
|
|
|
if (((int64_t)((uint64_t)AM.BaseOffs + MaxOffset) > AM.BaseOffs) !=
|
|
|
|
|
(MaxOffset > 0))
|
|
|
|
|
return false;
|
|
|
|
|
AM.BaseOffs = (uint64_t)AM.BaseOffs + MaxOffset;
|
|
|
|
|
return isLegalUse(AM, Kind, AccessTy, TLI);
|
|
|
|
|
}
|
|
|
|
|
return false;
|
|
|
|
|
MinOffset = (uint64_t)BaseOffset + MinOffset;
|
|
|
|
|
if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) !=
|
|
|
|
|
(MaxOffset > 0))
|
|
|
|
|
return false;
|
|
|
|
|
MaxOffset = (uint64_t)BaseOffset + MaxOffset;
|
|
|
|
|
|
|
|
|
|
return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg,
|
|
|
|
|
Scale) &&
|
|
|
|
|
isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool isAlwaysFoldable(int64_t BaseOffs,
|
|
|
|
|
GlobalValue *BaseGV,
|
|
|
|
|
bool HasBaseReg,
|
|
|
|
|
static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
|
|
|
|
|
int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy,
|
|
|
|
|
const Formula &F) {
|
|
|
|
|
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.AM.BaseGV,
|
|
|
|
|
F.AM.BaseOffs, F.AM.HasBaseReg, F.AM.Scale);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
|
|
|
|
|
LSRUse::KindType Kind, Type *AccessTy,
|
|
|
|
|
const TargetLowering *TLI) {
|
|
|
|
|
GlobalValue *BaseGV, int64_t BaseOffset,
|
|
|
|
|
bool HasBaseReg) {
|
|
|
|
|
// Fast-path: zero is always foldable.
|
|
|
|
|
if (BaseOffs == 0 && !BaseGV) return true;
|
|
|
|
|
if (BaseOffset == 0 && !BaseGV) return true;
|
|
|
|
|
|
|
|
|
|
// Conservatively, create an address with an immediate and a
|
|
|
|
|
// base and a scale.
|
|
|
|
|
AddrMode AM;
|
|
|
|
|
AM.BaseOffs = BaseOffs;
|
|
|
|
|
AM.BaseGV = BaseGV;
|
|
|
|
|
AM.HasBaseReg = HasBaseReg;
|
|
|
|
|
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
|
|
|
|
|
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
|
|
|
|
|
|
|
|
|
|
// Canonicalize a scale of 1 to a base register if the formula doesn't
|
|
|
|
|
// already have a base register.
|
|
|
|
|
if (!AM.HasBaseReg && AM.Scale == 1) {
|
|
|
|
|
AM.Scale = 0;
|
|
|
|
|
AM.HasBaseReg = true;
|
|
|
|
|
if (!HasBaseReg && Scale == 1) {
|
|
|
|
|
Scale = 0;
|
|
|
|
|
HasBaseReg = true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return isLegalUse(AM, Kind, AccessTy, TLI);
|
|
|
|
|
return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static bool isAlwaysFoldable(const SCEV *S,
|
|
|
|
|
int64_t MinOffset, int64_t MaxOffset,
|
|
|
|
|
bool HasBaseReg,
|
|
|
|
|
LSRUse::KindType Kind, Type *AccessTy,
|
|
|
|
|
const TargetLowering *TLI,
|
|
|
|
|
ScalarEvolution &SE) {
|
|
|
|
|
static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
|
|
|
|
|
ScalarEvolution &SE, int64_t MinOffset,
|
|
|
|
|
int64_t MaxOffset, LSRUse::KindType Kind,
|
|
|
|
|
Type *AccessTy, const SCEV *S, bool HasBaseReg) {
|
|
|
|
|
// Fast-path: zero is always foldable.
|
|
|
|
|
if (S->isZero()) return true;
|
|
|
|
|
|
|
|
|
|
// Conservatively, create an address with an immediate and a
|
|
|
|
|
// base and a scale.
|
|
|
|
|
int64_t BaseOffs = ExtractImmediate(S, SE);
|
|
|
|
|
int64_t BaseOffset = ExtractImmediate(S, SE);
|
|
|
|
|
GlobalValue *BaseGV = ExtractSymbol(S, SE);
|
|
|
|
|
|
|
|
|
|
// If there's anything else involved, it's not foldable.
|
|
|
|
|
if (!S->isZero()) return false;
|
|
|
|
|
|
|
|
|
|
// Fast-path: zero is always foldable.
|
|
|
|
|
if (BaseOffs == 0 && !BaseGV) return true;
|
|
|
|
|
if (BaseOffset == 0 && !BaseGV) return true;
|
|
|
|
|
|
|
|
|
|
// Conservatively, create an address with an immediate and a
|
|
|
|
|
// base and a scale.
|
|
|
|
|
AddrMode AM;
|
|
|
|
|
AM.BaseOffs = BaseOffs;
|
|
|
|
|
AM.BaseGV = BaseGV;
|
|
|
|
|
AM.HasBaseReg = HasBaseReg;
|
|
|
|
|
AM.Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
|
|
|
|
|
int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
|
|
|
|
|
|
|
|
|
|
return isLegalUse(AM, MinOffset, MaxOffset, Kind, AccessTy, TLI);
|
|
|
|
|
return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
|
|
|
|
|
BaseOffset, HasBaseReg, Scale);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
@ -1502,7 +1493,7 @@ class LSRInstance {
|
|
|
|
|
ScalarEvolution &SE;
|
|
|
|
|
DominatorTree &DT;
|
|
|
|
|
LoopInfo &LI;
|
|
|
|
|
const TargetLowering *const TLI;
|
|
|
|
|
const TargetTransformInfo &TTI;
|
|
|
|
|
Loop *const L;
|
|
|
|
|
bool Changed;
|
|
|
|
|
|
|
|
|
@ -1638,7 +1629,7 @@ class LSRInstance {
|
|
|
|
|
Pass *P);
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
LSRInstance(const TargetLowering *tli, Loop *l, Pass *P);
|
|
|
|
|
LSRInstance(Loop *L, Pass *P);
|
|
|
|
|
|
|
|
|
|
bool getChanged() const { return Changed; }
|
|
|
|
|
|
|
|
|
@ -1688,12 +1679,9 @@ void LSRInstance::OptimizeShadowIV() {
|
|
|
|
|
}
|
|
|
|
|
if (!DestTy) continue;
|
|
|
|
|
|
|
|
|
|
if (TLI) {
|
|
|
|
|
// If target does not support DestTy natively then do not apply
|
|
|
|
|
// this transformation.
|
|
|
|
|
EVT DVT = TLI->getValueType(DestTy);
|
|
|
|
|
if (!TLI->isTypeLegal(DVT)) continue;
|
|
|
|
|
}
|
|
|
|
|
// If target does not support DestTy natively then do not apply
|
|
|
|
|
// this transformation.
|
|
|
|
|
if (!TTI.isTypeLegal(DestTy)) continue;
|
|
|
|
|
|
|
|
|
|
PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
|
|
|
|
|
if (!PH) continue;
|
|
|
|
@ -2015,18 +2003,17 @@ LSRInstance::OptimizeLoopTermCond() {
|
|
|
|
|
if (C->getValue().getMinSignedBits() >= 64 ||
|
|
|
|
|
C->getValue().isMinSignedValue())
|
|
|
|
|
goto decline_post_inc;
|
|
|
|
|
// Without TLI, assume that any stride might be valid, and so any
|
|
|
|
|
// use might be shared.
|
|
|
|
|
if (!TLI)
|
|
|
|
|
goto decline_post_inc;
|
|
|
|
|
// Check for possible scaled-address reuse.
|
|
|
|
|
Type *AccessTy = getAccessType(UI->getUser());
|
|
|
|
|
AddrMode AM;
|
|
|
|
|
AM.Scale = C->getSExtValue();
|
|
|
|
|
if (TLI->isLegalAddressingMode(AM, AccessTy))
|
|
|
|
|
int64_t Scale = C->getSExtValue();
|
|
|
|
|
if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
|
|
|
|
|
/*BaseOffset=*/ 0,
|
|
|
|
|
/*HasBaseReg=*/ false, Scale))
|
|
|
|
|
goto decline_post_inc;
|
|
|
|
|
AM.Scale = -AM.Scale;
|
|
|
|
|
if (TLI->isLegalAddressingMode(AM, AccessTy))
|
|
|
|
|
Scale = -Scale;
|
|
|
|
|
if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0,
|
|
|
|
|
/*BaseOffset=*/ 0,
|
|
|
|
|
/*HasBaseReg=*/ false, Scale))
|
|
|
|
|
goto decline_post_inc;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
@ -2096,13 +2083,13 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg,
|
|
|
|
|
return false;
|
|
|
|
|
// Conservatively assume HasBaseReg is true for now.
|
|
|
|
|
if (NewOffset < LU.MinOffset) {
|
|
|
|
|
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, HasBaseReg,
|
|
|
|
|
Kind, AccessTy, TLI))
|
|
|
|
|
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
|
|
|
|
|
LU.MaxOffset - NewOffset, HasBaseReg))
|
|
|
|
|
return false;
|
|
|
|
|
NewMinOffset = NewOffset;
|
|
|
|
|
} else if (NewOffset > LU.MaxOffset) {
|
|
|
|
|
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, HasBaseReg,
|
|
|
|
|
Kind, AccessTy, TLI))
|
|
|
|
|
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
|
|
|
|
|
NewOffset - LU.MinOffset, HasBaseReg))
|
|
|
|
|
return false;
|
|
|
|
|
NewMaxOffset = NewOffset;
|
|
|
|
|
}
|
|
|
|
@ -2131,7 +2118,8 @@ LSRInstance::getUse(const SCEV *&Expr,
|
|
|
|
|
int64_t Offset = ExtractImmediate(Expr, SE);
|
|
|
|
|
|
|
|
|
|
// Basic uses can't accept any offset, for example.
|
|
|
|
|
if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
|
|
|
|
|
if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0,
|
|
|
|
|
Offset, /*HasBaseReg=*/ true)) {
|
|
|
|
|
Expr = Copy;
|
|
|
|
|
Offset = 0;
|
|
|
|
|
}
|
|
|
|
@ -2396,7 +2384,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
|
|
|
|
|
/// TODO: Consider IVInc free if it's already used in another chain.
|
|
|
|
|
static bool
|
|
|
|
|
isProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users,
|
|
|
|
|
ScalarEvolution &SE, const TargetLowering *TLI) {
|
|
|
|
|
ScalarEvolution &SE, const TargetTransformInfo &TTI) {
|
|
|
|
|
if (StressIVChain)
|
|
|
|
|
return true;
|
|
|
|
|
|
|
|
|
@ -2654,7 +2642,7 @@ void LSRInstance::CollectChains() {
|
|
|
|
|
for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
|
|
|
|
|
UsersIdx < NChains; ++UsersIdx) {
|
|
|
|
|
if (!isProfitableChain(IVChainVec[UsersIdx],
|
|
|
|
|
ChainUsersVec[UsersIdx].FarUsers, SE, TLI))
|
|
|
|
|
ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
|
|
|
|
|
continue;
|
|
|
|
|
// Preserve the chain at UsersIdx.
|
|
|
|
|
if (ChainIdx != UsersIdx)
|
|
|
|
@ -2681,7 +2669,7 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
|
|
|
|
|
|
|
|
|
|
/// Return true if the IVInc can be folded into an addressing mode.
|
|
|
|
|
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
|
|
|
|
|
Value *Operand, const TargetLowering *TLI) {
|
|
|
|
|
Value *Operand, const TargetTransformInfo &TTI) {
|
|
|
|
|
const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
|
|
|
|
|
if (!IncConst || !isAddressUse(UserInst, Operand))
|
|
|
|
|
return false;
|
|
|
|
@ -2690,8 +2678,9 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
int64_t IncOffset = IncConst->getValue()->getSExtValue();
|
|
|
|
|
if (!isAlwaysFoldable(IncOffset, /*BaseGV=*/0, /*HasBaseReg=*/false,
|
|
|
|
|
LSRUse::Address, getAccessType(UserInst), TLI))
|
|
|
|
|
if (!isAlwaysFoldable(TTI, LSRUse::Address,
|
|
|
|
|
getAccessType(UserInst), /*BaseGV=*/ 0,
|
|
|
|
|
IncOffset, /*HasBaseReg=*/ false))
|
|
|
|
|
return false;
|
|
|
|
|
|
|
|
|
|
return true;
|
|
|
|
@ -2762,7 +2751,7 @@ void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
|
|
|
|
|
|
|
|
|
|
// If an IV increment can't be folded, use it as the next IV value.
|
|
|
|
|
if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
|
|
|
|
|
TLI)) {
|
|
|
|
|
TTI)) {
|
|
|
|
|
assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
|
|
|
|
|
IVSrc = IVOper;
|
|
|
|
|
LeftOverExpr = 0;
|
|
|
|
@ -3106,9 +3095,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
|
|
|
|
|
|
|
|
|
|
// Don't pull a constant into a register if the constant could be folded
|
|
|
|
|
// into an immediate field.
|
|
|
|
|
if (isAlwaysFoldable(*J, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
Base.getNumRegs() > 1,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI, SE))
|
|
|
|
|
if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
|
|
|
|
|
LU.AccessTy, *J, Base.getNumRegs() > 1))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Collect all operands except *J.
|
|
|
|
@ -3120,9 +3108,8 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
|
|
|
|
|
// Don't leave just a constant behind in a register if the constant could
|
|
|
|
|
// be folded into an immediate field.
|
|
|
|
|
if (InnerAddOps.size() == 1 &&
|
|
|
|
|
isAlwaysFoldable(InnerAddOps[0], LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
Base.getNumRegs() > 1,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI, SE))
|
|
|
|
|
isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
|
|
|
|
|
LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
|
|
|
|
@ -3132,10 +3119,10 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
|
|
|
|
|
|
|
|
|
|
// Add the remaining pieces of the add back into the new formula.
|
|
|
|
|
const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
|
|
|
|
|
if (TLI && InnerSumSC &&
|
|
|
|
|
if (InnerSumSC &&
|
|
|
|
|
SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
|
|
|
|
|
TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
|
|
|
|
|
InnerSumSC->getValue()->getZExtValue())) {
|
|
|
|
|
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
|
|
|
|
|
InnerSumSC->getValue()->getZExtValue())) {
|
|
|
|
|
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
|
|
|
|
|
InnerSumSC->getValue()->getZExtValue();
|
|
|
|
|
F.BaseRegs.erase(F.BaseRegs.begin() + i);
|
|
|
|
@ -3144,9 +3131,9 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
|
|
|
|
|
|
|
|
|
|
// Add J as its own register, or an unfolded immediate.
|
|
|
|
|
const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
|
|
|
|
|
if (TLI && SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
|
|
|
|
|
TLI->isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
|
|
|
|
|
SC->getValue()->getZExtValue()))
|
|
|
|
|
if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
|
|
|
|
|
TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
|
|
|
|
|
SC->getValue()->getZExtValue()))
|
|
|
|
|
F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
|
|
|
|
|
SC->getValue()->getZExtValue();
|
|
|
|
|
else
|
|
|
|
@ -3204,8 +3191,7 @@ void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
|
|
|
|
|
continue;
|
|
|
|
|
Formula F = Base;
|
|
|
|
|
F.AM.BaseGV = GV;
|
|
|
|
|
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI))
|
|
|
|
|
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
|
|
|
|
|
continue;
|
|
|
|
|
F.BaseRegs[i] = G;
|
|
|
|
|
(void)InsertFormula(LU, LUIdx, F);
|
|
|
|
@ -3229,8 +3215,8 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
|
|
|
|
|
E = Worklist.end(); I != E; ++I) {
|
|
|
|
|
Formula F = Base;
|
|
|
|
|
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
|
|
|
|
|
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI)) {
|
|
|
|
|
if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
|
|
|
|
|
LU.AccessTy, F)) {
|
|
|
|
|
// Add the offset to the base register.
|
|
|
|
|
const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
|
|
|
|
|
// If it cancelled out, drop the base register, otherwise update it.
|
|
|
|
@ -3249,8 +3235,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
|
|
|
|
|
continue;
|
|
|
|
|
Formula F = Base;
|
|
|
|
|
F.AM.BaseOffs = (uint64_t)F.AM.BaseOffs + Imm;
|
|
|
|
|
if (!isLegalUse(F.AM, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI))
|
|
|
|
|
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
|
|
|
|
|
continue;
|
|
|
|
|
F.BaseRegs[i] = G;
|
|
|
|
|
(void)InsertFormula(LU, LUIdx, F);
|
|
|
|
@ -3297,7 +3282,7 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
|
|
|
|
|
F.AM.BaseOffs = NewBaseOffs;
|
|
|
|
|
|
|
|
|
|
// Check that this scale is legal.
|
|
|
|
|
if (!isLegalUse(F.AM, Offset, Offset, LU.Kind, LU.AccessTy, TLI))
|
|
|
|
|
if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
// Compensate for the use having MinOffset built into it.
|
|
|
|
@ -3352,13 +3337,13 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
|
|
|
|
|
Base.AM.Scale = Factor;
|
|
|
|
|
Base.AM.HasBaseReg = Base.BaseRegs.size() > 1;
|
|
|
|
|
// Check whether this scale is going to be legal.
|
|
|
|
|
if (!isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI)) {
|
|
|
|
|
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
|
|
|
|
|
Base)) {
|
|
|
|
|
// As a special case, handle out-of-loop Basic users specially.
|
|
|
|
|
// TODO: Reconsider this special case.
|
|
|
|
|
if (LU.Kind == LSRUse::Basic &&
|
|
|
|
|
isLegalUse(Base.AM, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
LSRUse::Special, LU.AccessTy, TLI) &&
|
|
|
|
|
isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
|
|
|
|
|
LU.AccessTy, Base) &&
|
|
|
|
|
LU.AllFixupsOutsideLoop)
|
|
|
|
|
LU.Kind = LSRUse::Special;
|
|
|
|
|
else
|
|
|
|
@ -3391,9 +3376,6 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
|
|
|
|
|
|
|
|
|
|
/// GenerateTruncates - Generate reuse formulae from different IV types.
|
|
|
|
|
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
|
|
|
|
|
// This requires TargetLowering to tell us which truncates are free.
|
|
|
|
|
if (!TLI) return;
|
|
|
|
|
|
|
|
|
|
// Don't bother truncating symbolic values.
|
|
|
|
|
if (Base.AM.BaseGV) return;
|
|
|
|
|
|
|
|
|
@ -3405,7 +3387,7 @@ void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
|
|
|
|
|
for (SmallSetVector<Type *, 4>::const_iterator
|
|
|
|
|
I = Types.begin(), E = Types.end(); I != E; ++I) {
|
|
|
|
|
Type *SrcTy = *I;
|
|
|
|
|
if (SrcTy != DstTy && TLI->isTruncateFree(SrcTy, DstTy)) {
|
|
|
|
|
if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
|
|
|
|
|
Formula F = Base;
|
|
|
|
|
|
|
|
|
|
if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
|
|
|
|
@ -3560,8 +3542,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
|
|
|
|
|
continue;
|
|
|
|
|
Formula NewF = F;
|
|
|
|
|
NewF.AM.BaseOffs = Offs;
|
|
|
|
|
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI))
|
|
|
|
|
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
|
|
|
|
|
NewF))
|
|
|
|
|
continue;
|
|
|
|
|
NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);
|
|
|
|
|
|
|
|
|
@ -3585,10 +3567,9 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
|
|
|
|
|
continue;
|
|
|
|
|
Formula NewF = F;
|
|
|
|
|
NewF.AM.BaseOffs = (uint64_t)NewF.AM.BaseOffs + Imm;
|
|
|
|
|
if (!isLegalUse(NewF.AM, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI)) {
|
|
|
|
|
if (!TLI ||
|
|
|
|
|
!TLI->isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
|
|
|
|
|
if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
LU.Kind, LU.AccessTy, NewF)) {
|
|
|
|
|
if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
|
|
|
|
|
continue;
|
|
|
|
|
NewF = F;
|
|
|
|
|
NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
|
|
|
|
@ -3898,9 +3879,8 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
|
|
|
|
|
bool Any = false;
|
|
|
|
|
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
|
|
|
|
|
Formula &F = LUThatHas->Formulae[i];
|
|
|
|
|
if (!isLegalUse(F.AM,
|
|
|
|
|
LUThatHas->MinOffset, LUThatHas->MaxOffset,
|
|
|
|
|
LUThatHas->Kind, LUThatHas->AccessTy, TLI)) {
|
|
|
|
|
if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
|
|
|
|
|
LUThatHas->Kind, LUThatHas->AccessTy, F)) {
|
|
|
|
|
DEBUG(dbgs() << " Deleting "; F.print(dbgs());
|
|
|
|
|
dbgs() << '\n');
|
|
|
|
|
LUThatHas->DeleteFormula(F);
|
|
|
|
@ -4589,13 +4569,11 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
|
|
|
|
|
Changed |= DeleteTriviallyDeadInstructions(DeadInsts);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
|
|
|
|
|
: IU(P->getAnalysis<IVUsers>()),
|
|
|
|
|
SE(P->getAnalysis<ScalarEvolution>()),
|
|
|
|
|
DT(P->getAnalysis<DominatorTree>()),
|
|
|
|
|
LI(P->getAnalysis<LoopInfo>()),
|
|
|
|
|
TLI(tli), L(l), Changed(false), IVIncInsertPos(0) {
|
|
|
|
|
|
|
|
|
|
LSRInstance::LSRInstance(Loop *L, Pass *P)
|
|
|
|
|
: IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()),
|
|
|
|
|
DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()),
|
|
|
|
|
TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false),
|
|
|
|
|
IVIncInsertPos(0) {
|
|
|
|
|
// If LoopSimplify form is not available, stay out of trouble.
|
|
|
|
|
if (!L->isLoopSimplifyForm())
|
|
|
|
|
return;
|
|
|
|
@ -4678,14 +4656,14 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
|
|
|
|
|
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
// Formulae should be legal.
|
|
|
|
|
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
|
|
|
|
|
E = Uses.end(); I != E; ++I) {
|
|
|
|
|
const LSRUse &LU = *I;
|
|
|
|
|
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
|
|
|
|
|
JE = LU.Formulae.end(); J != JE; ++J)
|
|
|
|
|
assert(isLegalUse(J->AM, LU.MinOffset, LU.MaxOffset,
|
|
|
|
|
LU.Kind, LU.AccessTy, TLI) &&
|
|
|
|
|
"Illegal formula generated!");
|
|
|
|
|
for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end();
|
|
|
|
|
I != E; ++I) {
|
|
|
|
|
const LSRUse &LU = *I;
|
|
|
|
|
for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(),
|
|
|
|
|
JE = LU.Formulae.end();
|
|
|
|
|
J != JE; ++J)
|
|
|
|
|
assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
|
|
|
|
|
*J) && "Illegal formula generated!");
|
|
|
|
|
};
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
@ -4757,13 +4735,9 @@ void LSRInstance::dump() const {
|
|
|
|
|
namespace {
|
|
|
|
|
|
|
|
|
|
class LoopStrengthReduce : public LoopPass {
|
|
|
|
|
/// TLI - Keep a pointer to a TargetLowering to consult for determining
|
|
|
|
|
/// transformation profitability.
|
|
|
|
|
const TargetLowering *const TLI;
|
|
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
static char ID; // Pass ID, replacement for typeid
|
|
|
|
|
explicit LoopStrengthReduce(const TargetLowering *tli = 0);
|
|
|
|
|
LoopStrengthReduce();
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
bool runOnLoop(Loop *L, LPPassManager &LPM);
|
|
|
|
@ -4775,6 +4749,7 @@ private:
|
|
|
|
|
char LoopStrengthReduce::ID = 0;
|
|
|
|
|
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
|
|
|
|
|
"Loop Strength Reduction", false, false)
|
|
|
|
|
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
|
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
|
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
|
|
|
|
INITIALIZE_PASS_DEPENDENCY(IVUsers)
|
|
|
|
@ -4784,14 +4759,13 @@ INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
|
|
|
|
|
"Loop Strength Reduction", false, false)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
|
|
|
|
|
return new LoopStrengthReduce(TLI);
|
|
|
|
|
Pass *llvm::createLoopStrengthReducePass() {
|
|
|
|
|
return new LoopStrengthReduce();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LoopStrengthReduce::LoopStrengthReduce(const TargetLowering *tli)
|
|
|
|
|
: LoopPass(ID), TLI(tli) {
|
|
|
|
|
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
|
|
|
|
|
}
|
|
|
|
|
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
|
|
|
|
|
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
|
|
|
|
|
// We split critical edges, so we change the CFG. However, we do update
|
|
|
|
@ -4810,13 +4784,14 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
|
|
|
|
|
AU.addRequiredID(LoopSimplifyID);
|
|
|
|
|
AU.addRequired<IVUsers>();
|
|
|
|
|
AU.addPreserved<IVUsers>();
|
|
|
|
|
AU.addRequired<TargetTransformInfo>();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
|
|
|
|
|
bool Changed = false;
|
|
|
|
|
|
|
|
|
|
// Run the main LSR transformation.
|
|
|
|
|
Changed |= LSRInstance(TLI, L, this).getChanged();
|
|
|
|
|
Changed |= LSRInstance(L, this).getChanged();
|
|
|
|
|
|
|
|
|
|
// Remove any extra phis created by processing inner loops.
|
|
|
|
|
Changed |= DeleteDeadPHIs(L->getHeader());
|
|
|
|
@ -4826,8 +4801,10 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
|
|
|
|
|
#ifndef NDEBUG
|
|
|
|
|
Rewriter.setDebugType(DEBUG_TYPE);
|
|
|
|
|
#endif
|
|
|
|
|
unsigned numFolded = Rewriter.
|
|
|
|
|
replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), DeadInsts, TLI);
|
|
|
|
|
unsigned numFolded =
|
|
|
|
|
Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(),
|
|
|
|
|
DeadInsts,
|
|
|
|
|
&getAnalysis<TargetTransformInfo>());
|
|
|
|
|
if (numFolded) {
|
|
|
|
|
Changed = true;
|
|
|
|
|
DeleteTriviallyDeadInstructions(DeadInsts);
|
|
|
|
|