mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
Reroll loops with multiple IV and negative step part 3
support multiple induction variables This patch enable loop reroll for the following case: for(int i=0; i<N; i += 2) { S += *a++; S += *a++; }; Differential Revision: http://reviews.llvm.org/D16550 llvm-svn: 268147
This commit is contained in:
parent
a0849b6c3a
commit
840b3f8ac8
@ -163,6 +163,9 @@ namespace {
|
||||
|
||||
// Map between induction variable and its increment
|
||||
DenseMap<Instruction *, int64_t> IVToIncMap;
|
||||
// For loop with multiple induction variable, remember the one used only to
|
||||
// control the loop.
|
||||
Instruction *LoopControlIV;
|
||||
|
||||
// A chain of isomorphic instructions, identified by a single-use PHI
|
||||
// representing a reduction. Only the last value may be used outside the
|
||||
@ -350,9 +353,11 @@ namespace {
|
||||
ScalarEvolution *SE, AliasAnalysis *AA,
|
||||
TargetLibraryInfo *TLI, DominatorTree *DT, LoopInfo *LI,
|
||||
bool PreserveLCSSA,
|
||||
DenseMap<Instruction *, int64_t> &IncrMap)
|
||||
DenseMap<Instruction *, int64_t> &IncrMap,
|
||||
Instruction *LoopCtrlIV)
|
||||
: Parent(Parent), L(L), SE(SE), AA(AA), TLI(TLI), DT(DT), LI(LI),
|
||||
PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap) {}
|
||||
PreserveLCSSA(PreserveLCSSA), IV(IV), IVToIncMap(IncrMap),
|
||||
LoopControlIV(LoopCtrlIV) {}
|
||||
|
||||
/// Stage 1: Find all the DAG roots for the induction variable.
|
||||
bool findRoots();
|
||||
@ -391,6 +396,7 @@ namespace {
|
||||
UsesTy::iterator Start,
|
||||
UsesTy::iterator End);
|
||||
void replaceIV(Instruction *Inst, Instruction *IV, const SCEV *IterCount);
|
||||
void updateNonLoopCtrlIncr();
|
||||
|
||||
LoopReroll *Parent;
|
||||
|
||||
@ -421,8 +427,18 @@ namespace {
|
||||
UsesTy Uses;
|
||||
// Map between induction variable and its increment
|
||||
DenseMap<Instruction *, int64_t> &IVToIncMap;
|
||||
Instruction *LoopControlIV;
|
||||
};
|
||||
|
||||
// Check if it is a compare-like instruction whose user is a branch
|
||||
bool isCompareUsedByBranch(Instruction *I) {
|
||||
auto *TI = I->getParent()->getTerminator();
|
||||
if (!isa<BranchInst>(TI) || !isa<CmpInst>(I))
|
||||
return false;
|
||||
return I->hasOneUse() && TI->getOperand(0) == I;
|
||||
};
|
||||
|
||||
bool isLoopControlIV(Loop *L, Instruction *IV);
|
||||
void collectPossibleIVs(Loop *L, SmallInstructionVector &PossibleIVs);
|
||||
void collectPossibleReductions(Loop *L,
|
||||
ReductionTracker &Reductions);
|
||||
@ -494,6 +510,60 @@ static const SCEVConstant *getIncrmentFactorSCEV(ScalarEvolution *SE,
|
||||
return CIncSCEV;
|
||||
}
|
||||
|
||||
// Check if an IV is only used to control the loop. There are two cases:
|
||||
// 1. It only has one use which is loop increment, and the increment is only
|
||||
// used by comparison and the PHI, and the comparison is only used by branch.
|
||||
// 2. It is used by loop increment and the comparison, the loop increment is
|
||||
// only used by the PHI, and the comparison is used only by the branch.
|
||||
bool LoopReroll::isLoopControlIV(Loop *L, Instruction *IV) {
|
||||
|
||||
unsigned IVUses = IV->getNumUses();
|
||||
if (IVUses != 2 && IVUses != 1)
|
||||
return false;
|
||||
|
||||
for (auto *User : IV->users()) {
|
||||
int32_t IncOrCmpUses = User->getNumUses();
|
||||
bool IsCompInst = isCompareUsedByBranch(cast<Instruction>(User));
|
||||
|
||||
// User can only have one or two uses.
|
||||
if (IncOrCmpUses != 2 && IncOrCmpUses != 1)
|
||||
return false;
|
||||
|
||||
// Case 1
|
||||
if (IVUses == 1) {
|
||||
// The only user must be the loop increment.
|
||||
// The loop increment must have two uses.
|
||||
if (IsCompInst || IncOrCmpUses != 2)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Case 2
|
||||
if (IVUses == 2 && IncOrCmpUses != 1)
|
||||
return false;
|
||||
|
||||
// The users of the IV must be a binary operation or a comparison
|
||||
if (auto *BO = dyn_cast<BinaryOperator>(User)) {
|
||||
if (BO->getOpcode() == Instruction::Add) {
|
||||
// Loop Increment
|
||||
// User of Loop Increment should be either PHI or CMP
|
||||
for (auto *UU : User->users()) {
|
||||
if (PHINode *PN = dyn_cast<PHINode>(UU)) {
|
||||
if (PN != IV)
|
||||
return false;
|
||||
}
|
||||
// Must be a CMP
|
||||
else if (!isCompareUsedByBranch(dyn_cast<Instruction>(UU)))
|
||||
return false;
|
||||
}
|
||||
} else
|
||||
return false;
|
||||
// Compare : can only have one use, and must be branch
|
||||
} else if (!IsCompInst)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Collect the list of loop induction variables with respect to which it might
|
||||
// be possible to reroll the loop.
|
||||
void LoopReroll::collectPossibleIVs(Loop *L,
|
||||
@ -525,7 +595,14 @@ void LoopReroll::collectPossibleIVs(Loop *L,
|
||||
IVToIncMap[&*I] = IncSCEV->getValue()->getSExtValue();
|
||||
DEBUG(dbgs() << "LRR: Possible IV: " << *I << " = " << *PHISCEV
|
||||
<< "\n");
|
||||
PossibleIVs.push_back(&*I);
|
||||
|
||||
if (isLoopControlIV(L, &*I)) {
|
||||
assert(!LoopControlIV && "Found two loop control only IV");
|
||||
LoopControlIV = &(*I);
|
||||
DEBUG(dbgs() << "LRR: Possible loop control only IV: " << *I << " = "
|
||||
<< *PHISCEV << "\n");
|
||||
} else
|
||||
PossibleIVs.push_back(&*I);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1072,6 +1149,28 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
|
||||
Uses[I].set(IL_All);
|
||||
}
|
||||
|
||||
// Make sure we mark loop-control-only PHIs as used in all iterations. See
|
||||
// comment above LoopReroll::isLoopControlIV for more information.
|
||||
BasicBlock *Header = L->getHeader();
|
||||
if (LoopControlIV && LoopControlIV != IV) {
|
||||
for (auto *U : LoopControlIV->users()) {
|
||||
Instruction *IVUser = dyn_cast<Instruction>(U);
|
||||
// IVUser could be loop increment or compare
|
||||
Uses[IVUser].set(IL_All);
|
||||
for (auto *UU : IVUser->users()) {
|
||||
Instruction *UUser = dyn_cast<Instruction>(UU);
|
||||
// UUser could be compare, PHI or branch
|
||||
Uses[UUser].set(IL_All);
|
||||
// Is UUser a compare instruction?
|
||||
if (UU->hasOneUse()) {
|
||||
Instruction *BI = dyn_cast<BranchInst>(*UUser->user_begin());
|
||||
if (BI == cast<BranchInst>(Header->getTerminator()))
|
||||
Uses[BI].set(IL_All);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure all instructions in the loop are in one and only one
|
||||
// set.
|
||||
for (auto &KV : Uses) {
|
||||
@ -1314,25 +1413,65 @@ void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
|
||||
++J;
|
||||
}
|
||||
|
||||
// We need to create a new induction variable for each different BaseInst.
|
||||
for (auto &DRS : RootSets)
|
||||
// Insert the new induction variable.
|
||||
replaceIV(DRS.BaseInst, IV, IterCount);
|
||||
bool HasTwoIVs = LoopControlIV && LoopControlIV != IV;
|
||||
|
||||
if (HasTwoIVs) {
|
||||
updateNonLoopCtrlIncr();
|
||||
replaceIV(LoopControlIV, LoopControlIV, IterCount);
|
||||
} else
|
||||
// We need to create a new induction variable for each different BaseInst.
|
||||
for (auto &DRS : RootSets)
|
||||
// Insert the new induction variable.
|
||||
replaceIV(DRS.BaseInst, IV, IterCount);
|
||||
|
||||
SimplifyInstructionsInBlock(Header, TLI);
|
||||
DeleteDeadPHIs(Header, TLI);
|
||||
}
|
||||
|
||||
// For non-loop-control IVs, we only need to update the last increment
|
||||
// with right amount, then we are done.
|
||||
void LoopReroll::DAGRootTracker::updateNonLoopCtrlIncr() {
|
||||
const SCEV *NewInc = nullptr;
|
||||
for (auto *LoopInc : LoopIncs) {
|
||||
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LoopInc);
|
||||
const SCEVConstant *COp = nullptr;
|
||||
if (GEP && LoopInc->getOperand(0)->getType()->isPointerTy()) {
|
||||
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
|
||||
} else {
|
||||
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(0)));
|
||||
if (!COp)
|
||||
COp = dyn_cast<SCEVConstant>(SE->getSCEV(LoopInc->getOperand(1)));
|
||||
}
|
||||
|
||||
assert(COp && "Didn't find constant operand of LoopInc!\n");
|
||||
|
||||
const APInt &AInt = COp->getValue()->getValue();
|
||||
const SCEV *ScaleSCEV = SE->getConstant(COp->getType(), Scale);
|
||||
if (AInt.isNegative()) {
|
||||
NewInc = SE->getNegativeSCEV(COp);
|
||||
NewInc = SE->getUDivExpr(NewInc, ScaleSCEV);
|
||||
NewInc = SE->getNegativeSCEV(NewInc);
|
||||
} else
|
||||
NewInc = SE->getUDivExpr(COp, ScaleSCEV);
|
||||
|
||||
LoopInc->setOperand(1, dyn_cast<SCEVConstant>(NewInc)->getValue());
|
||||
}
|
||||
}
|
||||
|
||||
void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
|
||||
Instruction *InstIV,
|
||||
const SCEV *IterCount) {
|
||||
BasicBlock *Header = L->getHeader();
|
||||
int64_t Inc = IVToIncMap[InstIV];
|
||||
bool Negative = Inc < 0;
|
||||
bool NeedNewIV = InstIV == LoopControlIV;
|
||||
bool Negative = !NeedNewIV && Inc < 0;
|
||||
|
||||
const SCEVAddRecExpr *RealIVSCEV = cast<SCEVAddRecExpr>(SE->getSCEV(Inst));
|
||||
const SCEV *Start = RealIVSCEV->getStart();
|
||||
|
||||
if (NeedNewIV)
|
||||
Start = SE->getConstant(Start->getType(), 0);
|
||||
|
||||
const SCEV *SizeOfExpr = nullptr;
|
||||
const SCEV *IncrExpr =
|
||||
SE->getConstant(RealIVSCEV->getType(), Negative ? -1 : 1);
|
||||
@ -1360,6 +1499,12 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
|
||||
if (Uses[BI].find_first() == IL_All) {
|
||||
const SCEV *ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
|
||||
|
||||
if (NeedNewIV)
|
||||
ICSCEV = SE->getMulExpr(IterCount,
|
||||
SE->getConstant(IterCount->getType(), Scale));
|
||||
else
|
||||
ICSCEV = RealIVSCEV->evaluateAtIteration(IterCount, *SE);
|
||||
|
||||
// Iteration count SCEV minus or plus 1
|
||||
const SCEV *MinusPlus1SCEV =
|
||||
SE->getConstant(ICSCEV->getType(), Negative ? -1 : 1);
|
||||
@ -1514,7 +1659,7 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header,
|
||||
const SCEV *IterCount,
|
||||
ReductionTracker &Reductions) {
|
||||
DAGRootTracker DAGRoots(this, L, IV, SE, AA, TLI, DT, LI, PreserveLCSSA,
|
||||
IVToIncMap);
|
||||
IVToIncMap, LoopControlIV);
|
||||
|
||||
if (!DAGRoots.findRoots())
|
||||
return false;
|
||||
@ -1566,6 +1711,7 @@ bool LoopReroll::runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
// reroll (there may be several possible options).
|
||||
SmallInstructionVector PossibleIVs;
|
||||
IVToIncMap.clear();
|
||||
LoopControlIV = nullptr;
|
||||
collectPossibleIVs(L, PossibleIVs);
|
||||
|
||||
if (PossibleIVs.empty()) {
|
||||
|
134
test/Transforms/LoopReroll/complex_reroll.ll
Normal file
134
test/Transforms/LoopReroll/complex_reroll.ll
Normal file
@ -0,0 +1,134 @@
|
||||
; RUN: opt -S -loop-reroll %s | FileCheck %s
|
||||
declare i32 @goo(i32, i32)
|
||||
|
||||
@buf = external global i8*
|
||||
@aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1
|
||||
|
||||
define i32 @test1(i32 %len) {
|
||||
entry:
|
||||
br label %while.body
|
||||
|
||||
while.body:
|
||||
;CHECK-LABEL: while.body:
|
||||
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ]
|
||||
;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
|
||||
;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ]
|
||||
;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1
|
||||
;CHECK-NEXT: %conv = zext i8 [[T2]] to i64
|
||||
;CHECK-NEXT: %add = add i64 %conv, %sum44.020
|
||||
;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1
|
||||
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
|
||||
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1
|
||||
;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body
|
||||
|
||||
%dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ]
|
||||
%buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
|
||||
%sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ]
|
||||
%0 = load i8, i8* %buf.021, align 1
|
||||
%conv = zext i8 %0 to i64
|
||||
%add = add i64 %conv, %sum44.020
|
||||
%arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1
|
||||
%1 = load i8, i8* %arrayidx1, align 1
|
||||
%conv2 = zext i8 %1 to i64
|
||||
%add3 = add i64 %add, %conv2
|
||||
%arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2
|
||||
%2 = load i8, i8* %arrayidx4, align 1
|
||||
%conv5 = zext i8 %2 to i64
|
||||
%add6 = add i64 %add3, %conv5
|
||||
%arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3
|
||||
%3 = load i8, i8* %arrayidx7, align 1
|
||||
%conv8 = zext i8 %3 to i64
|
||||
%add9 = add i64 %add6, %conv8
|
||||
%add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4
|
||||
%dec = add nsw i32 %dec22, -1
|
||||
%tobool = icmp eq i32 %dec, 0
|
||||
br i1 %tobool, label %while.end, label %while.body
|
||||
|
||||
while.end: ; preds = %while.body
|
||||
%conv11 = trunc i64 %add9 to i32
|
||||
%call = tail call i32 @goo(i32 0, i32 %conv11)
|
||||
unreachable
|
||||
}
|
||||
|
||||
define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) {
|
||||
entry:
|
||||
%cmp.9 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup
|
||||
|
||||
for.body.lr.ph:
|
||||
br label %for.body
|
||||
|
||||
for.cond.for.cond.cleanup_crit_edge:
|
||||
br label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup:
|
||||
%S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ]
|
||||
ret i32 %S.addr.0.lcssa
|
||||
|
||||
for.body:
|
||||
;CHECK-LABEL: for.body:
|
||||
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
|
||||
;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ]
|
||||
;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
|
||||
;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4
|
||||
;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011
|
||||
;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1
|
||||
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
|
||||
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
|
||||
;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
|
||||
|
||||
%i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
|
||||
%S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ]
|
||||
%a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
|
||||
%incdec.ptr = getelementptr inbounds i32, i32* %a.addr.010, i64 1
|
||||
%0 = load i32, i32* %a.addr.010, align 4
|
||||
%add = add nsw i32 %0, %S.addr.011
|
||||
%incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 2
|
||||
%1 = load i32, i32* %incdec.ptr, align 4
|
||||
%add2 = add nsw i32 %add, %1
|
||||
%add3 = add nsw i32 %i.012, 2
|
||||
%cmp = icmp slt i32 %add3, %N
|
||||
br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
|
||||
}
|
||||
|
||||
define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 {
|
||||
entry:
|
||||
%cmp10 = icmp sgt i32 %len, 1
|
||||
br i1 %cmp10, label %while.body.preheader, label %while.end
|
||||
|
||||
while.body.preheader: ; preds = %entry
|
||||
br label %while.body
|
||||
|
||||
while.body: ; preds = %while.body.preheader, %while.body
|
||||
;CHECK-LABEL: while.body:
|
||||
;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
|
||||
;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
|
||||
;CHECK-NEXT: %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
|
||||
;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4
|
||||
;CHECK-NEXT: %add = add nsw i32 %4, %S.012
|
||||
;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
|
||||
;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
|
||||
;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
|
||||
;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
|
||||
|
||||
%i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ]
|
||||
%S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
|
||||
%buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
|
||||
%0 = load i32, i32* %buf.addr.011, align 4
|
||||
%add = add nsw i32 %0, %S.012
|
||||
%arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
|
||||
%1 = load i32, i32* %arrayidx1, align 4
|
||||
%add2 = add nsw i32 %add, %1
|
||||
%add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -2
|
||||
%sub = add nsw i32 %i.013, -2
|
||||
%cmp = icmp sgt i32 %sub, 1
|
||||
br i1 %cmp, label %while.body, label %while.end.loopexit
|
||||
|
||||
while.end.loopexit: ; preds = %while.body
|
||||
br label %while.end
|
||||
|
||||
while.end: ; preds = %while.end.loopexit, %entry
|
||||
%S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ]
|
||||
ret i32 %S.0.lcssa
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user