mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[LoopInterchange] Track all dependencies, not just anti dependencies.
Currently, we give up on loop interchange if we encounter a flow dependency anywhere in the loop list. Worse yet, we don't even track output dependencies. This patch updates the dependency matrix computation to track flow and output dependencies in the same way we track anti dependencies. This improves an internal workload by 2.2x. Note the loop interchange pass is off by default and it can be enabled with '-mllvm -enable-loopinterchange' Differential Revision: https://reviews.llvm.org/D24564 llvm-svn: 282101
This commit is contained in:
parent
2856a03dce
commit
f9ec13f3d1
@ -111,62 +111,59 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
|
||||
Instruction *Dst = cast<Instruction>(*J);
|
||||
if (Src == Dst)
|
||||
continue;
|
||||
// Ignore Input dependencies.
|
||||
if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
|
||||
continue;
|
||||
// Track Output, Flow, and Anti dependencies.
|
||||
if (auto D = DI->depends(Src, Dst, true)) {
|
||||
DEBUG(dbgs() << "Found Dependency between Src and Dst\n"
|
||||
assert(D->isOrdered() && "Expected an output, flow or anti dep.");
|
||||
DEBUG(StringRef DepType =
|
||||
D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
|
||||
dbgs() << "Found " << DepType
|
||||
<< " dependency between Src and Dst\n"
|
||||
<< " Src:" << *Src << "\n Dst:" << *Dst << '\n');
|
||||
if (D->isFlow()) {
|
||||
// TODO: Handle Flow dependence.Check if it is sufficient to populate
|
||||
// the Dependence Matrix with the direction reversed.
|
||||
DEBUG(dbgs() << "Flow dependence not handled\n");
|
||||
return false;
|
||||
unsigned Levels = D->getLevels();
|
||||
char Direction;
|
||||
for (unsigned II = 1; II <= Levels; ++II) {
|
||||
const SCEV *Distance = D->getDistance(II);
|
||||
const SCEVConstant *SCEVConst =
|
||||
dyn_cast_or_null<SCEVConstant>(Distance);
|
||||
if (SCEVConst) {
|
||||
const ConstantInt *CI = SCEVConst->getValue();
|
||||
if (CI->isNegative())
|
||||
Direction = '<';
|
||||
else if (CI->isZero())
|
||||
Direction = '=';
|
||||
else
|
||||
Direction = '>';
|
||||
Dep.push_back(Direction);
|
||||
} else if (D->isScalar(II)) {
|
||||
Direction = 'S';
|
||||
Dep.push_back(Direction);
|
||||
} else {
|
||||
unsigned Dir = D->getDirection(II);
|
||||
if (Dir == Dependence::DVEntry::LT ||
|
||||
Dir == Dependence::DVEntry::LE)
|
||||
Direction = '<';
|
||||
else if (Dir == Dependence::DVEntry::GT ||
|
||||
Dir == Dependence::DVEntry::GE)
|
||||
Direction = '>';
|
||||
else if (Dir == Dependence::DVEntry::EQ)
|
||||
Direction = '=';
|
||||
else
|
||||
Direction = '*';
|
||||
Dep.push_back(Direction);
|
||||
}
|
||||
}
|
||||
while (Dep.size() != Level) {
|
||||
Dep.push_back('I');
|
||||
}
|
||||
if (D->isAnti()) {
|
||||
DEBUG(dbgs() << "Found Anti dependence\n");
|
||||
unsigned Levels = D->getLevels();
|
||||
char Direction;
|
||||
for (unsigned II = 1; II <= Levels; ++II) {
|
||||
const SCEV *Distance = D->getDistance(II);
|
||||
const SCEVConstant *SCEVConst =
|
||||
dyn_cast_or_null<SCEVConstant>(Distance);
|
||||
if (SCEVConst) {
|
||||
const ConstantInt *CI = SCEVConst->getValue();
|
||||
if (CI->isNegative())
|
||||
Direction = '<';
|
||||
else if (CI->isZero())
|
||||
Direction = '=';
|
||||
else
|
||||
Direction = '>';
|
||||
Dep.push_back(Direction);
|
||||
} else if (D->isScalar(II)) {
|
||||
Direction = 'S';
|
||||
Dep.push_back(Direction);
|
||||
} else {
|
||||
unsigned Dir = D->getDirection(II);
|
||||
if (Dir == Dependence::DVEntry::LT ||
|
||||
Dir == Dependence::DVEntry::LE)
|
||||
Direction = '<';
|
||||
else if (Dir == Dependence::DVEntry::GT ||
|
||||
Dir == Dependence::DVEntry::GE)
|
||||
Direction = '>';
|
||||
else if (Dir == Dependence::DVEntry::EQ)
|
||||
Direction = '=';
|
||||
else
|
||||
Direction = '*';
|
||||
Dep.push_back(Direction);
|
||||
}
|
||||
}
|
||||
while (Dep.size() != Level) {
|
||||
Dep.push_back('I');
|
||||
}
|
||||
|
||||
DepMatrix.push_back(Dep);
|
||||
if (DepMatrix.size() > MaxMemInstrCount) {
|
||||
DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
|
||||
<< " dependencies inside loop\n");
|
||||
return false;
|
||||
}
|
||||
DepMatrix.push_back(Dep);
|
||||
if (DepMatrix.size() > MaxMemInstrCount) {
|
||||
DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
|
||||
<< " dependencies inside loop\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -555,3 +555,195 @@ for.end17: ; preds = %for.inc15
|
||||
; CHECK: for.end17: ; preds = %for.inc15
|
||||
; CHECK: ret void
|
||||
|
||||
;;-----------------------------------Test case 09-------------------------------
|
||||
;; Test that a flow dependency in outer loop doesn't prevent interchange in
|
||||
;; loops i and j.
|
||||
;;
|
||||
;; for (int k = 0; k < 100; ++k) {
|
||||
;; T[k] = fn1();
|
||||
;; for (int i = 0; i < 1000; ++i)
|
||||
;; for(int j = 1; j < 1000; ++j)
|
||||
;; Arr[j][i] = Arr[j][i]+k;
|
||||
;; fn2(T[k]);
|
||||
;; }
|
||||
|
||||
@T = internal global [100 x double] zeroinitializer, align 4
|
||||
@Arr = internal global [1000 x [1000 x i32]] zeroinitializer, align 4
|
||||
|
||||
define void @interchange_09(i32 %k) {
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.cleanup4
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.cond.cleanup4, %entry
|
||||
%indvars.iv45 = phi i64 [ 0, %entry ], [ %indvars.iv.next46, %for.cond.cleanup4 ]
|
||||
%call = call double @fn1()
|
||||
%arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
|
||||
store double %call, double* %arrayidx, align 8
|
||||
br label %for.cond6.preheader
|
||||
|
||||
for.cond6.preheader: ; preds = %for.cond.cleanup8, %for.body
|
||||
%indvars.iv42 = phi i64 [ 0, %for.body ], [ %indvars.iv.next43, %for.cond.cleanup8 ]
|
||||
br label %for.body9
|
||||
|
||||
for.cond.cleanup4: ; preds = %for.cond.cleanup8
|
||||
%tmp = load double, double* %arrayidx, align 8
|
||||
call void @fn2(double %tmp)
|
||||
%indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
|
||||
%exitcond47 = icmp ne i64 %indvars.iv.next46, 100
|
||||
br i1 %exitcond47, label %for.body, label %for.cond.cleanup
|
||||
|
||||
for.cond.cleanup8: ; preds = %for.body9
|
||||
%indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
|
||||
%exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
|
||||
br i1 %exitcond44, label %for.cond6.preheader, label %for.cond.cleanup4
|
||||
|
||||
for.body9: ; preds = %for.body9, %for.cond6.preheader
|
||||
%indvars.iv = phi i64 [ 1, %for.cond6.preheader ], [ %indvars.iv.next, %for.body9 ]
|
||||
%arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
|
||||
%tmp1 = load i32, i32* %arrayidx13, align 4
|
||||
%tmp2 = trunc i64 %indvars.iv45 to i32
|
||||
%add = add nsw i32 %tmp1, %tmp2
|
||||
store i32 %add, i32* %arrayidx13, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, 1000
|
||||
br i1 %exitcond, label %for.body9, label %for.cond.cleanup8
|
||||
}
|
||||
|
||||
declare double @fn1()
|
||||
declare void @fn2(double)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
;; After interchange %indvars.iv (j) should increment as the middle loop.
|
||||
;; After interchange %indvars.iv42 (i) should increment with the inner most loop.
|
||||
|
||||
; CHECK-LABEL: @interchange_09
|
||||
|
||||
; CHECK: for.body:
|
||||
; CHECK: %indvars.iv45 = phi i64 [ %indvars.iv.next46, %for.cond.cleanup4 ], [ 0, %for.body.preheader ]
|
||||
; CHECK: %call = call double @fn1()
|
||||
; CHECK: %arrayidx = getelementptr inbounds [100 x double], [100 x double]* @T, i64 0, i64 %indvars.iv45
|
||||
; CHECK: store double %call, double* %arrayidx, align 8
|
||||
; CHECK: br label %for.body9.preheader
|
||||
|
||||
; CHECK: for.cond6.preheader.preheader:
|
||||
; CHECK: br label %for.cond6.preheader
|
||||
|
||||
; CHECK: for.cond6.preheader:
|
||||
; CHECK: %indvars.iv42 = phi i64 [ %indvars.iv.next43, %for.cond.cleanup8 ], [ 0, %for.cond6.preheader.preheader ]
|
||||
; CHECK: br label %for.body9.split1
|
||||
|
||||
; CHECK: for.body9.preheader:
|
||||
; CHECK: br label %for.body9
|
||||
|
||||
; CHECK: for.cond.cleanup4:
|
||||
; CHECK: %tmp = load double, double* %arrayidx, align 8
|
||||
; CHECK: call void @fn2(double %tmp)
|
||||
; CHECK: %indvars.iv.next46 = add nuw nsw i64 %indvars.iv45, 1
|
||||
; CHECK: %exitcond47 = icmp ne i64 %indvars.iv.next46, 100
|
||||
; CHECK: br i1 %exitcond47, label %for.body, label %for.cond.cleanup
|
||||
|
||||
; CHECK: for.cond.cleanup8:
|
||||
; CHECK: %indvars.iv.next43 = add nuw nsw i64 %indvars.iv42, 1
|
||||
; CHECK: %exitcond44 = icmp ne i64 %indvars.iv.next43, 1000
|
||||
; CHECK: br i1 %exitcond44, label %for.cond6.preheader, label %for.body9.split
|
||||
|
||||
; CHECK: for.body9:
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9.split ], [ 1, %for.body9.preheader ]
|
||||
; CHECK: br label %for.cond6.preheader.preheader
|
||||
|
||||
; CHECK: for.body9.split1:
|
||||
; CHECK: %arrayidx13 = getelementptr inbounds [1000 x [1000 x i32]], [1000 x [1000 x i32]]* @Arr, i64 0, i64 %indvars.iv, i64 %indvars.iv42
|
||||
; CHECK: store i32 %add, i32* %arrayidx13, align 4
|
||||
; CHECK: br label %for.cond.cleanup8
|
||||
|
||||
; CHECK: for.body9.split:
|
||||
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 1000
|
||||
; CHECK: br i1 %exitcond, label %for.body9, label %for.cond.cleanup4
|
||||
|
||||
|
||||
;;-----------------------------------Test case 10-------------------------------
|
||||
;; Test to make sure we can handle output dependencies.
|
||||
;;
|
||||
;; for (int i = 0; i < 2; ++i)
|
||||
;; for(int j = 0; j < 3; ++j) {
|
||||
;; A[j][i] = i;
|
||||
;; A[j][i+1] = j;
|
||||
;; }
|
||||
|
||||
@A10 = local_unnamed_addr global [3 x [3 x i32]] zeroinitializer, align 16
|
||||
|
||||
define void @interchange_10() {
|
||||
entry:
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond.loopexit: ; preds = %for.body4
|
||||
%exitcond28 = icmp ne i64 %indvars.iv.next27, 2
|
||||
br i1 %exitcond28, label %for.cond1.preheader, label %for.cond.cleanup
|
||||
|
||||
for.cond1.preheader: ; preds = %for.cond.loopexit, %entry
|
||||
%indvars.iv26 = phi i64 [ 0, %entry ], [ %indvars.iv.next27, %for.cond.loopexit ]
|
||||
%indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
|
||||
br label %for.body4
|
||||
|
||||
for.cond.cleanup: ; preds = %for.cond.loopexit
|
||||
ret void
|
||||
|
||||
for.body4: ; preds = %for.body4, %for.cond1.preheader
|
||||
%indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body4 ]
|
||||
%arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
|
||||
%tmp = trunc i64 %indvars.iv26 to i32
|
||||
store i32 %tmp, i32* %arrayidx6, align 4
|
||||
%arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
|
||||
%tmp1 = trunc i64 %indvars.iv to i32
|
||||
store i32 %tmp1, i32* %arrayidx10, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, 3
|
||||
br i1 %exitcond, label %for.body4, label %for.cond.loopexit
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @interchange_10
|
||||
; CHECK: entry:
|
||||
; CHECK: br label %for.body4.preheader
|
||||
|
||||
; CHECK: for.cond1.preheader.preheader:
|
||||
; CHECK: br label %for.cond1.preheader
|
||||
|
||||
; CHECK: for.cond.loopexit:
|
||||
; CHECK: %exitcond28 = icmp ne i64 %indvars.iv.next27, 2
|
||||
; CHECK: br i1 %exitcond28, label %for.cond1.preheader, label %for.body4.split
|
||||
|
||||
; CHECK: for.cond1.preheader:
|
||||
; CHECK: %indvars.iv26 = phi i64 [ %indvars.iv.next27, %for.cond.loopexit ], [ 0, %for.cond1.preheader.preheader ]
|
||||
; CHECK: %indvars.iv.next27 = add nuw nsw i64 %indvars.iv26, 1
|
||||
; CHECK: br label %for.body4.split1
|
||||
|
||||
; CHECK: for.body4.preheader:
|
||||
; CHECK: br label %for.body4
|
||||
|
||||
; CHECK: for.cond.cleanup:
|
||||
; CHECK: ret void
|
||||
|
||||
; CHECK: for.body4:
|
||||
; CHECK: %indvars.iv = phi i64 [ %indvars.iv.next, %for.body4.split ], [ 0, %for.body4.preheader ]
|
||||
; CHECK: br label %for.cond1.preheader.preheader
|
||||
|
||||
; CHECK: for.body4.split1:
|
||||
; CHECK: %arrayidx6 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv26
|
||||
; CHECK: %tmp = trunc i64 %indvars.iv26 to i32
|
||||
; CHECK: store i32 %tmp, i32* %arrayidx6, align 4
|
||||
; CHECK: %arrayidx10 = getelementptr inbounds [3 x [3 x i32]], [3 x [3 x i32]]* @A10, i64 0, i64 %indvars.iv, i64 %indvars.iv.next27
|
||||
; CHECK: %tmp1 = trunc i64 %indvars.iv to i32
|
||||
; CHECK: store i32 %tmp1, i32* %arrayidx10, align 4
|
||||
; CHECK: br label %for.cond.loopexit
|
||||
|
||||
; CHECK: for.body4.split:
|
||||
; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, 3
|
||||
; CHECK: br i1 %exitcond, label %for.body4, label %for.cond.cleanup
|
||||
|
Loading…
x
Reference in New Issue
Block a user