1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 02:52:53 +02:00

[Devirtualization] MemDep returns non-local !invariant.group dependencies

Summary:
Memory Dependence Analysis was limited to return only local dependencies
for invariant.group handling. Now it returns NonLocal when it finds it
and then by asking getNonLocalPointerDependency we get found dep.

Thanks to this we are able to devirtualize loops!

    void indirect(A &a, int n) {
      for (int i = 0 ; i < n; i++)
        a.foo();

    }
    void test(int n) {
      A a;
      indirect(a);
    }

After inlining a.foo() will be changed to direct call, even if foo and A::A()
is external (but only if vtable definition is be available).

Reviewers: nlewycky, dberlin, chandlerc, rsmith

Subscribers: mehdi_amini, davide, llvm-commits

Differential Revision: https://reviews.llvm.org/D28137

llvm-svn: 291762
This commit is contained in:
Piotr Padlewski 2017-01-12 11:33:58 +00:00
parent 592cce271e
commit 9032a3fc4f
6 changed files with 145 additions and 21 deletions

View File

@ -302,6 +302,10 @@ private:
NonLocalPointerInfo() : Size(MemoryLocation::UnknownSize) {} NonLocalPointerInfo() : Size(MemoryLocation::UnknownSize) {}
}; };
/// Cache storing single nonlocal def for the instruction.
/// It is set when nonlocal def would be found in function returning only
/// local dependencies.
DenseMap<Instruction *, NonLocalDepResult> NonLocalDefsCache;
/// This map stores the cached results of doing a pointer lookup at the /// This map stores the cached results of doing a pointer lookup at the
/// bottom of a block. /// bottom of a block.
/// ///
@ -441,9 +445,9 @@ public:
/// This analysis looks for other loads and stores with invariant.group /// This analysis looks for other loads and stores with invariant.group
/// metadata and the same pointer operand. Returns Unknown if it does not /// metadata and the same pointer operand. Returns Unknown if it does not
/// find anything, and Def if it can be assumed that 2 instructions load or /// find anything, and Def if it can be assumed that 2 instructions load or
/// store the same value. /// store the same value and NonLocal which indicate that non-local Def was
/// FIXME: This analysis works only on single block because of restrictions /// found, which can be retrieved by calling getNonLocalPointerDependency
/// at the call site. /// with the same queried instruction.
MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB); MemDepResult getInvariantGroupPointerDependency(LoadInst *LI, BasicBlock *BB);
/// Looks at a memory location for a load (specified by MemLocBase, Offs, and /// Looks at a memory location for a load (specified by MemLocBase, Offs, and

View File

@ -323,17 +323,28 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
MemDepResult InvariantGroupDependency = MemDepResult::getUnknown();
if (QueryInst != nullptr) { if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
MemDepResult InvariantGroupDependency = InvariantGroupDependency = getInvariantGroupPointerDependency(LI, BB);
getInvariantGroupPointerDependency(LI, BB);
if (InvariantGroupDependency.isDef()) if (InvariantGroupDependency.isDef())
return InvariantGroupDependency; return InvariantGroupDependency;
} }
} }
return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst, MemDepResult SimpleDep = getSimplePointerDependencyFrom(
Limit); MemLoc, isLoad, ScanIt, BB, QueryInst, Limit);
if (SimpleDep.isDef())
return SimpleDep;
// Non-local invariant group dependency indicates there is non local Def
// (it only returns nonLocal if it finds nonLocal def), which is better than
// local clobber and everything else.
if (InvariantGroupDependency.isNonLocal())
return InvariantGroupDependency;
assert(InvariantGroupDependency.isUnknown() &&
"InvariantGroupDependency should be only unknown at this point");
return SimpleDep;
} }
MemDepResult MemDepResult
@ -358,6 +369,20 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// Queue to process all pointers that are equivalent to load operand. // Queue to process all pointers that are equivalent to load operand.
SmallVector<const Value *, 8> LoadOperandsQueue; SmallVector<const Value *, 8> LoadOperandsQueue;
LoadOperandsQueue.push_back(LoadOperand); LoadOperandsQueue.push_back(LoadOperand);
Instruction *ClosestDependency = nullptr;
// Order of instructions in uses list is unpredictible. In order to always
// get the same result, we will look for the closest dominance.
auto GetClosestDependency = [this](Instruction *Best, Instruction *Other) {
assert(Other && "Must call it with not null instruction");
if (Best == nullptr || DT.dominates(Best, Other))
return Other;
return Best;
};
// FIXME: This loop is O(N^2) because dominates can be O(n) and in worst case
// we will see all the instructions. This should be fixed in MSSA.
while (!LoadOperandsQueue.empty()) { while (!LoadOperandsQueue.empty()) {
const Value *Ptr = LoadOperandsQueue.pop_back_val(); const Value *Ptr = LoadOperandsQueue.pop_back_val();
assert(Ptr && !isa<GlobalValue>(Ptr) && assert(Ptr && !isa<GlobalValue>(Ptr) &&
@ -388,12 +413,24 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
// If we hit load/store with the same invariant.group metadata (and the // If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer // same pointer operand) we can assume that value pointed by pointer
// operand didn't change. // operand didn't change.
if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB && if ((isa<LoadInst>(U) || isa<StoreInst>(U)) &&
U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD)
return MemDepResult::getDef(U); ClosestDependency = GetClosestDependency(ClosestDependency, U);
} }
} }
return MemDepResult::getUnknown();
if (!ClosestDependency)
return MemDepResult::getUnknown();
if (ClosestDependency->getParent() == BB)
return MemDepResult::getDef(ClosestDependency);
// Def(U) can't be returned here because it is non-local. If local
// dependency won't be found then return nonLocal counting that the
// user will call getNonLocalPointerDependency, which will return cached
// result.
NonLocalDefsCache.try_emplace(
LI, NonLocalDepResult(ClosestDependency->getParent(),
MemDepResult::getDef(ClosestDependency), nullptr));
return MemDepResult::getNonLocal();
} }
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
@ -877,7 +914,17 @@ void MemoryDependenceResults::getNonLocalPointerDependency(
assert(Loc.Ptr->getType()->isPointerTy() && assert(Loc.Ptr->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!"); "Can't get pointer deps of a non-pointer!");
Result.clear(); Result.clear();
{
// Check if there is cached Def with invariant.group. FIXME: cache might be
// invalid if cached instruction would be removed between call to
// getPointerDependencyFrom and this function.
auto NonLocalDefIt = NonLocalDefsCache.find(QueryInst);
if (NonLocalDefIt != NonLocalDefsCache.end()) {
Result.push_back(std::move(NonLocalDefIt->second));
NonLocalDefsCache.erase(NonLocalDefIt);
return;
}
}
// This routine does not expect to deal with volatile instructions. // This routine does not expect to deal with volatile instructions.
// Doing so would require piping through the QueryInst all the way through. // Doing so would require piping through the QueryInst all the way through.
// TODO: volatiles can't be elided, but they can be reordered with other // TODO: volatiles can't be elided, but they can be reordered with other

View File

@ -65,22 +65,20 @@ if.then: ; preds = %entry
%vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0 %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)** %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
%call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8 %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
; FIXME: those loads could be also direct, but right now the invariant.group ; CHECK: call i32 @_ZN1A3fooEv(
; analysis works only on single block
; CHECK-NOT: call i32 @_ZN1A3fooEv(
%callx = tail call i32 %call1(%struct.A* %0) #1 %callx = tail call i32 %call1(%struct.A* %0) #1
%vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0 %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)** %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
%call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8 %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
; CHECK-NOT: call i32 @_ZN1A3fooEv( ; CHECK: call i32 @_ZN1A3fooEv(
%cally = tail call i32 %call4(%struct.A* %0) #1 %cally = tail call i32 %call4(%struct.A* %0) #1
%b = bitcast i8* %call to %struct.A** %b = bitcast i8* %call to %struct.A**
%vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0 %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
%vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)** %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
%vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8 %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
; CHECK-NOT: call i32 @_ZN1A3fooEv( ; CHECK: call i32 @_ZN1A3fooEv(
%unknown = tail call i32 %vfun(%struct.A* %0) #1 %unknown = tail call i32 %vfun(%struct.A* %0) #1
br label %if.end br label %if.end

View File

@ -392,6 +392,44 @@ define void @testNotGlobal() {
ret void ret void
} }
; CHECK-LABEL: define void @handling_loops()
define void @handling_loops() {
%a = alloca %struct.A, align 8
%1 = bitcast %struct.A* %a to i8*
%2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
%3 = load i8, i8* @unknownPtr, align 4
%4 = icmp sgt i8 %3, 0
br i1 %4, label %.lr.ph.i, label %_Z2g2R1A.exit
.lr.ph.i: ; preds = %0
%5 = bitcast %struct.A* %a to void (%struct.A*)***
%6 = load i8, i8* @unknownPtr, align 4
%7 = icmp sgt i8 %6, 1
br i1 %7, label %._crit_edge.preheader, label %_Z2g2R1A.exit
._crit_edge.preheader: ; preds = %.lr.ph.i
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.preheader, %._crit_edge
%8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
%.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
%9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
call void %9(%struct.A* nonnull %a) #3
; CHECK-NOT: call void %
%10 = add nuw nsw i8 %8, 1
%11 = load i8, i8* @unknownPtr, align 4
%12 = icmp slt i8 %10, %11
br i1 %12, label %._crit_edge, label %_Z2g2R1A.exit.loopexit
_Z2g2R1A.exit.loopexit: ; preds = %._crit_edge
br label %_Z2g2R1A.exit
_Z2g2R1A.exit: ; preds = %_Z2g2R1A.exit.loopexit, %.lr.ph.i, %0
ret void
}
declare void @foo(i8*) declare void @foo(i8*)
declare void @foo2(i8*, i8) declare void @foo2(i8*, i8)

View File

@ -66,22 +66,20 @@ if.then: ; preds = %entry
%vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0 %vtable1 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)** %vtable2.cast = bitcast i8** %vtable1 to i32 (%struct.A*)**
%call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8 %call1 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable2.cast, align 8
; FIXME: those loads could be also direct, but right now the invariant.group ; CHECK: call i32 @_ZN1A3fooEv(
; analysis works only on single block
; CHECK-NOT: call i32 @_ZN1A3fooEv(
%callx = tail call i32 %call1(%struct.A* %0) #1 %callx = tail call i32 %call1(%struct.A* %0) #1
%vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0 %vtable2 = load i8**, i8*** %1, align 8, !invariant.group !0
%vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)** %vtable3.cast = bitcast i8** %vtable2 to i32 (%struct.A*)**
%call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8 %call4 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable3.cast, align 8
; CHECK-NOT: call i32 @_ZN1A3fooEv( ; CHECK: call i32 @_ZN1A3fooEv(
%cally = tail call i32 %call4(%struct.A* %0) #1 %cally = tail call i32 %call4(%struct.A* %0) #1
%b = bitcast i8* %call to %struct.A** %b = bitcast i8* %call to %struct.A**
%vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0 %vtable3 = load %struct.A*, %struct.A** %b, align 8, !invariant.group !0
%vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)** %vtable4.cast = bitcast %struct.A* %vtable3 to i32 (%struct.A*)**
%vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8 %vfun = load i32 (%struct.A*)*, i32 (%struct.A*)** %vtable4.cast, align 8
; CHECK-NOT: call i32 @_ZN1A3fooEv( ; CHECK: call i32 @_ZN1A3fooEv(
%unknown = tail call i32 %vfun(%struct.A* %0) #1 %unknown = tail call i32 %vfun(%struct.A* %0) #1
br label %if.end br label %if.end

View File

@ -393,6 +393,45 @@ define void @testNotGlobal() {
ret void ret void
} }
; CHECK-LABEL: define void @handling_loops()
define void @handling_loops() {
%a = alloca %struct.A, align 8
%1 = bitcast %struct.A* %a to i8*
%2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
store i32 (...)** bitcast (i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2) to i32 (...)**), i32 (...)*** %2, align 8, !invariant.group !0
%3 = load i8, i8* @unknownPtr, align 4
%4 = icmp sgt i8 %3, 0
br i1 %4, label %.lr.ph.i, label %_Z2g2R1A.exit
.lr.ph.i: ; preds = %0
%5 = bitcast %struct.A* %a to void (%struct.A*)***
%6 = load i8, i8* @unknownPtr, align 4
%7 = icmp sgt i8 %6, 1
br i1 %7, label %._crit_edge.preheader, label %_Z2g2R1A.exit
._crit_edge.preheader: ; preds = %.lr.ph.i
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.preheader, %._crit_edge
%8 = phi i8 [ %10, %._crit_edge ], [ 1, %._crit_edge.preheader ]
%.pre = load void (%struct.A*)**, void (%struct.A*)*** %5, align 8, !invariant.group !0
%9 = load void (%struct.A*)*, void (%struct.A*)** %.pre, align 8
; CHECK: call void @_ZN1A3fooEv(%struct.A* nonnull %a)
call void %9(%struct.A* nonnull %a) #3
; CHECK-NOT: call void %
%10 = add nuw nsw i8 %8, 1
%11 = load i8, i8* @unknownPtr, align 4
%12 = icmp slt i8 %10, %11
br i1 %12, label %._crit_edge, label %_Z2g2R1A.exit.loopexit
_Z2g2R1A.exit.loopexit: ; preds = %._crit_edge
br label %_Z2g2R1A.exit
_Z2g2R1A.exit: ; preds = %_Z2g2R1A.exit.loopexit, %.lr.ph.i, %0
ret void
}
declare void @foo(i8*) declare void @foo(i8*)
declare void @foo2(i8*, i8) declare void @foo2(i8*, i8)