[EarlyCSE] Optimize MemoryPhis and reduce memory clobber queries w/ MemorySSA
Summary:
When using MemorySSA, re-optimize MemoryPhis when removing a store since
this may create MemoryPhis with all identical arguments.

Also, when using MemorySSA to check if two MemoryUses are reading from the
same version of the heap, use the defining access instead of calling
getClobberingAccess, since the latter can currently result in many more AA
calls. Once the MemorySSA use optimization tracking changes are done, we can
remove this limitation, which should result in more loads being CSE'd.

Reviewers: dberlin

Subscribers: mcrosier, llvm-commits

Differential Revision: https://reviews.llvm.org/D25881

llvm-svn: 284984
This commit is contained in:
parent 851e2f985f
commit 4cf33cb292
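
Note on the diff below: the first two hunks touch EarlyCSE's MemorySSA-based store removal and its isSameMemGeneration check, and the last hunk adds a test that is checked both with and without MemorySSA. The following is a minimal, standalone C++ sketch (not part of the patch) of the two ideas from the summary: the "all incoming values identical" test used to spot MemoryPhis that become redundant once a store's access is removed, and the cheap defining-access lookup used for MemoryUses instead of the walker's getClobberingMemoryAccess. The helper names are made up for illustration; the include path shown is where MemorySSA lives in current trees (it was under llvm/Transforms/Utils at the time of this commit).

// Illustrative sketch only -- not part of this commit. Written against the
// MemorySSA API as it is used in the patch; the header location reflects
// current trees (it was llvm/Transforms/Utils/MemorySSA.h at the time).
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/MemorySSA.h"

using namespace llvm;

// The triviality test applied after removing a store's MemoryAccess: a
// MemoryPhi whose incoming values are now all identical is redundant and can
// itself be removed, which may in turn make phis that use it trivial.
static bool allIncomingValuesIdentical(MemoryPhi *MP) {
  MemoryAccess *FirstIn = MP->getIncomingValue(0);
  return all_of(MP->incoming_values(),
                [FirstIn](Use &In) { return In == FirstIn; });
}

// Two ways to ask "which version of the heap does this instruction observe?".
// Assumes I accesses memory and MSSA is built for its function. For a
// MemoryUse, the defining access recorded when MemorySSA was constructed is
// reused; the walker, which may issue many extra AA queries per call, is only
// consulted for defs such as stores.
static MemoryAccess *heapVersionOf(MemorySSA &MSSA, Instruction *I) {
  MemoryAccess *MA = MSSA.getMemoryAccess(I);
  if (auto *MU = dyn_cast<MemoryUse>(MA))
    return MU->getDefiningAccess();
  return MSSA.getWalker()->getClobberingMemoryAccess(I);
}

Reusing the defining access trades precision for far fewer AA queries: for a MemoryUse that MemorySSA did not fully optimize it can be an over-approximation, which is the limitation the summary expects to lift once use-optimization tracking lands.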
@@ -498,15 +498,43 @@ private:
       return;
     // FIXME: Removing a store here can leave MemorySSA in an unoptimized state
     // by creating MemoryPhis that have identical arguments and by creating
-    // MemoryUses whose defining access is not an actual clobber.
-    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst))
-      MSSA->removeMemoryAccess(MA);
+    // MemoryUses whose defining access is not an actual clobber. We handle the
+    // phi case here, but the non-optimized MemoryUse case is not handled. Once
+    // MemorySSA tracks whether uses are optimized this will be taken care of on
+    // the MemorySSA side.
+    if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
+      // Optimize MemoryPhi nodes that may become redundant by having all the
+      // same input values once MA is removed.
+      SmallVector<MemoryPhi *, 4> PhisToCheck;
+      SmallVector<MemoryAccess *, 8> WorkQueue;
+      WorkQueue.push_back(MA);
+      // Process MemoryPhi nodes in FIFO order using a ever-growing vector since
+      // we shouldn't be processing that many phis and this will avoid an
+      // allocation in almost all cases.
+      for (unsigned I = 0; I < WorkQueue.size(); ++I) {
+        MemoryAccess *WI = WorkQueue[I];
+
+        for (auto *U : WI->users())
+          if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
+            PhisToCheck.push_back(MP);
+
+        MSSA->removeMemoryAccess(WI);
+
+        for (MemoryPhi *MP : PhisToCheck) {
+          MemoryAccess *FirstIn = MP->getIncomingValue(0);
+          if (all_of(MP->incoming_values(),
+                     [=](Use &In) { return In == FirstIn; }))
+            WorkQueue.push_back(MP);
+        }
+        PhisToCheck.clear();
+      }
+    }
   }
 };
 }
 
-/// Determine if the memory referenced by LaterInst is from the same heap version
-/// as EarlierInst.
+/// Determine if the memory referenced by LaterInst is from the same heap
+/// version as EarlierInst.
 /// This is currently called in two scenarios:
 ///
 ///   load p
@@ -536,11 +564,17 @@ bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
   // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
   // EarlierInst and LaterInst and neither can any other write that potentially
   // clobbers LaterInst.
-  // FIXME: This is currently fairly expensive since it does an AA check even
-  // for MemoryUses that were already optimized by MemorySSA construction.
-  // Re-visit once MemorySSA optimized use tracking change has been committed.
-  MemoryAccess *LaterDef =
-      MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+  // FIXME: Use getClobberingMemoryAccess only for stores since it is currently
+  // fairly expensive to call on MemoryUses since it does an AA check even for
+  // MemoryUses that were already optimized by MemorySSA construction. Once
+  // MemorySSA optimized use tracking change has been committed we can use
+  // getClobberingMemoryAccess for MemoryUses as well.
+  MemoryAccess *LaterMA = MSSA->getMemoryAccess(LaterInst);
+  MemoryAccess *LaterDef;
+  if (auto *LaterUse = dyn_cast<MemoryUse>(LaterMA))
+    LaterDef = LaterUse->getDefiningAccess();
+  else
+    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
   return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
 }
 
@@ -32,3 +32,38 @@ entry:
   store i32 %V1, i32* @G1
   ret void
 }
+
+;; Check that memoryphi optimization happens during EarlyCSE, enabling
+;; more load CSE opportunities.
+; CHECK-LABEL: @test_memphiopt(
+; CHECK-NOMEMSSA-LABEL: @test_memphiopt(
+define void @test_memphiopt(i1 %c, i32* %p) {
+; CHECK-LABEL: entry:
+; CHECK-NOMEMSSA-LABEL: entry:
+entry:
+; CHECK: load
+; CHECK-NOMEMSSA: load
+  %v1 = load i32, i32* @G1
+  br i1 %c, label %then, label %end
+
+; CHECK-LABEL: then:
+; CHECK-NOMEMSSA-LABEL: then:
+then:
+; CHECK: load
+; CHECK-NOMEMSSA: load
+  %pv = load i32, i32* %p
+; CHECK-NOT: store
+; CHECK-NOMEMSSA-NOT: store
+  store i32 %pv, i32* %p
+  br label %end
+
+; CHECK-LABEL: end:
+; CHECK-NOMEMSSA-LABEL: end:
+end:
+; CHECK-NOT: load
+; CHECK-NOMEMSSA: load
+  %v2 = load i32, i32* @G1
+  %sum = add i32 %v1, %v2
+  store i32 %sum, i32* @G2
+  ret void
+}