mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[EarlyCSE] Fix handling of target memory intrinsics for CSE'ing loads.
Summary: Some target intrinsics can access multiple elements, using the pointer as a base address (e.g. AArch64 ld4). When trying to CSE such instructions, we must check that the available value comes from a compatible instruction, because the pointer alone is not enough to determine whether the value is correct. Reviewers: ssijaric Subscribers: mcrosier, llvm-commits, aemerson Differential Revision: http://reviews.llvm.org/D13475 llvm-svn: 249523
This commit is contained in:
parent
203878427e
commit
d8693073cb
@ -290,12 +290,19 @@ public:
|
||||
/// current generation count. The current generation count is incremented
|
||||
/// after every possibly writing memory operation, which ensures that we only
|
||||
/// CSE loads with other loads that have no intervening store.
|
||||
typedef RecyclingAllocator<
|
||||
BumpPtrAllocator,
|
||||
ScopedHashTableVal<Value *, std::pair<Value *, unsigned>>>
|
||||
struct LoadValue {
|
||||
Value *data;
|
||||
unsigned generation;
|
||||
int matchingId;
|
||||
LoadValue() : data(nullptr), generation(0), matchingId(-1) {}
|
||||
LoadValue(Value *data, unsigned generation, unsigned matchingId)
|
||||
: data(data), generation(generation), matchingId(matchingId) {}
|
||||
};
|
||||
typedef RecyclingAllocator<BumpPtrAllocator,
|
||||
ScopedHashTableVal<Value *, LoadValue>>
|
||||
LoadMapAllocator;
|
||||
typedef ScopedHashTable<Value *, std::pair<Value *, unsigned>,
|
||||
DenseMapInfo<Value *>, LoadMapAllocator> LoadHTType;
|
||||
typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
|
||||
LoadMapAllocator> LoadHTType;
|
||||
LoadHTType AvailableLoads;
|
||||
|
||||
/// \brief A scoped hash table of the current values of read-only call
|
||||
@ -560,13 +567,13 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
|
||||
|
||||
// If we have an available version of this load, and if it is the right
|
||||
// generation, replace this instruction.
|
||||
std::pair<Value *, unsigned> InVal =
|
||||
AvailableLoads.lookup(MemInst.getPtr());
|
||||
if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
|
||||
Value *Op = getOrCreateResult(InVal.first, Inst->getType());
|
||||
LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr());
|
||||
if (InVal.data != nullptr && InVal.generation == CurrentGeneration &&
|
||||
InVal.matchingId == MemInst.getMatchingId()) {
|
||||
Value *Op = getOrCreateResult(InVal.data, Inst->getType());
|
||||
if (Op != nullptr) {
|
||||
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
|
||||
<< " to: " << *InVal.first << '\n');
|
||||
<< " to: " << *InVal.data << '\n');
|
||||
if (!Inst->use_empty())
|
||||
Inst->replaceAllUsesWith(Op);
|
||||
Inst->eraseFromParent();
|
||||
@ -577,8 +584,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
|
||||
}
|
||||
|
||||
// Otherwise, remember that we have this instruction.
|
||||
AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
|
||||
Inst, CurrentGeneration));
|
||||
AvailableLoads.insert(
|
||||
MemInst.getPtr(),
|
||||
LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
|
||||
LastStore = nullptr;
|
||||
continue;
|
||||
}
|
||||
@ -652,8 +660,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
|
||||
// version of the pointer. It is safe to forward from volatile stores
|
||||
// to non-volatile loads, so we don't have to check for volatility of
|
||||
// the store.
|
||||
AvailableLoads.insert(MemInst.getPtr(), std::pair<Value *, unsigned>(
|
||||
Inst, CurrentGeneration));
|
||||
AvailableLoads.insert(
|
||||
MemInst.getPtr(),
|
||||
LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId()));
|
||||
|
||||
// Remember that this was the last store we saw for DSE.
|
||||
if (!MemInst.isVolatile())
|
||||
|
18
test/Transforms/EarlyCSE/AArch64/ldstN.ll
Normal file
18
test/Transforms/EarlyCSE/AArch64/ldstN.ll
Normal file
@ -0,0 +1,18 @@
|
||||
; RUN: opt -S -early-cse < %s | FileCheck %s
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64--linux-gnu"
|
||||
|
||||
declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>*)
|
||||
|
||||
; Although the store and the ld4 are using the same pointer, the
|
||||
; data cannot be reused because ld4 accesses multiple elements.
|
||||
define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @foo() {
|
||||
entry:
|
||||
store <4 x i16> undef, <4 x i16>* undef, align 8
|
||||
%0 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0v4i16(<4 x i16>* undef)
|
||||
ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %0
|
||||
; CHECK-LABEL: @foo(
|
||||
; CHECK: store
|
||||
; CHECK-NEXT: call
|
||||
; CHECK-NEXT: ret
|
||||
}
|
Loading…
Reference in New Issue
Block a user