1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[LSV] Don't assume that loads/stores appear in address order in the BB.

Summary:
getVectorizablePrefix previously didn't work properly in the face of
aliasing loads/stores.  It unwittingly assumed that the loads/stores
appeared in the BB in address order.  If they didn't, it would do the
wrong thing.

Reviewers: asbirlea, tstellarAMD

Subscribers: arsenm, llvm-commits, mzolotukhin

Differential Revision: https://reviews.llvm.org/D22535

llvm-svn: 276072
This commit is contained in:
Justin Lebar 2016-07-20 00:55:12 +00:00
parent ceb53a60fa
commit f8c66349e0
2 changed files with 70 additions and 20 deletions

View File

@ -108,7 +108,8 @@ private:
/// intervening instructions which may affect the memory accessed by the
/// instructions within Chain.
///
/// The elements of \p Chain must be all loads or all stores.
/// The elements of \p Chain must be all loads or all stores and must be in
/// address order.
ArrayRef<Value *> getVectorizablePrefix(ArrayRef<Value *> Chain);
/// Collects load and store instructions to vectorize.
@ -424,6 +425,7 @@ Vectorizer::splitOddVectorElts(ArrayRef<Value *> Chain,
}
ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
// These are in BB order, unlike Chain, which is in address order.
SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
@ -444,31 +446,34 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
assert(Chain.size() == ChainInstrs.size() &&
"All instrs in Chain must be within range getBoundaryInstrs(Chain).");
unsigned ChainIdx = 0;
for (auto EntryChain : ChainInstrs) {
Value *ChainInstrValue = EntryChain.first;
unsigned ChainInstrIdx = EntryChain.second;
// Loop until we find an instruction in ChainInstrs that we can't vectorize.
unsigned ChainInstrIdx, ChainInstrsLen;
for (ChainInstrIdx = 0, ChainInstrsLen = ChainInstrs.size();
ChainInstrIdx < ChainInstrsLen; ++ChainInstrIdx) {
Value *ChainInstr = ChainInstrs[ChainInstrIdx].first;
unsigned ChainInstrLoc = ChainInstrs[ChainInstrIdx].second;
bool AliasFound = false;
for (auto EntryMem : MemoryInstrs) {
Value *MemInstrValue = EntryMem.first;
unsigned MemInstrIdx = EntryMem.second;
if (isa<LoadInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue))
Value *MemInstr = EntryMem.first;
unsigned MemInstrLoc = EntryMem.second;
if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
continue;
// We can ignore the alias as long as the load comes before the store,
// because that means we won't be moving the load past the store to
// vectorize it (the vectorized load is inserted at the location of the
// first load in the chain).
if (isa<StoreInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue) &&
ChainInstrIdx < MemInstrIdx)
if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
ChainInstrLoc < MemInstrLoc)
continue;
// Same case, but in reverse.
if (isa<LoadInst>(MemInstrValue) && isa<StoreInst>(ChainInstrValue) &&
ChainInstrIdx > MemInstrIdx)
if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
ChainInstrLoc > MemInstrLoc)
continue;
Instruction *M0 = cast<Instruction>(MemInstrValue);
Instruction *M1 = cast<Instruction>(ChainInstrValue);
Instruction *M0 = cast<Instruction>(MemInstr);
Instruction *M1 = cast<Instruction>(ChainInstr);
if (!AA.isNoAlias(MemoryLocation::get(M0), MemoryLocation::get(M1))) {
DEBUG({
@ -477,19 +482,34 @@ ArrayRef<Value *> Vectorizer::getVectorizablePrefix(ArrayRef<Value *> Chain) {
dbgs() << "LSV: Found alias:\n"
" Aliasing instruction and pointer:\n"
<< " " << *MemInstrValue << '\n'
<< " " << *MemInstr << '\n'
<< " " << *Ptr0 << '\n'
<< " Aliased instruction and pointer:\n"
<< " " << *ChainInstrValue << '\n'
<< " " << *ChainInstr << '\n'
<< " " << *Ptr1 << '\n';
});
return Chain.slice(0, ChainIdx);
AliasFound = true;
break;
}
}
ChainIdx++;
if (AliasFound)
break;
}
return Chain;
// Find the largest prefix of Chain whose elements are all in
// ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
// Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
// order.)
auto VectorizableChainInstrs =
makeArrayRef(ChainInstrs.data(), ChainInstrIdx);
unsigned ChainIdx, ChainLen;
for (ChainIdx = 0, ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
Value *V = Chain[ChainIdx];
if (!any_of(VectorizableChainInstrs,
[V](std::pair<Value *, unsigned> CI) { return V == CI.first; }))
break;
}
return Chain.slice(0, ChainIdx);
}
std::pair<ValueListMap, ValueListMap>

View File

@ -86,4 +86,34 @@ define float @insert_store_point_alias(float addrspace(1)* nocapture %a, i64 %id
ret float %x
}
; Here we have four stores, with an aliasing load before the last one. We
; could vectorize two of the stores before the load (although we currently
; don't), but the important thing is that we *don't* sink the store to
; a[idx + 1] below the load.
;
; CHECK-LABEL: @insert_store_point_alias_ooo
; CHECK: store float
; CHECK-SAME: %a.idx.3
; CHECK: store float
; CHECK-SAME: %a.idx.1
; CHECK: store float
; CHECK-SAME: %a.idx.2
; CHECK: load float, float addrspace(1)* %a.idx.2
; CHECK: store float
; CHECK-SAME: %a.idx
define float @insert_store_point_alias_ooo(float addrspace(1)* nocapture %a, i64 %idx) {
%a.idx = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
%a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a.idx, i64 1
%a.idx.2 = getelementptr inbounds float, float addrspace(1)* %a.idx.1, i64 1
%a.idx.3 = getelementptr inbounds float, float addrspace(1)* %a.idx.2, i64 1
store float 0.0, float addrspace(1)* %a.idx.3, align 4
store float 0.0, float addrspace(1)* %a.idx.1, align 4
store float 0.0, float addrspace(1)* %a.idx.2, align 4
%x = load float, float addrspace(1)* %a.idx.2, align 4
store float 0.0, float addrspace(1)* %a.idx, align 4
ret float %x
}
attributes #0 = { nounwind }