[LIR] Add support for structs and hand unrolled loops
Now LIR can turn the following code into memset:

    typedef struct foo {
      int a;
      int b;
    } foo_t;

    void bar(foo_t *f, unsigned n) {
      for (unsigned i = 0; i < n; ++i) {
        f[i].a = 0;
        f[i].b = 0;
      }
    }

    void test(foo_t *f, unsigned n) {
      for (unsigned i = 0; i < n; i += 2) {
        f[i] = 0;
        f[i+1] = 0;
      }
    }

llvm-svn: 258620
This commit is contained in:
parent f692315c07 · commit 9d77533d54
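
For orientation, here is a rough C-level sketch of the end result the pass aims for on the loops above: once the per-field (or hand unrolled) stores are recognized as covering every byte of the range, they collapse into a single memset. This is only an illustration of the intent, not the pass's literal output (the pass rewrites LLVM IR and emits the llvm.memset intrinsic); bar_lowered is a hypothetical name used for the sketch.

    #include <string.h>

    typedef struct foo { int a; int b; } foo_t;

    /* What bar() conceptually becomes: the two field stores cover every
       byte of f[0..n), so one memset over the whole range suffices.  The
       hand unrolled test() loop reduces the same way. */
    void bar_lowered(foo_t *f, unsigned n) {
      memset(f, 0, (size_t)n * sizeof(foo_t));
    }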
include/llvm/Analysis/LoopAccessAnalysis.h

@@ -659,6 +659,11 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
 int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
                  const ValueToValueMap &StridesMap);
 
+/// \brief Returns true if the memory operations \p A and \p B are consecutive.
+/// This is a simple API that does not depend on the analysis pass.
+bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
+                         ScalarEvolution &SE, bool CheckType = true);
+
 /// \brief This analysis provides dependence information for the memory accesses
 /// of a loop.
 ///
lib/Analysis/LoopAccessAnalysis.cpp

@@ -901,6 +901,78 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
   return Stride;
 }
 
+/// Take the pointer operand from the Load/Store instruction.
+/// Returns NULL if this is not a valid Load/Store instruction.
+static Value *getPointerOperand(Value *I) {
+  if (LoadInst *LI = dyn_cast<LoadInst>(I))
+    return LI->getPointerOperand();
+  if (StoreInst *SI = dyn_cast<StoreInst>(I))
+    return SI->getPointerOperand();
+  return nullptr;
+}
+
+/// Take the address space operand from the Load/Store instruction.
+/// Returns -1 if this is not a valid Load/Store instruction.
+static unsigned getAddressSpaceOperand(Value *I) {
+  if (LoadInst *L = dyn_cast<LoadInst>(I))
+    return L->getPointerAddressSpace();
+  if (StoreInst *S = dyn_cast<StoreInst>(I))
+    return S->getPointerAddressSpace();
+  return -1;
+}
+
+/// Returns true if the memory operations \p A and \p B are consecutive.
+bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
+                               ScalarEvolution &SE, bool CheckType) {
+  Value *PtrA = getPointerOperand(A);
+  Value *PtrB = getPointerOperand(B);
+  unsigned ASA = getAddressSpaceOperand(A);
+  unsigned ASB = getAddressSpaceOperand(B);
+
+  // Check that the address spaces match and that the pointers are valid.
+  if (!PtrA || !PtrB || (ASA != ASB))
+    return false;
+
+  // Make sure that A and B are different pointers.
+  if (PtrA == PtrB)
+    return false;
+
+  // Make sure that A and B have the same type if required.
+  if (CheckType && PtrA->getType() != PtrB->getType())
+    return false;
+
+  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
+  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
+  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
+
+  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
+  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
+
+  //  OffsetDelta = OffsetB - OffsetA;
+  const SCEV *OffsetSCEVA = SE.getConstant(OffsetA);
+  const SCEV *OffsetSCEVB = SE.getConstant(OffsetB);
+  const SCEV *OffsetDeltaSCEV = SE.getMinusSCEV(OffsetSCEVB, OffsetSCEVA);
+  const SCEVConstant *OffsetDeltaC = dyn_cast<SCEVConstant>(OffsetDeltaSCEV);
+  const APInt &OffsetDelta = OffsetDeltaC->getAPInt();
+  // Check if they are based on the same pointer. That makes the offsets
+  // sufficient.
+  if (PtrA == PtrB)
+    return OffsetDelta == Size;
+
+  // Compute the necessary base pointer delta to have the necessary final delta
+  // equal to the size.
+  // BaseDelta = Size - OffsetDelta;
+  const SCEV *SizeSCEV = SE.getConstant(Size);
+  const SCEV *BaseDelta = SE.getMinusSCEV(SizeSCEV, OffsetDeltaSCEV);
+
+  // Otherwise compute the distance with SCEV between the base pointers.
+  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
+  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
+  const SCEV *X = SE.getAddExpr(PtrSCEVA, BaseDelta);
+  return X == PtrSCEVB;
+}
+
 bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
   switch (Type) {
   case NoDep:
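
A hedged worked example of what this check computes, in C terms and assuming 32-bit int with no struct padding: after stripping the constant GEP offsets, both accesses share the same base pointer, and the byte offset between them equals the store size of the first access, which is exactly the "consecutive" condition isConsecutiveAccess tests.

    #include <assert.h>
    #include <stddef.h>

    typedef struct foo { int a; int b; } foo_t;

    int main(void) {
      foo_t f[4];
      /* The store to f[0].a writes 4 bytes; f[0].b starts 4 bytes later,
         so the offset delta (4) equals the store size (4) -> consecutive. */
      assert((char *)&f[0].b - (char *)&f[0].a == (ptrdiff_t)sizeof(int));
      /* f[0].b and f[1].a are likewise 4 bytes apart, which is why the
         whole chain of stores ends up covering every byte of the array. */
      assert((char *)&f[1].a - (char *)&f[0].b == (ptrdiff_t)sizeof(int));
      return 0;
    }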
lib/Transforms/Scalar/LoopIdiomRecognize.cpp

@@ -26,22 +26,20 @@
 // i64 and larger types when i64 is legal and the value has few bits set.  It
 // would be good to enhance isel to emit a loop for ctpop in this case.
 //
-// We should enhance the memset/memcpy recognition to handle multiple stores in
-// the loop.  This would handle things like:
-//   void foo(_Complex float *P)
-//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
-//
 // This could recognize common matrix multiplies and dot product idioms and
 // replace them with calls to BLAS (if linked in??).
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
@@ -108,7 +106,9 @@ public:
 
 private:
   typedef SmallVector<StoreInst *, 8> StoreList;
-  StoreList StoreRefsForMemset;
+  typedef MapVector<Value *, StoreList> StoreListMap;
+  StoreListMap StoreRefsForMemset;
+  StoreListMap StoreRefsForMemsetPattern;
   StoreList StoreRefsForMemcpy;
   bool HasMemset;
   bool HasMemsetPattern;
@@ -122,14 +122,18 @@ private:
                       SmallVectorImpl<BasicBlock *> &ExitBlocks);
 
   void collectStores(BasicBlock *BB);
-  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemcpy);
-  bool processLoopStore(StoreInst *SI, const SCEV *BECount);
+  bool isLegalStore(StoreInst *SI, bool &ForMemset, bool &ForMemsetPattern,
+                    bool &ForMemcpy);
+  bool processLoopStores(SmallVectorImpl<StoreInst *> &SL, const SCEV *BECount,
+                         bool ForMemset);
   bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
 
   bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                unsigned StoreAlignment, Value *StoredVal,
-                               Instruction *TheStore, const SCEVAddRecExpr *Ev,
-                               const SCEV *BECount, bool NegStride);
+                               Instruction *TheStore,
+                               SmallPtrSetImpl<Instruction *> &Stores,
+                               const SCEVAddRecExpr *Ev, const SCEV *BECount,
+                               bool NegStride);
   bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
 
   /// @}
@@ -305,7 +309,7 @@ static Constant *getMemSetPatternValue(Value *V, const DataLayout *DL) {
 }
 
 bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
-                                      bool &ForMemcpy) {
+                                      bool &ForMemsetPattern, bool &ForMemcpy) {
   // Don't touch volatile stores.
   if (!SI->isSimple())
     return false;
@@ -353,7 +357,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
       StorePtr->getType()->getPointerAddressSpace() == 0 &&
       (PatternValue = getMemSetPatternValue(StoredVal, DL))) {
     // It looks like we can use PatternValue!
-    ForMemset = true;
+    ForMemsetPattern = true;
     return true;
   }
 
@@ -393,6 +397,7 @@ bool LoopIdiomRecognize::isLegalStore(StoreInst *SI, bool &ForMemset,
 
 void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
   StoreRefsForMemset.clear();
+  StoreRefsForMemsetPattern.clear();
   StoreRefsForMemcpy.clear();
   for (Instruction &I : *BB) {
     StoreInst *SI = dyn_cast<StoreInst>(&I);
@@ -400,15 +405,22 @@ void LoopIdiomRecognize::collectStores(BasicBlock *BB) {
       continue;
 
     bool ForMemset = false;
+    bool ForMemsetPattern = false;
    bool ForMemcpy = false;
     // Make sure this is a strided store with a constant stride.
-    if (!isLegalStore(SI, ForMemset, ForMemcpy))
+    if (!isLegalStore(SI, ForMemset, ForMemsetPattern, ForMemcpy))
       continue;
 
     // Save the store locations.
-    if (ForMemset)
-      StoreRefsForMemset.push_back(SI);
-    else if (ForMemcpy)
+    if (ForMemset) {
+      // Find the base pointer.
+      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+      StoreRefsForMemset[Ptr].push_back(SI);
+    } else if (ForMemsetPattern) {
+      // Find the base pointer.
+      Value *Ptr = GetUnderlyingObject(SI->getPointerOperand(), *DL);
+      StoreRefsForMemsetPattern[Ptr].push_back(SI);
+    } else if (ForMemcpy)
       StoreRefsForMemcpy.push_back(SI);
   }
 }
@@ -430,9 +442,14 @@ bool LoopIdiomRecognize::runOnLoopBlock(
   // Look for store instructions, which may be optimized to memset/memcpy.
   collectStores(BB);
 
-  // Look for a single store which can be optimized into a memset.
-  for (auto &SI : StoreRefsForMemset)
-    MadeChange |= processLoopStore(SI, BECount);
+  // Look for a single store or sets of stores with a common base, which can be
+  // optimized into a memset (memset_pattern).  The latter most commonly happens
+  // with structs and hand unrolled loops.
+  for (auto &SL : StoreRefsForMemset)
+    MadeChange |= processLoopStores(SL.second, BECount, true);
+
+  for (auto &SL : StoreRefsForMemsetPattern)
+    MadeChange |= processLoopStores(SL.second, BECount, false);
 
   // Optimize the store into a memcpy, if it feeds an similarly strided load.
   for (auto &SI : StoreRefsForMemcpy)
@@ -458,26 +475,155 @@ bool LoopIdiomRecognize::runOnLoopBlock(
   return MadeChange;
 }
 
-/// processLoopStore - See if this store can be promoted to a memset.
-bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
-  assert(SI->isSimple() && "Expected only non-volatile stores.");
-
-  Value *StoredVal = SI->getValueOperand();
-  Value *StorePtr = SI->getPointerOperand();
-
-  // Check to see if the stride matches the size of the store.  If so, then we
-  // know that every byte is touched in the loop.
-  const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
-  unsigned Stride = getStoreStride(StoreEv);
-  unsigned StoreSize = getStoreSizeInBytes(SI, DL);
-  if (StoreSize != Stride && StoreSize != -Stride)
-    return false;
-
-  bool NegStride = StoreSize == -Stride;
-
-  // See if we can optimize just this store in isolation.
-  return processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
-                                 StoredVal, SI, StoreEv, BECount, NegStride);
+/// processLoopStores - See if this store(s) can be promoted to a memset.
+bool LoopIdiomRecognize::processLoopStores(SmallVectorImpl<StoreInst *> &SL,
+                                           const SCEV *BECount,
+                                           bool ForMemset) {
+  // Try to find consecutive stores that can be transformed into memsets.
+  SetVector<StoreInst *> Heads, Tails;
+  SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
+
+  // Do a quadratic search on all of the given stores and find
+  // all of the pairs of stores that follow each other.
+  SmallVector<unsigned, 16> IndexQueue;
+  for (unsigned i = 0, e = SL.size(); i < e; ++i) {
+    assert(SL[i]->isSimple() && "Expected only non-volatile stores.");
+
+    Value *FirstStoredVal = SL[i]->getValueOperand();
+    Value *FirstStorePtr = SL[i]->getPointerOperand();
+    const SCEVAddRecExpr *FirstStoreEv =
+        cast<SCEVAddRecExpr>(SE->getSCEV(FirstStorePtr));
+    unsigned FirstStride = getStoreStride(FirstStoreEv);
+    unsigned FirstStoreSize = getStoreSizeInBytes(SL[i], DL);
+
+    // See if we can optimize just this store in isolation.
+    if (FirstStride == FirstStoreSize || FirstStride == -FirstStoreSize) {
+      Heads.insert(SL[i]);
+      continue;
+    }
+
+    Value *FirstSplatValue = nullptr;
+    Constant *FirstPatternValue = nullptr;
+
+    if (ForMemset)
+      FirstSplatValue = isBytewiseValue(FirstStoredVal);
+    else
+      FirstPatternValue = getMemSetPatternValue(FirstStoredVal, DL);
+
+    assert((FirstSplatValue || FirstPatternValue) &&
+           "Expected either splat value or pattern value.");
+
+    IndexQueue.clear();
+    // If a store has multiple consecutive store candidates, search Stores
+    // array according to the sequence: from i+1 to e, then from i-1 to 0.
+    // This is because usually pairing with immediate succeeding or preceding
+    // candidate create the best chance to find memset opportunity.
+    unsigned j = 0;
+    for (j = i + 1; j < e; ++j)
+      IndexQueue.push_back(j);
+    for (j = i; j > 0; --j)
+      IndexQueue.push_back(j - 1);
+
+    for (auto &k : IndexQueue) {
+      assert(SL[k]->isSimple() && "Expected only non-volatile stores.");
+      Value *SecondStorePtr = SL[k]->getPointerOperand();
+      const SCEVAddRecExpr *SecondStoreEv =
+          cast<SCEVAddRecExpr>(SE->getSCEV(SecondStorePtr));
+      unsigned SecondStride = getStoreStride(SecondStoreEv);
+
+      if (FirstStride != SecondStride)
+        continue;
+
+      Value *SecondStoredVal = SL[k]->getValueOperand();
+      Value *SecondSplatValue = nullptr;
+      Constant *SecondPatternValue = nullptr;
+
+      if (ForMemset)
+        SecondSplatValue = isBytewiseValue(SecondStoredVal);
+      else
+        SecondPatternValue = getMemSetPatternValue(SecondStoredVal, DL);
+
+      assert((SecondSplatValue || SecondPatternValue) &&
+             "Expected either splat value or pattern value.");
+
+      if (isConsecutiveAccess(SL[i], SL[k], *DL, *SE, false)) {
+        if (ForMemset) {
+          ConstantInt *C1 = dyn_cast<ConstantInt>(FirstSplatValue);
+          ConstantInt *C2 = dyn_cast<ConstantInt>(SecondSplatValue);
+          if (!C1 || !C2 || C1 != C2)
+            continue;
+        } else {
+          Constant *C1 = FirstPatternValue;
+          Constant *C2 = SecondPatternValue;
+
+          if (ConstantArray *CA1 = dyn_cast<ConstantArray>(C1))
+            C1 = CA1->getSplatValue();
+
+          if (ConstantArray *CA2 = dyn_cast<ConstantArray>(C2))
+            C2 = CA2->getSplatValue();
+
+          if (C1 != C2)
+            continue;
+        }
+        Tails.insert(SL[k]);
+        Heads.insert(SL[i]);
+        ConsecutiveChain[SL[i]] = SL[k];
+        break;
+      }
+    }
+  }
+
+  // We may run into multiple chains that merge into a single chain. We mark the
+  // stores that we transformed so that we don't visit the same store twice.
+  SmallPtrSet<Value *, 16> TransformedStores;
+  bool Changed = false;
+
+  // For stores that start but don't end a link in the chain:
+  for (SetVector<StoreInst *>::iterator it = Heads.begin(), e = Heads.end();
+       it != e; ++it) {
+    if (Tails.count(*it))
+      continue;
+
+    // We found a store instr that starts a chain. Now follow the chain and try
+    // to transform it.
+    SmallPtrSet<Instruction *, 8> AdjacentStores;
+    StoreInst *I = *it;
+
+    StoreInst *HeadStore = I;
+    unsigned StoreSize = 0;
+
+    // Collect the chain into a list.
+    while (Tails.count(I) || Heads.count(I)) {
+      if (TransformedStores.count(I))
+        break;
+      AdjacentStores.insert(I);
+
+      StoreSize += getStoreSizeInBytes(I, DL);
+      // Move to the next value in the chain.
+      I = ConsecutiveChain[I];
+    }
+
+    Value *StoredVal = HeadStore->getValueOperand();
+    Value *StorePtr = HeadStore->getPointerOperand();
+    const SCEVAddRecExpr *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
+    unsigned Stride = getStoreStride(StoreEv);
+
+    // Check to see if the stride matches the size of the stores.  If so, then
+    // we know that every byte is touched in the loop.
+    if (StoreSize != Stride && StoreSize != -Stride)
+      continue;
+
+    bool NegStride = StoreSize == -Stride;
+
+    if (processLoopStridedStore(StorePtr, StoreSize, HeadStore->getAlignment(),
+                                StoredVal, HeadStore, AdjacentStores, StoreEv,
+                                BECount, NegStride)) {
+      TransformedStores.insert(AdjacentStores.begin(), AdjacentStores.end());
+      Changed = true;
+    }
+  }
+
+  return Changed;
 }
 
 /// processLoopMemSet - See if this memset can be promoted to a large memset.
@@ -520,18 +666,21 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
   if (!SplatValue || !CurLoop->isLoopInvariant(SplatValue))
     return false;
 
+  SmallPtrSet<Instruction *, 1> MSIs;
+  MSIs.insert(MSI);
   return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
-                                 MSI->getAlignment(), SplatValue, MSI, Ev,
+                                 MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
                                  BECount, /*NegStride=*/false);
 }
 
 /// mayLoopAccessLocation - Return true if the specified loop might access the
 /// specified pointer location, which is a loop-strided access.  The 'Access'
 /// argument specifies what the verboten forms of access are (read or write).
-static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
-                                  const SCEV *BECount, unsigned StoreSize,
-                                  AliasAnalysis &AA,
-                                  Instruction *IgnoredStore) {
+static bool
+mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
+                      const SCEV *BECount, unsigned StoreSize,
+                      AliasAnalysis &AA,
+                      SmallPtrSetImpl<Instruction *> &IgnoredStores) {
   // Get the location that may be stored across the loop.  Since the access is
   // strided positively through memory, we say that the modified location starts
   // at the pointer and has infinite size.
@@ -551,7 +700,8 @@ static bool mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
   for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
        ++BI)
     for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
-      if (&*I != IgnoredStore && (AA.getModRefInfo(&*I, StoreLoc) & Access))
+      if (IgnoredStores.count(&*I) == 0 &&
+          (AA.getModRefInfo(&*I, StoreLoc) & Access))
         return true;
 
   return false;
@@ -574,7 +724,8 @@ static const SCEV *getStartForNegStride(const SCEV *Start, const SCEV *BECount,
 /// transform this into a memset or memset_pattern in the loop preheader, do so.
 bool LoopIdiomRecognize::processLoopStridedStore(
     Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
-    Value *StoredVal, Instruction *TheStore, const SCEVAddRecExpr *Ev,
+    Value *StoredVal, Instruction *TheStore,
+    SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
     const SCEV *BECount, bool NegStride) {
   Value *SplatValue = isBytewiseValue(StoredVal);
   Constant *PatternValue = nullptr;
@@ -609,7 +760,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
   Value *BasePtr =
       Expander.expandCodeFor(Start, DestInt8PtrTy, Preheader->getTerminator());
   if (mayLoopAccessLocation(BasePtr, MRI_ModRef, CurLoop, BECount, StoreSize,
-                            *AA, TheStore)) {
+                            *AA, Stores)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(BasePtr, TLI);
|
|||||||
|
|
||||||
// Okay, the memset has been formed. Zap the original store and anything that
|
// Okay, the memset has been formed. Zap the original store and anything that
|
||||||
// feeds into it.
|
// feeds into it.
|
||||||
deleteDeadInstruction(TheStore, TLI);
|
for (auto *I : Stores)
|
||||||
|
deleteDeadInstruction(I, TLI);
|
||||||
++NumMemSet;
|
++NumMemSet;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -714,8 +866,10 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
   Value *StoreBasePtr = Expander.expandCodeFor(
       StrStart, Builder.getInt8PtrTy(StrAS), Preheader->getTerminator());
 
+  SmallPtrSet<Instruction *, 1> Stores;
+  Stores.insert(SI);
   if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
-                            StoreSize, *AA, SI)) {
+                            StoreSize, *AA, Stores)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
@@ -735,7 +889,7 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
       LdStart, Builder.getInt8PtrTy(LdAS), Preheader->getTerminator());
 
   if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
-                            *AA, SI)) {
+                            *AA, Stores)) {
     Expander.clear();
     // If we generated new code for the base pointer, clean up.
     RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
|
@ -26,6 +26,7 @@
|
|||||||
#include "llvm/Analysis/AssumptionCache.h"
|
#include "llvm/Analysis/AssumptionCache.h"
|
||||||
#include "llvm/Analysis/CodeMetrics.h"
|
#include "llvm/Analysis/CodeMetrics.h"
|
||||||
#include "llvm/Analysis/LoopInfo.h"
|
#include "llvm/Analysis/LoopInfo.h"
|
||||||
|
#include "llvm/Analysis/LoopAccessAnalysis.h"
|
||||||
#include "llvm/Analysis/ScalarEvolution.h"
|
#include "llvm/Analysis/ScalarEvolution.h"
|
||||||
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
|
||||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||||
@@ -401,9 +402,6 @@ public:
     }
   }
 
-  /// \returns true if the memory operations A and B are consecutive.
-  bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL);
-
   /// \brief Perform LICM and CSE on the newly generated gather sequences.
   void optimizeGatherSequence();
 
|
|||||||
/// vectorized, or NULL. They may happen in cycles.
|
/// vectorized, or NULL. They may happen in cycles.
|
||||||
Value *alreadyVectorized(ArrayRef<Value *> VL) const;
|
Value *alreadyVectorized(ArrayRef<Value *> VL) const;
|
||||||
|
|
||||||
/// \brief Take the pointer operand from the Load/Store instruction.
|
|
||||||
/// \returns NULL if this is not a valid Load/Store instruction.
|
|
||||||
static Value *getPointerOperand(Value *I);
|
|
||||||
|
|
||||||
/// \brief Take the address space operand from the Load/Store instruction.
|
|
||||||
/// \returns -1 if this is not a valid Load/Store instruction.
|
|
||||||
static unsigned getAddressSpaceOperand(Value *I);
|
|
||||||
|
|
||||||
/// \returns the scalarization cost for this type. Scalarization in this
|
/// \returns the scalarization cost for this type. Scalarization in this
|
||||||
/// context means the creation of vectors from a group of scalars.
|
/// context means the creation of vectors from a group of scalars.
|
||||||
int getGatherCost(Type *Ty);
|
int getGatherCost(Type *Ty);
|
||||||
@ -1191,8 +1181,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
|
if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
|
||||||
if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL)) {
|
if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL, *SE)) {
|
||||||
++NumLoadsWantToChangeOrder;
|
++NumLoadsWantToChangeOrder;
|
||||||
}
|
}
|
||||||
BS.cancelScheduling(VL);
|
BS.cancelScheduling(VL);
|
||||||
@ -1364,7 +1354,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
|||||||
const DataLayout &DL = F->getParent()->getDataLayout();
|
const DataLayout &DL = F->getParent()->getDataLayout();
|
||||||
// Check if the stores are consecutive or of we need to swizzle them.
|
// Check if the stores are consecutive or of we need to swizzle them.
|
||||||
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
|
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
|
||||||
if (!isConsecutiveAccess(VL[i], VL[i + 1], DL)) {
|
if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) {
|
||||||
BS.cancelScheduling(VL);
|
BS.cancelScheduling(VL);
|
||||||
newTreeEntry(VL, false);
|
newTreeEntry(VL, false);
|
||||||
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
|
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
|
||||||
@@ -1837,63 +1827,6 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
   return getGatherCost(VecTy);
 }
 
-Value *BoUpSLP::getPointerOperand(Value *I) {
-  if (LoadInst *LI = dyn_cast<LoadInst>(I))
-    return LI->getPointerOperand();
-  if (StoreInst *SI = dyn_cast<StoreInst>(I))
-    return SI->getPointerOperand();
-  return nullptr;
-}
-
-unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
-  if (LoadInst *L = dyn_cast<LoadInst>(I))
-    return L->getPointerAddressSpace();
-  if (StoreInst *S = dyn_cast<StoreInst>(I))
-    return S->getPointerAddressSpace();
-  return -1;
-}
-
-bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL) {
-  Value *PtrA = getPointerOperand(A);
-  Value *PtrB = getPointerOperand(B);
-  unsigned ASA = getAddressSpaceOperand(A);
-  unsigned ASB = getAddressSpaceOperand(B);
-
-  // Check that the address spaces match and that the pointers are valid.
-  if (!PtrA || !PtrB || (ASA != ASB))
-    return false;
-
-  // Make sure that A and B are different pointers of the same type.
-  if (PtrA == PtrB || PtrA->getType() != PtrB->getType())
-    return false;
-
-  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
-  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
-  APInt Size(PtrBitWidth, DL.getTypeStoreSize(Ty));
-
-  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
-  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
-  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
-
-  APInt OffsetDelta = OffsetB - OffsetA;
-
-  // Check if they are based on the same pointer. That makes the offsets
-  // sufficient.
-  if (PtrA == PtrB)
-    return OffsetDelta == Size;
-
-  // Compute the necessary base pointer delta to have the necessary final delta
-  // equal to the size.
-  APInt BaseDelta = Size - OffsetDelta;
-
-  // Otherwise compute the distance with SCEV between the base pointers.
-  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
-  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
-  const SCEV *C = SE->getConstant(BaseDelta);
-  const SCEV *X = SE->getAddExpr(PtrSCEVA, C);
-  return X == PtrSCEVB;
-}
-
 // Reorder commutative operations in alternate shuffle if the resulting vectors
 // are consecutive loads. This would allow us to vectorize the tree.
 // If we have something like-
@@ -1921,10 +1854,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
         if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
           Instruction *VL1 = cast<Instruction>(VL[j]);
           Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-          if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+          if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
             std::swap(Left[j], Right[j]);
             continue;
-          } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+          } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
             std::swap(Left[j + 1], Right[j + 1]);
             continue;
           }
@@ -1935,10 +1868,10 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL,
         if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
           Instruction *VL1 = cast<Instruction>(VL[j]);
           Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-          if (isConsecutiveAccess(L, L1, DL) && VL1->isCommutative()) {
+          if (isConsecutiveAccess(L, L1, DL, *SE) && VL1->isCommutative()) {
            std::swap(Left[j], Right[j]);
            continue;
-          } else if (isConsecutiveAccess(L, L1, DL) && VL2->isCommutative()) {
+          } else if (isConsecutiveAccess(L, L1, DL, *SE) && VL2->isCommutative()) {
            std::swap(Left[j + 1], Right[j + 1]);
            continue;
          }
@@ -2088,7 +2021,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
     for (unsigned j = 0; j < VL.size() - 1; ++j) {
       if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
         if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
-          if (isConsecutiveAccess(L, L1, DL)) {
+          if (isConsecutiveAccess(L, L1, DL, *SE)) {
             std::swap(Left[j + 1], Right[j + 1]);
             continue;
           }
@@ -2096,7 +2029,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
       }
       if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
         if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
-          if (isConsecutiveAccess(L, L1, DL)) {
+          if (isConsecutiveAccess(L, L1, DL, *SE)) {
             std::swap(Left[j + 1], Right[j + 1]);
             continue;
           }
@@ -3461,7 +3394,7 @@ bool SLPVectorizer::vectorizeStores(ArrayRef<StoreInst *> Stores,
       IndexQueue.push_back(j - 1);
 
     for (auto &k : IndexQueue) {
-      if (R.isConsecutiveAccess(Stores[i], Stores[k], DL)) {
+      if (isConsecutiveAccess(Stores[i], Stores[k], DL, *SE)) {
         Tails.insert(Stores[k]);
         Heads.insert(Stores[i]);
         ConsecutiveChain[Stores[i]] = Stores[k];
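
As a side note, here is a hedged C-level picture of the two cases the new processLoopStores path distinguishes: a stored value whose bytes are all identical can become a plain memset, while a repeated non-splat constant such as 2 needs the pattern form (the tests below check for a memset_pattern16 call, the Darwin library routine the pass targets when it is available). The function names here are illustrative only.

    #include <string.h>

    typedef struct foo { int a; int b; } foo_t;

    /* Splat case: 0 is the same in every byte, so memset expresses it. */
    void zero_fields(foo_t *f, unsigned n) {
      memset(f, 0, (size_t)n * sizeof(foo_t));
    }

    /* Pattern case: 2 is not a byte splat (bytes 02 00 00 00), so a plain
       memset cannot express it; the pass would instead emit a
       memset_pattern16-style call repeating the 4-byte pattern.  Shown here
       as the original, untransformed loop. */
    void fill_with_twos(foo_t *f, unsigned n) {
      for (unsigned i = 0; i < n; ++i) {
        f[i].a = 2;
        f[i].b = 2;
      }
    }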
test/Transforms/LoopIdiom/struct.ll (new file, 221 lines)

@@ -0,0 +1,221 @@
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

target triple = "x86_64-apple-darwin10.0.0"

%struct.foo = type { i32, i32 }
%struct.foo1 = type { i32, i32, i32 }
%struct.foo2 = type { i32, i16, i16 }

;void bar1(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 0;
;    f[i].b = 0;
;  }
;}
define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
  store i32 0, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
  store i32 0, i32* %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar1(
; CHECK: call void @llvm.memset
; CHECK-NOT: store
}

;void bar2(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].b = 0;
;    f[i].a = 0;
;  }
;}
define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
  store i32 0, i32* %b, align 4
  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
  store i32 0, i32* %a, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar2(
; CHECK: call void @llvm.memset
; CHECK-NOT: store
}

;void bar3(foo_t *f, unsigned n) {
;  for (unsigned i = n; i > 0; --i) {
;    f[i].a = 0;
;    f[i].b = 0;
;  }
;}
define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  %0 = zext i32 %n to i64
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
  store i32 0, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
  store i32 0, i32* %b, align 4
  %1 = trunc i64 %indvars.iv to i32
  %dec = add i32 %1, -1
  %cmp = icmp eq i32 %dec, 0
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  br i1 %cmp, label %for.end.loopexit, label %for.body

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar3(
; CHECK: call void @llvm.memset
; CHECK-NOT: store
}

;void bar4(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 0;
;    f[i].b = 1;
;  }
;}
define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
  store i32 0, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
  store i32 1, i32* %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar4(
; CHECK-NOT: call void @llvm.memset
}

;void bar5(foo1_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 0;
;    f[i].b = 0;
;  }
;}
define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
  store i32 0, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
  store i32 0, i32* %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar5(
; CHECK-NOT: call void @llvm.memset
}

;void bar6(foo2_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 0;
;    f[i].b = 0;
;    f[i].c = 0;
;  }
;}
define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 0
  store i32 0, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 1
  store i16 0, i16* %b, align 4
  %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 2
  store i16 0, i16* %c, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar6(
; CHECK: call void @llvm.memset
; CHECK-NOT: store
}
test/Transforms/LoopIdiom/struct_pattern.ll (new file, 186 lines)

@@ -0,0 +1,186 @@
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16

target triple = "x86_64-apple-darwin10.0.0"

%struct.foo = type { i32, i32 }
%struct.foo1 = type { i32, i32, i32 }

;void bar1(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 2;
;    f[i].b = 2;
;  }
;}
define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
  store i32 2, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
  store i32 2, i32* %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar1(
; CHECK: call void @memset_pattern16
; CHECK-NOT: store
}

;void bar2(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].b = 2;
;    f[i].a = 2;
;  }
;}
define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
  store i32 2, i32* %b, align 4
  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
  store i32 2, i32* %a, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar2(
; CHECK: call void @memset_pattern16
; CHECK-NOT: store
}

;void bar3(foo_t *f, unsigned n) {
;  for (unsigned i = n; i > 0; --i) {
;    f[i].a = 2;
;    f[i].b = 2;
;  }
;}
define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  %0 = zext i32 %n to i64
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
  store i32 2, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
  store i32 2, i32* %b, align 4
  %1 = trunc i64 %indvars.iv to i32
  %dec = add i32 %1, -1
  %cmp = icmp eq i32 %dec, 0
  %indvars.iv.next = add nsw i64 %indvars.iv, -1
  br i1 %cmp, label %for.end.loopexit, label %for.body

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar3(
; CHECK: call void @memset_pattern16
; CHECK-NOT: store
}

;void bar4(foo_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 0;
;    f[i].b = 1;
;  }
;}
define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
  store i32 0, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
  store i32 1, i32* %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar4(
; CHECK-NOT: call void @memset_pattern16
}

;void bar5(foo1_t *f, unsigned n) {
;  for (unsigned i = 0; i < n; ++i) {
;    f[i].a = 1;
;    f[i].b = 1;
;  }
;}
define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
entry:
  %cmp1 = icmp eq i32 %n, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
  store i32 1, i32* %a, align 4
  %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
  store i32 1, i32* %b, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @bar5(
; CHECK-NOT: call void @memset_pattern16
}
test/Transforms/LoopIdiom/unroll.ll (new file, 80 lines)

@@ -0,0 +1,80 @@
; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; CHECK @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16

target triple = "x86_64-apple-darwin10.0.0"

;void test(int *f, unsigned n) {
;  for (unsigned i = 0; i < 2 * n; i += 2) {
;    f[i] = 0;
;    f[i+1] = 0;
;  }
;}
define void @test(i32* %f, i32 %n) nounwind ssp {
entry:
  %mul = shl i32 %n, 1
  %cmp1 = icmp eq i32 %mul, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  %0 = zext i32 %mul to i64
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 0, i32* %arrayidx, align 4
  %1 = or i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
  store i32 0, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, %0
  br i1 %cmp, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @test(
; CHECK: call void @llvm.memset
; CHECK-NOT: store
}

;void test_pattern(int *f, unsigned n) {
;  for (unsigned i = 0; i < 2 * n; i += 2) {
;    f[i] = 2;
;    f[i+1] = 2;
;  }
;}
define void @test_pattern(i32* %f, i32 %n) nounwind ssp {
entry:
  %mul = shl i32 %n, 1
  %cmp1 = icmp eq i32 %mul, 0
  br i1 %cmp1, label %for.end, label %for.body.preheader

for.body.preheader: ; preds = %entry
  %0 = zext i32 %mul to i64
  br label %for.body

for.body: ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
  store i32 2, i32* %arrayidx, align 4
  %1 = or i64 %indvars.iv, 1
  %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
  store i32 2, i32* %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
  %cmp = icmp ult i64 %indvars.iv.next, %0
  br i1 %cmp, label %for.body, label %for.end.loopexit

for.end.loopexit: ; preds = %for.body
  br label %for.end

for.end: ; preds = %for.end.loopexit, %entry
  ret void
; CHECK-LABEL: @test_pattern(
; CHECK: call void @memset_pattern16
; CHECK-NOT: store
}