mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
Allow GlobalValues to vectorize with AliasAnalysis
Store the load/store instructions alongside the values and inspect them using Alias Analysis to make sure they don't alias, since the GEP pointer operand doesn't take the offset into account. To avoid adding any extra cost to loads and stores that don't overlap on global values, AA is *only* consulted if all of the previous attempts failed. The biggest vector register size is used as the stride for the vectorization access, as we're being conservative and the cost model (which calculates the real vectorization factor) is only run after the legalization phase. We might re-think this relationship in the future, but for now, I'd rather be safe than sorry. llvm-svn: 175818
This commit is contained in:
parent
ab1db3b10b
commit
d371b2ca14
@ -319,8 +319,9 @@ private:
|
||||
class LoopVectorizationLegality {
|
||||
public:
|
||||
LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DataLayout *DL,
|
||||
DominatorTree *DT)
|
||||
: TheLoop(L), SE(SE), DL(DL), DT(DT), Induction(0) {}
|
||||
DominatorTree *DT, TargetTransformInfo* TTI,
|
||||
AliasAnalysis* AA)
|
||||
: TheLoop(L), SE(SE), DL(DL), DT(DT), TTI(TTI), AA(AA), Induction(0) {}
|
||||
|
||||
/// This enum represents the kinds of reductions that we support.
|
||||
enum ReductionKind {
|
||||
@ -404,6 +405,11 @@ public:
|
||||
/// induction descriptor.
|
||||
typedef MapVector<PHINode*, InductionInfo> InductionList;
|
||||
|
||||
/// Alias(Multi)Map stores the values (GEPs or underlying objects) and their
|
||||
/// respective Store/Load instruction(s) to calculate aliasing.
|
||||
typedef DenseMap<Value*, Instruction* > AliasMap;
|
||||
typedef DenseMap<Value*, std::vector<Instruction*> > AliasMultiMap;
|
||||
|
||||
/// Returns true if it is legal to vectorize this loop.
|
||||
/// This does not mean that it is profitable to vectorize this
|
||||
/// loop, only that it is legal to do so.
|
||||
@ -477,6 +483,14 @@ private:
|
||||
InductionKind isInductionVariable(PHINode *Phi);
|
||||
/// Return true if we can compute the address bounds of Ptr within the loop.
|
||||
bool hasComputableBounds(Value *Ptr);
|
||||
/// Return true if there is a chance of a write reorder.
|
||||
bool hasPossibleGlobalWriteReorder(Value *Object,
|
||||
Instruction *Inst,
|
||||
AliasMultiMap &WriteObjects,
|
||||
unsigned MaxByteWidth);
|
||||
/// Return the AA location for a load or a store.
|
||||
AliasAnalysis::Location getLoadStoreLocation(Instruction *Inst);
|
||||
|
||||
|
||||
/// The loop that we evaluate.
|
||||
Loop *TheLoop;
|
||||
@ -484,8 +498,12 @@ private:
|
||||
ScalarEvolution *SE;
|
||||
/// DataLayout analysis.
|
||||
DataLayout *DL;
|
||||
// Dominators.
|
||||
/// Dominators.
|
||||
DominatorTree *DT;
|
||||
/// Target Info.
|
||||
TargetTransformInfo *TTI;
|
||||
/// Alias Analysis.
|
||||
AliasAnalysis *AA;
|
||||
|
||||
// --- vectorization state --- //
|
||||
|
||||
@ -612,6 +630,7 @@ struct LoopVectorize : public LoopPass {
|
||||
LoopInfo *LI;
|
||||
TargetTransformInfo *TTI;
|
||||
DominatorTree *DT;
|
||||
AliasAnalysis *AA;
|
||||
|
||||
virtual bool runOnLoop(Loop *L, LPPassManager &LPM) {
|
||||
// We only vectorize innermost loops.
|
||||
@ -623,12 +642,13 @@ struct LoopVectorize : public LoopPass {
|
||||
LI = &getAnalysis<LoopInfo>();
|
||||
TTI = &getAnalysis<TargetTransformInfo>();
|
||||
DT = &getAnalysis<DominatorTree>();
|
||||
AA = getAnalysisIfAvailable<AliasAnalysis>();
|
||||
|
||||
DEBUG(dbgs() << "LV: Checking a loop in \"" <<
|
||||
L->getHeader()->getParent()->getName() << "\"\n");
|
||||
|
||||
// Check if it is legal to vectorize the loop.
|
||||
LoopVectorizationLegality LVL(L, SE, DL, DT);
|
||||
LoopVectorizationLegality LVL(L, SE, DL, DT, TTI, AA);
|
||||
if (!LVL.canVectorize()) {
|
||||
DEBUG(dbgs() << "LV: Not vectorizing.\n");
|
||||
return false;
|
||||
@ -2275,6 +2295,42 @@ void LoopVectorizationLegality::collectLoopUniforms() {
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the AliasAnalysis location accessed by Inst.
///
/// Inst is expected to be a StoreInst or a LoadInst; any other instruction
/// kind falls through to llvm_unreachable. AA is dereferenced here without a
/// null check, so callers must only reach this when AA is available.
AliasAnalysis::Location
|
||||
LoopVectorizationLegality::getLoadStoreLocation(Instruction *Inst) {
|
||||
if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
|
||||
return AA->getLocation(Store);
|
||||
else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
|
||||
return AA->getLocation(Load);
|
||||
|
||||
llvm_unreachable("Should be either load or store instruction");
|
||||
}
|
||||
|
||||
/// Return true if Inst may alias any other instruction already recorded for
/// Object in WriteObjects.
///
/// Object       - key used to look up the recorded instructions.
/// Inst         - the load or store being examined; it is skipped if it is
///                itself present in the recorded list.
/// WriteObjects - map from a value to the instructions recorded for it. It is
///                indexed with operator[] (NOTE(review): on a DenseMap this
///                presumably creates an empty entry for a missing key -
///                confirm).
/// MaxByteWidth - size passed to getWithNewSize() for both locations before
///                the alias query.
///
/// AA is dereferenced without a null check; the callers visible in this file
/// test AA for null before calling.
bool
|
||||
LoopVectorizationLegality::hasPossibleGlobalWriteReorder(
|
||||
Value *Object,
|
||||
Instruction *Inst,
|
||||
AliasMultiMap& WriteObjects,
|
||||
unsigned MaxByteWidth) {
|
||||
|
||||
// Location of the access under examination, computed once outside the loop.
AliasAnalysis::Location ThisLoc = getLoadStoreLocation(Inst);
|
||||
|
||||
// Walk every instruction previously recorded for this object.
std::vector<Instruction*>::iterator
|
||||
it = WriteObjects[Object].begin(),
|
||||
end = WriteObjects[Object].end();
|
||||
|
||||
for (; it != end; ++it) {
|
||||
Instruction* I = *it;
|
||||
if (I == Inst)
|
||||
continue;
|
||||
|
||||
AliasAnalysis::Location ThatLoc = getLoadStoreLocation(I);
|
||||
// Any alias result that converts to true is treated as a conflict
// (presumably everything except NoAlias - confirm against AliasResult).
if (AA->alias(ThisLoc.getWithNewSize(MaxByteWidth),
|
||||
ThatLoc.getWithNewSize(MaxByteWidth)))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||
|
||||
if (TheLoop->isAnnotatedParallel()) {
|
||||
@ -2337,9 +2393,10 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Holds the read and read-write *pointers* that we find.
|
||||
ValueVector Reads;
|
||||
ValueVector ReadWrites;
|
||||
// Holds the read and read-write *pointers* that we find. These maps hold
|
||||
// unique values for pointers (so no need for multi-map).
|
||||
AliasMap Reads;
|
||||
AliasMap ReadWrites;
|
||||
|
||||
// Holds the analyzed pointers. We don't want to call GetUnderlyingObjects
|
||||
// multiple times on the same object. If the ptr is accessed twice, once
|
||||
@ -2361,7 +2418,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||
// If we did *not* see this pointer before, insert it to
|
||||
// the read-write list. At this phase it is only a 'write' list.
|
||||
if (Seen.insert(Ptr))
|
||||
ReadWrites.push_back(Ptr);
|
||||
ReadWrites.insert(std::make_pair(Ptr, ST));
|
||||
}
|
||||
|
||||
for (I = Loads.begin(), IE = Loads.end(); I != IE; ++I) {
|
||||
@ -2376,7 +2433,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||
// read a few words, modify, and write a few words, and some of the
|
||||
// words may be written to the same address.
|
||||
if (Seen.insert(Ptr) || 0 == isConsecutivePtr(Ptr))
|
||||
Reads.push_back(Ptr);
|
||||
Reads.insert(std::make_pair(Ptr, LD));
|
||||
}
|
||||
|
||||
// If we write (or read-write) to a single destination and there are no
|
||||
@ -2389,22 +2446,27 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||
// Find pointers with computable bounds. We are going to use this information
|
||||
// to place a runtime bound check.
|
||||
bool CanDoRT = true;
|
||||
for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I)
|
||||
if (hasComputableBounds(*I)) {
|
||||
PtrRtCheck.insert(SE, TheLoop, *I);
|
||||
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
|
||||
AliasMap::iterator MI, ME;
|
||||
for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
|
||||
Value *V = (*MI).first;
|
||||
if (hasComputableBounds(V)) {
|
||||
PtrRtCheck.insert(SE, TheLoop, V);
|
||||
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
|
||||
} else {
|
||||
CanDoRT = false;
|
||||
break;
|
||||
}
|
||||
for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I)
|
||||
if (hasComputableBounds(*I)) {
|
||||
PtrRtCheck.insert(SE, TheLoop, *I);
|
||||
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << **I <<"\n");
|
||||
}
|
||||
for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
|
||||
Value *V = (*MI).first;
|
||||
if (hasComputableBounds(V)) {
|
||||
PtrRtCheck.insert(SE, TheLoop, V);
|
||||
DEBUG(dbgs() << "LV: Found a runtime check ptr:" << *V <<"\n");
|
||||
} else {
|
||||
CanDoRT = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Check that we did not collect too many pointers or find an
|
||||
// unsizeable pointer.
|
||||
@ -2419,47 +2481,104 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
|
||||
|
||||
bool NeedRTCheck = false;
|
||||
|
||||
// Biggest vectorized access possible, vector width * unroll factor.
|
||||
// TODO: We're being very pessimistic here, find a way to know the
|
||||
// real access width before getting here.
|
||||
unsigned MaxByteWidth = (TTI->getRegisterBitWidth(true) / 8) *
|
||||
TTI->getMaximumUnrollFactor();
|
||||
// Now that the pointers are in two lists (Reads and ReadWrites), we
|
||||
// can check that there are no conflicts between each of the writes and
|
||||
// between the writes to the reads.
|
||||
ValueSet WriteObjects;
|
||||
// Note that WriteObjects duplicates the stores (indexed now by underlying
|
||||
// objects) to avoid pointing to elements inside ReadWrites.
|
||||
// TODO: Maybe create a new type where they can interact without duplication.
|
||||
AliasMultiMap WriteObjects;
|
||||
ValueVector TempObjects;
|
||||
|
||||
// Check that the read-writes do not conflict with other read-write
|
||||
// pointers.
|
||||
bool AllWritesIdentified = true;
|
||||
for (I = ReadWrites.begin(), IE = ReadWrites.end(); I != IE; ++I) {
|
||||
GetUnderlyingObjects(*I, TempObjects, DL);
|
||||
for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
|
||||
it != e; ++it) {
|
||||
if (!isIdentifiedObject(*it)) {
|
||||
DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **it <<"\n");
|
||||
for (MI = ReadWrites.begin(), ME = ReadWrites.end(); MI != ME; ++MI) {
|
||||
Value *Val = (*MI).first;
|
||||
Instruction *Inst = (*MI).second;
|
||||
|
||||
GetUnderlyingObjects(Val, TempObjects, DL);
|
||||
for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
|
||||
UI != UE; ++UI) {
|
||||
if (!isIdentifiedObject(*UI)) {
|
||||
DEBUG(dbgs() << "LV: Found an unidentified write ptr:"<< **UI <<"\n");
|
||||
NeedRTCheck = true;
|
||||
AllWritesIdentified = false;
|
||||
}
|
||||
if (!WriteObjects.insert(*it)) {
|
||||
|
||||
// Never seen it before, can't alias.
|
||||
if (WriteObjects[*UI].empty()) {
|
||||
DEBUG(dbgs() << "LV: Adding Underlying value:" << **UI <<"\n");
|
||||
WriteObjects[*UI].push_back(Inst);
|
||||
continue;
|
||||
}
|
||||
// Direct alias found.
|
||||
if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
|
||||
DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
|
||||
<< **it <<"\n");
|
||||
<< **UI <<"\n");
|
||||
return false;
|
||||
}
|
||||
DEBUG(dbgs() << "LV: Found a conflicting global value:"
|
||||
<< **UI <<"\n");
|
||||
DEBUG(dbgs() << "LV: While examining store:" << *Inst <<"\n");
|
||||
DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
|
||||
|
||||
// If global alias, make sure they do alias.
|
||||
if (hasPossibleGlobalWriteReorder(*UI,
|
||||
Inst,
|
||||
WriteObjects,
|
||||
MaxByteWidth)) {
|
||||
DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
|
||||
<< *UI <<"\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Didn't alias, insert into map for further reference.
|
||||
WriteObjects[*UI].push_back(Inst);
|
||||
}
|
||||
TempObjects.clear();
|
||||
}
|
||||
|
||||
/// Check that the reads don't conflict with the read-writes.
|
||||
for (I = Reads.begin(), IE = Reads.end(); I != IE; ++I) {
|
||||
GetUnderlyingObjects(*I, TempObjects, DL);
|
||||
for (ValueVector::iterator it=TempObjects.begin(), e=TempObjects.end();
|
||||
it != e; ++it) {
|
||||
for (MI = Reads.begin(), ME = Reads.end(); MI != ME; ++MI) {
|
||||
Value *Val = (*MI).first;
|
||||
GetUnderlyingObjects(Val, TempObjects, DL);
|
||||
for (ValueVector::iterator UI=TempObjects.begin(), UE=TempObjects.end();
|
||||
UI != UE; ++UI) {
|
||||
// If all of the writes are identified then we don't care if the read
|
||||
// pointer is identified or not.
|
||||
if (!AllWritesIdentified && !isIdentifiedObject(*it)) {
|
||||
DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **it <<"\n");
|
||||
if (!AllWritesIdentified && !isIdentifiedObject(*UI)) {
|
||||
DEBUG(dbgs() << "LV: Found an unidentified read ptr:"<< **UI <<"\n");
|
||||
NeedRTCheck = true;
|
||||
}
|
||||
if (WriteObjects.count(*it)) {
|
||||
DEBUG(dbgs() << "LV: Found a possible read/write reorder:"
|
||||
<< **it <<"\n");
|
||||
|
||||
// Never seen it before, can't alias.
|
||||
if (WriteObjects[*UI].empty())
|
||||
continue;
|
||||
// Direct alias found.
|
||||
if (!AA || dyn_cast<GlobalValue>(*UI) == NULL) {
|
||||
DEBUG(dbgs() << "LV: Found a possible write-write reorder:"
|
||||
<< **UI <<"\n");
|
||||
return false;
|
||||
}
|
||||
DEBUG(dbgs() << "LV: Found a global value: "
|
||||
<< **UI <<"\n");
|
||||
Instruction *Inst = (*MI).second;
|
||||
DEBUG(dbgs() << "LV: While examining load:" << *Inst <<"\n");
|
||||
DEBUG(dbgs() << "LV: On value:" << *Val <<"\n");
|
||||
|
||||
// If global alias, make sure they do alias.
|
||||
if (hasPossibleGlobalWriteReorder(*UI,
|
||||
Inst,
|
||||
WriteObjects,
|
||||
MaxByteWidth)) {
|
||||
DEBUG(dbgs() << "LV: Found a possible read-write reorder:"
|
||||
<< *UI <<"\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
356
test/Transforms/LoopVectorize/global_alias.ll
Normal file
356
test/Transforms/LoopVectorize/global_alias.ll
Normal file
@ -0,0 +1,356 @@
|
||||
; RUN: opt < %s -O3 -loop-vectorize -force-vector-unroll=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:64:128-a0:0:64-n32-S64"
|
||||
|
||||
%struct.anon = type { [100 x i32], i32, [100 x i32] }
|
||||
|
||||
@Foo = common global %struct.anon zeroinitializer, align 4
|
||||
@PB = external global i32*
|
||||
@PA = external global i32*
|
||||
|
||||
; int noAlias01 (int a) {
|
||||
; int i;
|
||||
; for (i=0; i<SIZE; i++)
|
||||
; Foo.A[i] = Foo.B[i] + a;
|
||||
; return Foo.A[a];
|
||||
; }
|
||||
; CHECK: define i32 @noAlias01
|
||||
; CHECK: add nsw <4 x i32>
|
||||
; CHECK: ret
|
||||
|
||||
define i32 @noAlias01(i32 %a) nounwind {
|
||||
entry:
|
||||
%a.addr = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 %a, i32* %a.addr, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32* %i, align 4
|
||||
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
|
||||
%2 = load i32* %arrayidx, align 4
|
||||
%3 = load i32* %a.addr, align 4
|
||||
%add = add nsw i32 %2, %3
|
||||
%4 = load i32* %i, align 4
|
||||
%arrayidx1 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
|
||||
store i32 %add, i32* %arrayidx1, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%5 = load i32* %i, align 4
|
||||
%inc = add nsw i32 %5, 1
|
||||
store i32 %inc, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
%6 = load i32* %a.addr, align 4
|
||||
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
||||
%7 = load i32* %arrayidx2, align 4
|
||||
ret i32 %7
|
||||
}
|
||||
|
||||
; int mayAlias01 (int a) {
|
||||
; int i;
|
||||
; for (i=0; i<SIZE; i++)
|
||||
; Foo.A[i] = Foo.B[SIZE-i-1] + a;
|
||||
; return Foo.A[a];
|
||||
; }
|
||||
; CHECK: define i32 @mayAlias01
|
||||
; CHECK-NOT: add nsw <4 x i32>
|
||||
; CHECK: ret
|
||||
|
||||
define i32 @mayAlias01(i32 %a) nounwind {
|
||||
entry:
|
||||
%a.addr = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 %a, i32* %a.addr, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32* %i, align 4
|
||||
%sub = sub nsw i32 100, %1
|
||||
%sub1 = sub nsw i32 %sub, 1
|
||||
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
|
||||
%2 = load i32* %arrayidx, align 4
|
||||
%3 = load i32* %a.addr, align 4
|
||||
%add = add nsw i32 %2, %3
|
||||
%4 = load i32* %i, align 4
|
||||
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
|
||||
store i32 %add, i32* %arrayidx2, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%5 = load i32* %i, align 4
|
||||
%inc = add nsw i32 %5, 1
|
||||
store i32 %inc, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
%6 = load i32* %a.addr, align 4
|
||||
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
||||
%7 = load i32* %arrayidx3, align 4
|
||||
ret i32 %7
|
||||
}
|
||||
|
||||
; int mayAlias02 (int a) {
|
||||
; int i;
|
||||
; for (i=0; i<SIZE; i++)
|
||||
; Foo.A[SIZE-i-1] = Foo.B[i] + a;
|
||||
; return Foo.A[a];
|
||||
; }
|
||||
; CHECK: define i32 @mayAlias02
|
||||
; CHECK-NOT: add nsw <4 x i32>
|
||||
; CHECK: ret
|
||||
|
||||
define i32 @mayAlias02(i32 %a) nounwind {
|
||||
entry:
|
||||
%a.addr = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 %a, i32* %a.addr, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32* %i, align 4
|
||||
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %1
|
||||
%2 = load i32* %arrayidx, align 4
|
||||
%3 = load i32* %a.addr, align 4
|
||||
%add = add nsw i32 %2, %3
|
||||
%4 = load i32* %i, align 4
|
||||
%sub = sub nsw i32 100, %4
|
||||
%sub1 = sub nsw i32 %sub, 1
|
||||
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %sub1
|
||||
store i32 %add, i32* %arrayidx2, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%5 = load i32* %i, align 4
|
||||
%inc = add nsw i32 %5, 1
|
||||
store i32 %inc, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
%6 = load i32* %a.addr, align 4
|
||||
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
||||
%7 = load i32* %arrayidx3, align 4
|
||||
ret i32 %7
|
||||
}
|
||||
|
||||
; int mayAlias03 (int a) {
|
||||
; int i;
|
||||
; for (i=0; i<SIZE; i++)
|
||||
; *(PA+i) = *(PB+SIZE-i-1) + a;
|
||||
; return *(PA+a);
|
||||
; }
|
||||
; CHECK: define i32 @mayAlias03
|
||||
; CHECK-NOT: add nsw <4 x i32>
|
||||
; CHECK: ret
|
||||
|
||||
define i32 @mayAlias03(i32 %a) nounwind {
|
||||
entry:
|
||||
%a.addr = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 %a, i32* %a.addr, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32** @PB, align 4
|
||||
%add.ptr = getelementptr inbounds i32* %1, i32 100
|
||||
%2 = load i32* %i, align 4
|
||||
%idx.neg = sub i32 0, %2
|
||||
%add.ptr1 = getelementptr inbounds i32* %add.ptr, i32 %idx.neg
|
||||
%add.ptr2 = getelementptr inbounds i32* %add.ptr1, i32 -1
|
||||
%3 = load i32* %add.ptr2, align 4
|
||||
%4 = load i32* %a.addr, align 4
|
||||
%add = add nsw i32 %3, %4
|
||||
%5 = load i32** @PA, align 4
|
||||
%6 = load i32* %i, align 4
|
||||
%add.ptr3 = getelementptr inbounds i32* %5, i32 %6
|
||||
store i32 %add, i32* %add.ptr3, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%7 = load i32* %i, align 4
|
||||
%inc = add nsw i32 %7, 1
|
||||
store i32 %inc, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
%8 = load i32** @PA, align 4
|
||||
%9 = load i32* %a.addr, align 4
|
||||
%add.ptr4 = getelementptr inbounds i32* %8, i32 %9
|
||||
%10 = load i32* %add.ptr4, align 4
|
||||
ret i32 %10
|
||||
}
|
||||
|
||||
; int mustAlias01 (int a) {
|
||||
; int i;
|
||||
; for (i=0; i<SIZE; i++)
|
||||
; Foo.A[i+10] = Foo.B[SIZE-i-1] + a;
|
||||
; return Foo.A[a];
|
||||
; }
|
||||
; CHECK: define i32 @mustAlias01
|
||||
; CHECK-NOT: add nsw <4 x i32>
|
||||
; CHECK: ret
|
||||
|
||||
define i32 @mustAlias01(i32 %a) nounwind {
|
||||
entry:
|
||||
%a.addr = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 %a, i32* %a.addr, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32* %i, align 4
|
||||
%sub = sub nsw i32 100, %1
|
||||
%sub1 = sub nsw i32 %sub, 1
|
||||
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
|
||||
%2 = load i32* %arrayidx, align 4
|
||||
%3 = load i32* %a.addr, align 4
|
||||
%add = add nsw i32 %2, %3
|
||||
%4 = load i32* %i, align 4
|
||||
%add2 = add nsw i32 %4, 10
|
||||
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
|
||||
store i32 %add, i32* %arrayidx3, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%5 = load i32* %i, align 4
|
||||
%inc = add nsw i32 %5, 1
|
||||
store i32 %inc, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
%6 = load i32* %a.addr, align 4
|
||||
%arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
||||
%7 = load i32* %arrayidx4, align 4
|
||||
ret i32 %7
|
||||
}
|
||||
|
||||
; int mustAlias02 (int a) {
|
||||
; int i;
|
||||
; for (i=0; i<SIZE; i++)
|
||||
; Foo.A[i] = Foo.B[SIZE-i-10] + a;
|
||||
; return Foo.A[a];
|
||||
; }
|
||||
; CHECK: define i32 @mustAlias02
|
||||
; CHECK-NOT: add nsw <4 x i32>
|
||||
; CHECK: ret
|
||||
|
||||
define i32 @mustAlias02(i32 %a) nounwind {
|
||||
entry:
|
||||
%a.addr = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 %a, i32* %a.addr, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32* %i, align 4
|
||||
%sub = sub nsw i32 100, %1
|
||||
%sub1 = sub nsw i32 %sub, 10
|
||||
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
|
||||
%2 = load i32* %arrayidx, align 4
|
||||
%3 = load i32* %a.addr, align 4
|
||||
%add = add nsw i32 %2, %3
|
||||
%4 = load i32* %i, align 4
|
||||
%arrayidx2 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %4
|
||||
store i32 %add, i32* %arrayidx2, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%5 = load i32* %i, align 4
|
||||
%inc = add nsw i32 %5, 1
|
||||
store i32 %inc, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
%6 = load i32* %a.addr, align 4
|
||||
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
||||
%7 = load i32* %arrayidx3, align 4
|
||||
ret i32 %7
|
||||
}
|
||||
|
||||
; int mustAlias03 (int a) {
|
||||
; int i;
|
||||
; for (i=0; i<SIZE; i++)
|
||||
; Foo.A[i+10] = Foo.B[SIZE-i-10] + a;
|
||||
; return Foo.A[a];
|
||||
; }
|
||||
; CHECK: define i32 @mustAlias03
|
||||
; CHECK-NOT: add nsw <4 x i32>
|
||||
; CHECK: ret
|
||||
|
||||
define i32 @mustAlias03(i32 %a) nounwind {
|
||||
entry:
|
||||
%a.addr = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
store i32 %a, i32* %a.addr, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.cond: ; preds = %for.inc, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 100
|
||||
br i1 %cmp, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %for.cond
|
||||
%1 = load i32* %i, align 4
|
||||
%sub = sub nsw i32 100, %1
|
||||
%sub1 = sub nsw i32 %sub, 10
|
||||
%arrayidx = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 2), i32 0, i32 %sub1
|
||||
%2 = load i32* %arrayidx, align 4
|
||||
%3 = load i32* %a.addr, align 4
|
||||
%add = add nsw i32 %2, %3
|
||||
%4 = load i32* %i, align 4
|
||||
%add2 = add nsw i32 %4, 10
|
||||
%arrayidx3 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %add2
|
||||
store i32 %add, i32* %arrayidx3, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body
|
||||
%5 = load i32* %i, align 4
|
||||
%inc = add nsw i32 %5, 1
|
||||
store i32 %inc, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
for.end: ; preds = %for.cond
|
||||
%6 = load i32* %a.addr, align 4
|
||||
%arrayidx4 = getelementptr inbounds [100 x i32]* getelementptr inbounds (%struct.anon* @Foo, i32 0, i32 0), i32 0, i32 %6
|
||||
%7 = load i32* %arrayidx4, align 4
|
||||
ret i32 %7
|
||||
}
|
Loading…
Reference in New Issue
Block a user