1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

[LLE] Add missing check for unit stride

I somehow missed this.  The case in GCC (global_alloc) was similar to
the new testcase except it had an array of structs rather than a two
dimensional array.

Fixes PR26885.

llvm-svn: 263058
This commit is contained in:
Adam Nemet 2016-03-09 20:47:55 +00:00
parent 3204bf1bc8
commit e774f7e34b
3 changed files with 58 additions and 6 deletions

View File

@ -663,7 +663,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// The \p Assume parameter indicates if we are allowed to make additional
/// run-time assumptions.
int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap, bool Assume = false);
const ValueToValueMap &StridesMap = ValueToValueMap(),
bool Assume = false);
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.

View File

@ -61,7 +61,8 @@ struct StoreToLoadForwardingCandidate {
/// \brief Return true if the dependence from the store to the load has a
/// distance of one. E.g. A[i+1] = A[i]
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const {
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
Loop *L) const {
Value *LoadPtr = Load->getPointerOperand();
Value *StorePtr = Store->getPointerOperand();
Type *LoadPtrType = LoadPtr->getType();
@ -72,6 +73,13 @@ struct StoreToLoadForwardingCandidate {
LoadType == StorePtr->getType()->getPointerElementType() &&
"Should be a known dependence");
// Currently we only support accesses with unit stride. FIXME: we should be
// able to handle non unit stride as well as long as the stride is equal to
// the dependence distance.
//
// Both pointers must be checked: the load pointer and the store pointer each
// have to advance by exactly one element per iteration for the byte-distance
// comparison that follows to mean "one iteration apart".
if (isStridedPtr(PSE, LoadPtr, L) != 1 ||
    isStridedPtr(PSE, StorePtr, L) != 1)
  return false;
auto &DL = Load->getParent()->getModule()->getDataLayout();
unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
@ -83,7 +91,7 @@ struct StoreToLoadForwardingCandidate {
auto *Dist = cast<SCEVConstant>(
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
const APInt &Val = Dist->getAPInt();
return Val.abs() == TypeByteSize;
return Val == TypeByteSize;
}
Value *getLoadPtr() const { return Load->getPointerOperand(); }
@ -223,8 +231,8 @@ public:
// so deciding which one forwards is easy. The later one forwards as
// long as they both have a dependence distance of one to the load.
if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
Cand.isDependenceDistanceOfOne(PSE) &&
OtherCand->isDependenceDistanceOfOne(PSE)) {
Cand.isDependenceDistanceOfOne(PSE, L) &&
OtherCand->isDependenceDistanceOfOne(PSE, L)) {
// They are in the same block, the later one will forward to the load.
if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
OtherCand = &Cand;
@ -441,7 +449,7 @@ public:
// Check whether the SCEV difference is the same as the induction step,
// thus we load the value in the next iteration.
if (!Cand.isDependenceDistanceOfOne(PSE))
if (!Cand.isDependenceDistanceOfOne(PSE, L))
continue;
++NumForwarding;

View File

@ -0,0 +1,43 @@
; RUN: opt -loop-load-elim -S < %s | FileCheck %s
; The accesses to A are independent here but LAA reports it as a loop-carried
; forward dependence. Check that we don't perform st->ld forwarding between
; them.
;
; for (unsigned i = 0; i < N; i++) {
; A[i][1] = B[i] + 2;
; C[i] = A[i][0] * 2;
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; Single-block loop over %indvars.iv starting at 0. Each iteration stores
; B[i] + 2 into A[i][1], then loads A[i][0] and stores twice that value into
; C[i]. %A, %B and %C are all marked noalias.
define void @f([2 x i32]* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
; Address of A[i][1]: the element written by the store below.
%A1idx = getelementptr inbounds [2 x i32], [2 x i32]* %A, i64 %indvars.iv, i32 1
%Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
; Address of A[i][0]: the element read by the load below. It sits one i32
; before A[i][1], but both addresses advance by a whole [2 x i32] row per
; iteration, so neither access has unit stride in i32 elements.
%A0idx = getelementptr inbounds [2 x i32], [2 x i32]* %A, i64 %indvars.iv, i32 0
%b = load i32, i32* %Bidx, align 4
%a_p1 = add i32 %b, 2
store i32 %a_p1, i32* %A1idx, align 4
; The load from A[i][0] must still be present in the output, i.e. it must not
; have been replaced by a value forwarded from the store to A[i][1].
; CHECK: %a = load i32, i32* %A0idx, align 4
%a = load i32, i32* %A0idx, align 4
; The multiply must still consume the loaded value %a.
; CHECK: %c = mul i32 %a, 2
%c = mul i32 %a, 2
store i32 %c, i32* %Cidx, align 4
; Leave the loop once the incremented induction variable equals %N.
%exitcond = icmp eq i64 %indvars.iv.next, %N
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}