mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[LLE] Add missing check for unit stride
I somehow missed this. The case in GCC (global_alloc) was similar to the new testcase except it had an array of structs rather than a two dimensional array. Fixes RP26885. llvm-svn: 263058
This commit is contained in:
parent
3204bf1bc8
commit
e774f7e34b
@ -663,7 +663,8 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
|
||||
/// The \p Assume parameter indicates if we are allowed to make additional
|
||||
/// run-time assumptions.
|
||||
int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
|
||||
const ValueToValueMap &StridesMap, bool Assume = false);
|
||||
const ValueToValueMap &StridesMap = ValueToValueMap(),
|
||||
bool Assume = false);
|
||||
|
||||
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
|
||||
/// This is a simple API that does not depend on the analysis pass.
|
||||
|
@ -61,7 +61,8 @@ struct StoreToLoadForwardingCandidate {
|
||||
|
||||
/// \brief Return true if the dependence from the store to the load has a
|
||||
/// distance of one. E.g. A[i+1] = A[i]
|
||||
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const {
|
||||
bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE,
|
||||
Loop *L) const {
|
||||
Value *LoadPtr = Load->getPointerOperand();
|
||||
Value *StorePtr = Store->getPointerOperand();
|
||||
Type *LoadPtrType = LoadPtr->getType();
|
||||
@ -72,6 +73,13 @@ struct StoreToLoadForwardingCandidate {
|
||||
LoadType == StorePtr->getType()->getPointerElementType() &&
|
||||
"Should be a known dependence");
|
||||
|
||||
// Currently we only support accesses with unit stride. FIXME: we should be
|
||||
// able to handle non unit stirde as well as long as the stride is equal to
|
||||
// the dependence distance.
|
||||
if (isStridedPtr(PSE, LoadPtr, L) != 1 ||
|
||||
isStridedPtr(PSE, LoadPtr, L) != 1)
|
||||
return false;
|
||||
|
||||
auto &DL = Load->getParent()->getModule()->getDataLayout();
|
||||
unsigned TypeByteSize = DL.getTypeAllocSize(const_cast<Type *>(LoadType));
|
||||
|
||||
@ -83,7 +91,7 @@ struct StoreToLoadForwardingCandidate {
|
||||
auto *Dist = cast<SCEVConstant>(
|
||||
PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV));
|
||||
const APInt &Val = Dist->getAPInt();
|
||||
return Val.abs() == TypeByteSize;
|
||||
return Val == TypeByteSize;
|
||||
}
|
||||
|
||||
Value *getLoadPtr() const { return Load->getPointerOperand(); }
|
||||
@ -223,8 +231,8 @@ public:
|
||||
// so deciding which one forwards is easy. The later one forwards as
|
||||
// long as they both have a dependence distance of one to the load.
|
||||
if (Cand.Store->getParent() == OtherCand->Store->getParent() &&
|
||||
Cand.isDependenceDistanceOfOne(PSE) &&
|
||||
OtherCand->isDependenceDistanceOfOne(PSE)) {
|
||||
Cand.isDependenceDistanceOfOne(PSE, L) &&
|
||||
OtherCand->isDependenceDistanceOfOne(PSE, L)) {
|
||||
// They are in the same block, the later one will forward to the load.
|
||||
if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store))
|
||||
OtherCand = &Cand;
|
||||
@ -441,7 +449,7 @@ public:
|
||||
|
||||
// Check whether the SCEV difference is the same as the induction step,
|
||||
// thus we load the value in the next iteration.
|
||||
if (!Cand.isDependenceDistanceOfOne(PSE))
|
||||
if (!Cand.isDependenceDistanceOfOne(PSE, L))
|
||||
continue;
|
||||
|
||||
++NumForwarding;
|
||||
|
43
test/Transforms/LoopLoadElim/non-consecutive.ll
Normal file
43
test/Transforms/LoopLoadElim/non-consecutive.ll
Normal file
@ -0,0 +1,43 @@
|
||||
; RUN: opt -loop-load-elim -S < %s | FileCheck %s
|
||||
|
||||
; The accesses to A are independent here but LAA reports it as a loop-carried
|
||||
; forward dependence. Check that we don't perform st->ld forwarding between
|
||||
; them.
|
||||
;
|
||||
; for (unsigned i = 0; i < 100; i++) {
|
||||
; A[i][1] = B[i] + 2;
|
||||
; C[i] = A[i][0] * 2;
|
||||
; }
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @f([2 x i32]* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) {
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
|
||||
%A1idx = getelementptr inbounds [2 x i32], [2 x i32]* %A, i64 %indvars.iv, i32 1
|
||||
%Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
|
||||
%Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
|
||||
%A0idx = getelementptr inbounds [2 x i32], [2 x i32]* %A, i64 %indvars.iv, i32 0
|
||||
|
||||
%b = load i32, i32* %Bidx, align 4
|
||||
%a_p1 = add i32 %b, 2
|
||||
store i32 %a_p1, i32* %A1idx, align 4
|
||||
|
||||
; CHECK: %a = load i32, i32* %A0idx, align 4
|
||||
%a = load i32, i32* %A0idx, align 4
|
||||
; CHECK: %c = mul i32 %a, 2
|
||||
%c = mul i32 %a, 2
|
||||
store i32 %c, i32* %Cidx, align 4
|
||||
|
||||
%exitcond = icmp eq i64 %indvars.iv.next, %N
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user