
[LAA] Enable symbolic stride speculation for all LAA clients

This is a functional change for LLE and LDist.  The other clients (LV,
LVerLICM) already had this explicitly enabled.

The temporary boolean parameter to LAA that allowed turning off speculation of
symbolic strides is removed.  LAA's caching interface LAA::getInfo now takes
only the loop as its parameter, which makes the interface more friendly to the
new Pass Manager.
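
For illustration, a minimal sketch of a client-side query after this change,
mirroring the call-site updates in LoopVersioningLICM and LoopVectorize in the
diff below (the types and names are the existing LLVM ones, not new API):

  // Previously each client chose per query whether to speculate symbolic
  // strides:
  //   const LoopAccessInfo &LAI = LAA->getInfo(TheLoop,
  //                                            /*SpeculateSymbolicStrides=*/true);
  // Now the cached query takes only the loop; speculation is always attempted
  // and the resulting run-time checks are collected in LAI::PSE.
  const LoopAccessInfo &LAI = LAA->getInfo(TheLoop);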

The flag -enable-mem-access-versioning is moved from LV to LAA, which now
allows turning off speculation globally.
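
As exercised by the new tests below, speculation can now be switched off for
every LAA client from the command line, for example (input.ll is a placeholder
file name):

  opt -loop-distribute -enable-mem-access-versioning=0 -S < input.ll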

llvm-svn: 273064
Adam Nemet 2016-06-17 22:35:41 +00:00
parent 12e6050372
commit 62a274f0e9
6 changed files with 154 additions and 50 deletions


@@ -513,8 +513,7 @@ class LoopAccessInfo {
public:
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
bool SpeculateSymbolicStrides);
DominatorTree *DT, LoopInfo *LI);
/// Return true we can analyze the memory accesses in the loop and there are
/// no memory dependence cycles.
@@ -585,11 +584,6 @@ public:
/// \brief Print the information about the memory accesses in the loop.
void print(raw_ostream &OS, unsigned Depth = 0) const;
/// \brief Used to ensure that if the analysis was run with speculating the
/// value of symbolic strides, the client queries it with the same assumption.
/// Only used in DEBUG build but we don't want NDEBUG-dependent ABI.
bool SpeculateSymbolicStrides;
/// \brief Checks existence of store to invariant address inside loop.
/// If the loop has any store to invariant address, then it returns true,
/// else returns false.
@@ -715,11 +709,8 @@ public:
/// \brief Query the result of the loop access information for the loop \p L.
///
/// \p SpeculateSymbolicStrides enables symbolic value speculation. The
/// corresponding run-time checks are collected in LAI::PSE.
///
/// If there is no cached result available run the analysis.
const LoopAccessInfo &getInfo(Loop *L, bool SpeculateSymbolicStrides = false);
const LoopAccessInfo &getInfo(Loop *L);
void releaseMemory() override {
// Invalidate the cache when the pass is freed.


@@ -65,6 +65,21 @@ static cl::opt<unsigned>
"loop-access analysis (default = 100)"),
cl::init(100));
/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
/// for (i = 0; i < N; ++i)
/// A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
/// if (Stride1 == 1 && Stride2 == 1) {
/// for (i = 0; i < N; i+=4)
/// A[i:i+3] += ...
/// } else
/// ...
static cl::opt<bool> EnableMemAccessVersioning(
"enable-mem-access-versioning", cl::init(true), cl::Hidden,
cl::desc("Enable symbolic stride memory access versioning"));
/// \brief Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
@@ -1540,7 +1555,7 @@ void LoopAccessInfo::analyzeLoop() {
NumLoads++;
Loads.push_back(Ld);
DepChecker.addAccess(Ld);
if (SpeculateSymbolicStrides)
if (EnableMemAccessVersioning)
collectStridedAccess(Ld);
continue;
}
@@ -1564,7 +1579,7 @@ void LoopAccessInfo::analyzeLoop() {
NumStores++;
Stores.push_back(St);
DepChecker.addAccess(St);
if (SpeculateSymbolicStrides)
if (EnableMemAccessVersioning)
collectStridedAccess(St);
}
} // Next instr.
@@ -1904,11 +1919,9 @@ void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
const DataLayout &DL,
const TargetLibraryInfo *TLI, AliasAnalysis *AA,
DominatorTree *DT, LoopInfo *LI,
bool SpeculateSymbolicStrides)
: SpeculateSymbolicStrides(SpeculateSymbolicStrides), PSE(*SE, *L),
PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI),
AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
DominatorTree *DT, LoopInfo *LI)
: PSE(*SE, *L), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),
TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),
MaxSafeDepDistBytes(-1U), CanVecMem(false),
StoreToLoopInvariantAddress(false) {
if (canAnalyzeLoop())
@@ -1955,19 +1968,12 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
PSE.print(OS, Depth);
}
const LoopAccessInfo &
LoopAccessAnalysis::getInfo(Loop *L, bool SpeculateSymbolicStrides) {
const LoopAccessInfo &LoopAccessAnalysis::getInfo(Loop *L) {
auto &LAI = LoopAccessInfoMap[L];
#ifndef NDEBUG
assert((!LAI || LAI->SpeculateSymbolicStrides == SpeculateSymbolicStrides) &&
"Symbolic strides changed for loop");
#endif
if (!LAI) {
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI,
SpeculateSymbolicStrides);
LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI);
}
return *LAI.get();
}


@@ -385,7 +385,7 @@ bool LoopVersioningLICM::legalLoopInstructions() {
return false;
}
// Get LoopAccessInfo from current loop.
LAI = &LAA->getInfo(CurLoop, true);
LAI = &LAA->getInfo(CurLoop);
// Check LoopAccessInfo for need of runtime check.
if (LAI->getRuntimePointerChecking()->getChecks().empty()) {
DEBUG(dbgs() << " LAA: Runtime check not found !!\n");


@@ -130,21 +130,6 @@ static cl::opt<bool> MaximizeBandwidth(
cl::desc("Maximize bandwidth when selecting vectorization factor which "
"will be determined by the smallest type in loop."));
/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
/// for (i = 0; i < N; ++i)
/// A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
/// if (Stride1 == 1 && Stride2 == 1) {
/// for (i = 0; i < N; i+=4)
/// A[i:i+3] += ...
/// } else
/// ...
static cl::opt<bool> EnableMemAccessVersioning(
"enable-mem-access-versioning", cl::init(true), cl::Hidden,
cl::desc("Enable symbolic stride memory access versioning"));
static cl::opt<bool> EnableInterleavedMemAccesses(
"enable-interleaved-mem-accesses", cl::init(false), cl::Hidden,
cl::desc("Enable vectorization on interleaved memory accesses in a loop"));
@@ -4970,7 +4955,7 @@ void LoopVectorizationLegality::collectLoopUniforms() {
}
bool LoopVectorizationLegality::canVectorizeMemory() {
LAI = &LAA->getInfo(TheLoop, EnableMemAccessVersioning);
LAI = &LAA->getInfo(TheLoop);
auto &OptionalReport = LAI->getReport();
if (OptionalReport)
emitAnalysis(VectorizationReport(*OptionalReport));


@@ -0,0 +1,65 @@
; RUN: opt -basicaa -loop-distribute -S < %s | \
; RUN: FileCheck %s --check-prefix=ALL --check-prefix=STRIDE_SPEC
; RUN: opt -basicaa -loop-distribute -S -enable-mem-access-versioning=0 < %s | \
; RUN: FileCheck %s --check-prefix=ALL --check-prefix=NO_STRIDE_SPEC
; If we don't speculate stride for 1 we can't distribute along the line
; because we could have a backward dependence:
;
; for (i = 0; i < n; i++) {
; A[i + 1] = A[i] * B[i];
; =======================
; C[i] = D[i] * A[stride * i];
; }
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
; ALL-LABEL: @f(
define void @f(i32* noalias %a,
i32* noalias %b,
i32* noalias %c,
i32* noalias %d,
i64 %stride) {
entry:
br label %for.body
; STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
; STRIDE_SPEC: for.body.ldist1:
; NO_STRIDE_SPEC-NOT: for.body.ldist1:
for.body: ; preds = %for.body, %entry
%ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
%arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
%loadA = load i32, i32* %arrayidxA, align 4
%arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
%loadB = load i32, i32* %arrayidxB, align 4
%mulA = mul i32 %loadB, %loadA
%add = add nuw nsw i64 %ind, 1
%arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
store i32 %mulA, i32* %arrayidxA_plus_4, align 4
%arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
%loadD = load i32, i32* %arrayidxD, align 4
%mul = mul i64 %ind, %stride
%arrayidxStridedA = getelementptr inbounds i32, i32* %a, i64 %mul
%loadStridedA = load i32, i32* %arrayidxStridedA, align 4
%mulC = mul i32 %loadD, %loadStridedA
%arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
store i32 %mulC, i32* %arrayidxC, align 4
%exitcond = icmp eq i64 %add, 20
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}


@@ -1,28 +1,44 @@
; RUN: opt -loop-load-elim -S < %s | FileCheck %s
; RUN: opt -loop-load-elim -S < %s | \
; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
; RUN: -check-prefix=TWO_STRIDE_SPEC
; Forwarding in the presence of symbolic strides is currently not supported:
; RUN: opt -loop-load-elim -S -enable-mem-access-versioning=0 < %s | \
; RUN: FileCheck %s -check-prefix=ALL -check-prefix=NO_ONE_STRIDE_SPEC \
; RUN: -check-prefix=NO_TWO_STRIDE_SPEC
; RUN: opt -loop-load-elim -S -loop-load-elimination-scev-check-threshold=1 < %s | \
; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
; RUN: -check-prefix=NO_TWO_STRIDE_SPEC
; Forwarding in the presence of symbolic strides:
;
; for (unsigned i = 0; i < 100; i++)
; A[i + 1] = A[Stride * i] + B[i];
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; CHECK-LABEL: @f(
; ALL-LABEL: @f(
define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
i64 %stride) {
; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
entry:
; CHECK-NOT: %load_initial = load i32, i32* %A
; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
; ONE_STRIDE_SPEC: %load_initial = load i32, i32* %A
br label %for.body
for.body: ; preds = %for.body, %entry
; CHECK-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
; ONE_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%mul = mul i64 %indvars.iv, %stride
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
%load = load i32, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%load_1 = load i32, i32* %arrayidx2, align 4
; CHECK-NOT: %add = add i32 %load_1, %store_forwarded
; NO_ONE_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
; ONE_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
%add = add i32 %load_1, %load
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
@@ -33,3 +49,44 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
; With two symbolic strides:
;
; for (unsigned i = 0; i < 100; i++)
; A[Stride2 * (i + 1)] = A[Stride1 * i] + B[i];
; ALL-LABEL: @two_strides(
define void @two_strides(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
i64 %stride.1, i64 %stride.2) {
; TWO_STRIDE_SPEC: %ident.check = icmp ne i64 %stride.2, 1
; TWO_STRIDE_SPEC: %ident.check1 = icmp ne i64 %stride.1, 1
; NO_TWO_STRIDE_SPEC-NOT: %ident.check{{.*}} = icmp ne i64 %stride{{.*}}, 1
entry:
; NO_TWO_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
; TWO_STRIDE_SPEC: %load_initial = load i32, i32* %A
br label %for.body
for.body: ; preds = %for.body, %entry
; NO_TWO_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
; TWO_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%mul = mul i64 %indvars.iv, %stride.1
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
%load = load i32, i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%load_1 = load i32, i32* %arrayidx2, align 4
; NO_TWO_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
; TWO_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
%add = add i32 %load_1, %load
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%mul.2 = mul i64 %indvars.iv.next, %stride.2
%arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %mul.2
store i32 %add, i32* %arrayidx_next, align 4
%exitcond = icmp eq i64 %indvars.iv.next, %N
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret void
}