mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[LV] Clamp VF hint when unsafe
In the following loop the dependence distance is 2, so the loop can only be vectorized if the vector length is no larger than this.

    void foo(int *a, int *b, int N) {
    #pragma clang loop vectorize(enable) vectorize_width(4)
      for (int i=0; i<N; ++i) {
        a[i + 2] = a[i] + b[i];
      }
    }

However, when specifying a VF of 4 via a loop hint, this loop is vectorized. According to [1][2], loop hints are ignored if the optimization is not safe to apply.

This patch introduces a check to bail out of vectorization if the user-specified VF is greater than the maximum feasible VF, unless explicitly forced with '-force-vector-width=X'.

[1] https://llvm.org/docs/LangRef.html#llvm-loop-vectorize-and-llvm-loop-interleave
[2] https://clang.llvm.org/docs/LanguageExtensions.html#extensions-for-loop-hint-optimizations

Reviewed By: sdesmalen, fhahn, Meinersbur

Differential Revision: https://reviews.llvm.org/D90687
This commit is contained in:
parent
32a49915a1
commit
c640adbe73
@ -1443,7 +1443,8 @@ private:
|
||||
/// \return An upper bound for the vectorization factor, a power-of-2 larger
|
||||
/// than zero. One is returned if vectorization should best be avoided due
|
||||
/// to cost.
|
||||
ElementCount computeFeasibleMaxVF(unsigned ConstTripCount);
|
||||
ElementCount computeFeasibleMaxVF(unsigned ConstTripCount,
|
||||
ElementCount UserVF);
|
||||
|
||||
/// The vectorization cost is a combination of the cost itself and a boolean
|
||||
/// indicating whether any of the contributing operations will actually
|
||||
@ -5270,9 +5271,11 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
|
||||
return None;
|
||||
}
|
||||
|
||||
ElementCount MaxVF = computeFeasibleMaxVF(TC, UserVF);
|
||||
|
||||
switch (ScalarEpilogueStatus) {
|
||||
case CM_ScalarEpilogueAllowed:
|
||||
return UserVF ? UserVF : computeFeasibleMaxVF(TC);
|
||||
return MaxVF;
|
||||
case CM_ScalarEpilogueNotNeededUsePredicate:
|
||||
LLVM_DEBUG(
|
||||
dbgs() << "LV: vector predicate hint/switch found.\n"
|
||||
@ -5308,7 +5311,6 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
|
||||
InterleaveInfo.invalidateGroupsRequiringScalarEpilogue();
|
||||
}
|
||||
|
||||
ElementCount MaxVF = UserVF ? UserVF : computeFeasibleMaxVF(TC);
|
||||
assert(!MaxVF.isScalable() &&
|
||||
"Scalable vectors do not yet support tail folding");
|
||||
assert((UserVF.isNonZero() || isPowerOf2_32(MaxVF.getFixedValue())) &&
|
||||
@ -5361,7 +5363,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
|
||||
}
|
||||
|
||||
ElementCount
|
||||
LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
|
||||
LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount,
|
||||
ElementCount UserVF) {
|
||||
assert(!UserVF.isScalable() && "scalable vectorization not yet handled");
|
||||
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
|
||||
unsigned SmallestType, WidestType;
|
||||
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
|
||||
@ -5373,6 +5377,27 @@ LoopVectorizationCostModel::computeFeasibleMaxVF(unsigned ConstTripCount) {
|
||||
// dependence distance).
|
||||
unsigned MaxSafeVectorWidthInBits = Legal->getMaxSafeVectorWidthInBits();
|
||||
|
||||
if (UserVF.isNonZero()) {
|
||||
// If legally unsafe, clamp the user vectorization factor to a safe value.
|
||||
unsigned MaxSafeVF = PowerOf2Floor(MaxSafeVectorWidthInBits / WidestType);
|
||||
if (UserVF.getFixedValue() <= MaxSafeVF)
|
||||
return UserVF;
|
||||
|
||||
LLVM_DEBUG(dbgs() << "LV: User VF=" << UserVF
|
||||
<< " is unsafe, clamping to max safe VF=" << MaxSafeVF
|
||||
<< ".\n");
|
||||
ORE->emit([&]() {
|
||||
return OptimizationRemarkAnalysis(DEBUG_TYPE, "VectorizationFactor",
|
||||
TheLoop->getStartLoc(),
|
||||
TheLoop->getHeader())
|
||||
<< "User-specified vectorization factor "
|
||||
<< ore::NV("UserVectorizationFactor", UserVF)
|
||||
<< " is unsafe, clamping to maximum safe vectorization factor "
|
||||
<< ore::NV("VectorizationFactor", MaxSafeVF);
|
||||
});
|
||||
return ElementCount::getFixed(MaxSafeVF);
|
||||
}
|
||||
|
||||
WidestRegister = std::min(WidestRegister, MaxSafeVectorWidthInBits);
|
||||
|
||||
// Ensure MaxVF is a power of 2; the dependence distance bound may not be.
|
||||
@ -7031,9 +7056,12 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
|
||||
CM.invalidateCostModelingDecisions();
|
||||
}
|
||||
|
||||
if (!UserVF.isZero()) {
|
||||
ElementCount MaxVF = MaybeMaxVF.getValue();
|
||||
assert(MaxVF.isNonZero() && "MaxVF is zero.");
|
||||
|
||||
if (!UserVF.isZero() && UserVF.getFixedValue() <= MaxVF.getFixedValue()) {
|
||||
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
|
||||
assert(isPowerOf2_32(UserVF.getKnownMinValue()) &&
|
||||
assert(isPowerOf2_32(UserVF.getFixedValue()) &&
|
||||
"VF needs to be a power of two");
|
||||
// Collect the instructions (and their associated costs) that will be more
|
||||
// profitable to scalarize.
|
||||
@ -7044,9 +7072,6 @@ LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
|
||||
return {{UserVF, 0}};
|
||||
}
|
||||
|
||||
ElementCount MaxVF = MaybeMaxVF.getValue();
|
||||
assert(MaxVF.isNonZero() && "MaxVF is zero.");
|
||||
|
||||
for (ElementCount VF = ElementCount::getFixed(1);
|
||||
ElementCount::isKnownLE(VF, MaxVF); VF *= 2) {
|
||||
// Collect Uniform and Scalar instructions after vectorization with VF.
|
||||
|
@ -0,0 +1,43 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -loop-vectorize -mtriple=arm64-apple-iphoneos -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
|
||||
|
||||
; Specify a large unsafe vectorization factor of 32 that gets clamped to 16,
|
||||
; then test that an even smaller VF of 2 is selected based on the cost model.
|
||||
|
||||
; CHECK: LV: User VF=32 is unsafe, clamping to max safe VF=16.
|
||||
; CHECK: remark: <unknown>:0:0: User-specified vectorization factor 32 is unsafe, clamping to maximum safe vectorization factor 16
|
||||
; CHECK: LV: Selecting VF: 2.
|
||||
; CHECK-LABEL: @test
|
||||
; CHECK: <2 x i64>
|
||||
define void @test(i64* nocapture %a, i64* nocapture readonly %b) {
|
||||
entry:
|
||||
br label %loop.header
|
||||
|
||||
loop.header:
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
|
||||
%arrayidx = getelementptr inbounds i64, i64* %a, i64 %iv
|
||||
%0 = load i64, i64* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %iv
|
||||
%1 = load i64, i64* %arrayidx2, align 4
|
||||
%add = add nsw i64 %1, %0
|
||||
%2 = add nuw nsw i64 %iv, 16
|
||||
%arrayidx5 = getelementptr inbounds i64, i64* %a, i64 %2
|
||||
%c = icmp eq i64 %1, 120
|
||||
br i1 %c, label %then, label %latch
|
||||
|
||||
then:
|
||||
store i64 %add, i64* %arrayidx5, align 4
|
||||
br label %latch
|
||||
|
||||
latch:
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, 1024
|
||||
br i1 %exitcond.not, label %exit, label %loop.header, !llvm.loop !0
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{!0, !1, !2}
|
||||
!1 = !{!"llvm.loop.vectorize.width", i64 32}
|
||||
!2 = !{!"llvm.loop.vectorize.enable", i1 true}
|
46
test/Transforms/LoopVectorize/unsafe-vf-hint-remark.ll
Normal file
46
test/Transforms/LoopVectorize/unsafe-vf-hint-remark.ll
Normal file
@ -0,0 +1,46 @@
|
||||
; REQUIRES: asserts
|
||||
; RUN: opt -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s
|
||||
|
||||
; Make sure the unsafe user-specified vectorization factor is clamped.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
; void foo(int *a, int *b) {
|
||||
; #pragma clang loop vectorize(enable) vectorize_width(4)
|
||||
; for (int i=0; i < 1024; ++i) {
|
||||
; a[i + 2] = a[i] + b[i];
|
||||
; }
|
||||
; }
|
||||
|
||||
; CHECK: LV: User VF=4 is unsafe, clamping to max safe VF=2.
|
||||
; CHECK: remark: <unknown>:0:0: User-specified vectorization factor 4 is unsafe, clamping to maximum safe vectorization factor 2
|
||||
; CHECK-LABEL: @foo
|
||||
; CHECK: <2 x i32>
|
||||
define void @foo(i32* %a, i32* %b) {
|
||||
entry:
|
||||
br label %loop.ph
|
||||
|
||||
loop.ph:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%iv = phi i64 [ 0, %loop.ph ], [ %iv.next, %loop ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
|
||||
%1 = load i32, i32* %arrayidx2, align 4
|
||||
%add = add nsw i32 %1, %0
|
||||
%2 = add nuw nsw i64 %iv, 2
|
||||
%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
|
||||
store i32 %add, i32* %arrayidx5, align 4
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, 1024
|
||||
br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = !{!0, !1, !2}
|
||||
!1 = !{!"llvm.loop.vectorize.width", i32 4}
|
||||
!2 = !{!"llvm.loop.vectorize.enable", i1 true}
|
Loading…
Reference in New Issue
Block a user