mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-25 04:02:41 +01:00
[LV] Disable Scalable VFs when tail folding is enabled b/c of low tripcount.
The loop vectorizer may decide to use tail folding when the trip count is low. When that happens, scalable VFs are no longer candidates, because tail folding/predication is not yet supported for scalable vectors. This can be re-enabled in a future patch.

Reviewed By: kmclaughlin

Differential Revision: https://reviews.llvm.org/D106657
This commit is contained in:
parent
3bc8cd6a0b
commit
e1c8650040
@ -5817,6 +5817,12 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For scalable vectors, don't use tail folding as this is currently not yet
|
||||||
|
// supported. The code is likely to have ended up here if the tripcount is
|
||||||
|
// low, in which case it makes sense not to use scalable vectors.
|
||||||
|
if (MaxFactors.ScalableVF.isVector())
|
||||||
|
MaxFactors.ScalableVF = ElementCount::getScalable(0);
|
||||||
|
|
||||||
// If we don't know the precise trip count, or if the trip count that we
|
// If we don't know the precise trip count, or if the trip count that we
|
||||||
// found modulo the vectorization factor is not zero, try to fold the tail
|
// found modulo the vectorization factor is not zero, try to fold the tail
|
||||||
// by masking.
|
// by masking.
|
||||||
|
23
test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
Normal file
23
test/Transforms/LoopVectorize/AArch64/sve-tail-folding.ll
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
; RUN: opt -S -loop-vectorize -scalable-vectorization=preferred -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck %s
|
||||||
|
|
||||||
|
; CHECK-NOT: vector.body:
|
||||||
|
|
||||||
|
target triple = "aarch64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
define void @tail_predication(i32 %init, i32* %ptr, i32 %val) #0 {
|
||||||
|
entry:
|
||||||
|
br label %while.body
|
||||||
|
|
||||||
|
while.body: ; preds = %while.body, %entry
|
||||||
|
%index = phi i32 [ %index.dec, %while.body ], [ %init, %entry ]
|
||||||
|
%gep = getelementptr i32, i32* %ptr, i32 %index
|
||||||
|
store i32 %val, i32* %gep
|
||||||
|
%index.dec = add nsw i32 %index, -1
|
||||||
|
%cmp10 = icmp sgt i32 %index, 0
|
||||||
|
br i1 %cmp10, label %while.body, label %while.end.loopexit
|
||||||
|
|
||||||
|
while.end.loopexit: ; preds = %while.body
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
attributes #0 = { "target-features"="+sve" }
|
Loading…
Reference in New Issue
Block a user