mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[SVE] Add support for scalable vectors with vectorize.scalable.enable loop attribute
In this patch I have added support for a new loop hint called
vectorize.scalable.enable that says whether we should enable scalable
vectorization or not. If a user wants to instruct the compiler to
vectorize a loop with scalable vectors they can now do this as
follows:

  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
  ...
  !2 = !{!2, !3, !4}
  !3 = !{!"llvm.loop.vectorize.width", i32 8}
  !4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

Setting the hint to false simply reverts the behaviour back to the
default, using fixed width vectors.

Differential Revision: https://reviews.llvm.org/D88962
This commit is contained in:
parent
ae44f6b6df
commit
6d7c7dcc2b
@ -5956,6 +5956,21 @@ vectorization:
|
||||
!0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0}
|
||||
!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1}
|
||||
|
||||
'``llvm.loop.vectorize.scalable.enable``' Metadata
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
This metadata selectively enables or disables scalable vectorization for the
|
||||
loop, and only has any effect if vectorization for the loop is already enabled.
|
||||
The first operand is the string ``llvm.loop.vectorize.scalable.enable``
|
||||
and the second operand is a bit. If the bit operand value is 1, scalable
|
||||
vectorization is enabled, whereas a value of 0 reverts to the default fixed
|
||||
width vectorization:
|
||||
|
||||
.. code-block:: llvm
|
||||
|
||||
!0 = !{!"llvm.loop.vectorize.scalable.enable", i1 0}
|
||||
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 1}
|
||||
|
||||
'``llvm.loop.vectorize.width``' Metadata
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
@ -213,6 +213,13 @@ Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
|
||||
/// Find named metadata for a loop with an integer value.
|
||||
llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
|
||||
|
||||
/// Find a combination of metadata ("llvm.loop.vectorize.width" and
|
||||
/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct an
|
||||
/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
|
||||
/// then None is returned.
|
||||
Optional<ElementCount>
|
||||
getOptionalElementCountLoopAttribute(Loop *TheLoop);
|
||||
|
||||
/// Create a new loop identifier for a loop created from a loop transformation.
|
||||
///
|
||||
/// @param OrigLoopID The loop ID of the loop before the transformation.
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/Analysis/LoopAccessAnalysis.h"
|
||||
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
|
||||
#include "llvm/Support/TypeSize.h"
|
||||
#include "llvm/Transforms/Utils/LoopUtils.h"
|
||||
|
||||
namespace llvm {
|
||||
@ -43,8 +44,14 @@ namespace llvm {
|
||||
/// for example 'force', means a decision has been made. So, we need to be
|
||||
/// careful NOT to add them if the user hasn't specifically asked so.
|
||||
class LoopVectorizeHints {
|
||||
enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
|
||||
HK_PREDICATE };
|
||||
enum HintKind {
|
||||
HK_WIDTH,
|
||||
HK_UNROLL,
|
||||
HK_FORCE,
|
||||
HK_ISVECTORIZED,
|
||||
HK_PREDICATE,
|
||||
HK_SCALABLE
|
||||
};
|
||||
|
||||
/// Hint - associates name and validation with the hint value.
|
||||
struct Hint {
|
||||
@ -73,6 +80,9 @@ class LoopVectorizeHints {
|
||||
/// Vector Predicate
|
||||
Hint Predicate;
|
||||
|
||||
/// Says whether we should use fixed width or scalable vectorization.
|
||||
Hint Scalable;
|
||||
|
||||
/// Return the loop metadata prefix.
|
||||
static StringRef Prefix() { return "llvm.loop."; }
|
||||
|
||||
@ -98,7 +108,9 @@ public:
|
||||
/// Dumps all the hint information.
|
||||
void emitRemarkWithHints() const;
|
||||
|
||||
unsigned getWidth() const { return Width.Value; }
|
||||
ElementCount getWidth() const {
|
||||
return ElementCount::get(Width.Value, isScalable());
|
||||
}
|
||||
unsigned getInterleave() const { return Interleave.Value; }
|
||||
unsigned getIsVectorized() const { return IsVectorized.Value; }
|
||||
unsigned getPredicate() const { return Predicate.Value; }
|
||||
@ -109,6 +121,8 @@ public:
|
||||
return (ForceKind)Force.Value;
|
||||
}
|
||||
|
||||
bool isScalable() const { return Scalable.Value; }
|
||||
|
||||
/// If hints are provided that force vectorization, use the AlwaysPrint
|
||||
/// pass name to force the frontend to print the diagnostic.
|
||||
const char *vectorizeAnalysisPassName() const;
|
||||
@ -119,7 +133,9 @@ public:
|
||||
// enabled by default because can be unsafe or inefficient. For example,
|
||||
// reordering floating-point operations will change the way round-off
|
||||
// error accumulates in the loop.
|
||||
return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
|
||||
ElementCount EC = getWidth();
|
||||
return getForce() == LoopVectorizeHints::FK_Enabled ||
|
||||
EC.getKnownMinValue() > 1;
|
||||
}
|
||||
|
||||
bool isPotentiallyUnsafe() const {
|
||||
|
@ -48,12 +48,12 @@ static void warnAboutLeftoverTransformations(Loop *L,
|
||||
|
||||
if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
|
||||
LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
|
||||
Optional<int> VectorizeWidth =
|
||||
getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
|
||||
Optional<ElementCount> VectorizeWidth =
|
||||
getOptionalElementCountLoopAttribute(L);
|
||||
Optional<int> InterleaveCount =
|
||||
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
|
||||
|
||||
if (VectorizeWidth.getValueOr(0) != 1)
|
||||
if (!VectorizeWidth || VectorizeWidth->isVector())
|
||||
ORE->emit(
|
||||
DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
|
||||
"FailedRequestedVectorization",
|
||||
|
@ -301,6 +301,21 @@ bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
|
||||
return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
|
||||
}
|
||||
|
||||
/// Build an ElementCount for \p TheLoop from its vectorization loop metadata.
///
/// The "llvm.loop.vectorize.width" hint is looked up first. When it is absent
/// None is returned and "llvm.loop.vectorize.scalable.enable" is not consulted.
/// Otherwise the width is paired with the scalable hint, which is treated as
/// false (fixed width) when it is not present on the loop.
Optional<ElementCount>
|
||||
llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
|
||||
Optional<int> Width =
|
||||
getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
|
||||
|
||||
if (Width.hasValue()) {
|
||||
// The scalable flag is read through the integer-attribute helper and then
// passed as the second argument of ElementCount::get.
Optional<int> IsScalable = getOptionalIntLoopAttribute(
|
||||
TheLoop, "llvm.loop.vectorize.scalable.enable");
|
||||
return ElementCount::get(*Width,
|
||||
IsScalable.hasValue() ? *IsScalable : false);
|
||||
}
|
||||
|
||||
// No width hint on the loop: there is nothing to build an ElementCount from.
return None;
|
||||
}
|
||||
|
||||
llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
|
||||
StringRef Name) {
|
||||
const MDOperand *AttrMD =
|
||||
@ -450,14 +465,15 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
|
||||
if (Enable == false)
|
||||
return TM_SuppressedByUser;
|
||||
|
||||
Optional<int> VectorizeWidth =
|
||||
getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
|
||||
Optional<ElementCount> VectorizeWidth =
|
||||
getOptionalElementCountLoopAttribute(L);
|
||||
Optional<int> InterleaveCount =
|
||||
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
|
||||
|
||||
// 'Forcing' vector width and interleave count to one effectively disables
|
||||
// this transformation.
|
||||
if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
|
||||
if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
|
||||
InterleaveCount == 1)
|
||||
return TM_SuppressedByUser;
|
||||
|
||||
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
|
||||
@ -466,10 +482,10 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
|
||||
if (Enable == true)
|
||||
return TM_ForcedByUser;
|
||||
|
||||
if (VectorizeWidth == 1 && InterleaveCount == 1)
|
||||
if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
|
||||
return TM_Disable;
|
||||
|
||||
if (VectorizeWidth > 1 || InterleaveCount > 1)
|
||||
if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
|
||||
return TM_Enable;
|
||||
|
||||
if (hasDisableAllTransformsHint(L))
|
||||
|
@ -66,6 +66,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
|
||||
return (Val <= 1);
|
||||
case HK_ISVECTORIZED:
|
||||
case HK_PREDICATE:
|
||||
case HK_SCALABLE:
|
||||
return (Val == 0 || Val == 1);
|
||||
}
|
||||
return false;
|
||||
@ -78,7 +79,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
|
||||
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
|
||||
Force("vectorize.enable", FK_Undefined, HK_FORCE),
|
||||
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
|
||||
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
|
||||
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
|
||||
Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
|
||||
ORE(ORE) {
|
||||
// Populate values with existing loop metadata.
|
||||
getHintsFromMetadata();
|
||||
@ -91,7 +93,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
|
||||
// If the vectorization width and interleaving count are both 1 then
|
||||
// consider the loop to have been already vectorized because there's
|
||||
// nothing more that we can do.
|
||||
IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
|
||||
IsVectorized.Value =
|
||||
getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
|
||||
LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
|
||||
<< "LV: Interleaving disabled by the pass manager\n");
|
||||
}
|
||||
@ -164,7 +167,7 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
|
||||
if (Force.Value == LoopVectorizeHints::FK_Enabled) {
|
||||
R << " (Force=" << NV("Force", true);
|
||||
if (Width.Value != 0)
|
||||
R << ", Vector Width=" << NV("VectorWidth", Width.Value);
|
||||
R << ", Vector Width=" << NV("VectorWidth", getWidth());
|
||||
if (Interleave.Value != 0)
|
||||
R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
|
||||
R << ")";
|
||||
@ -175,11 +178,11 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
|
||||
}
|
||||
|
||||
const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
|
||||
if (getWidth() == 1)
|
||||
if (getWidth() == ElementCount::getFixed(1))
|
||||
return LV_NAME;
|
||||
if (getForce() == LoopVectorizeHints::FK_Disabled)
|
||||
return LV_NAME;
|
||||
if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
|
||||
if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
|
||||
return LV_NAME;
|
||||
return OptimizationRemarkAnalysis::AlwaysPrint;
|
||||
}
|
||||
@ -230,7 +233,8 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
|
||||
return;
|
||||
unsigned Val = C->getZExtValue();
|
||||
|
||||
Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
|
||||
Hint *Hints[] = {&Width, &Interleave, &Force,
|
||||
&IsVectorized, &Predicate, &Scalable};
|
||||
for (auto H : Hints) {
|
||||
if (Name == H->Name) {
|
||||
if (H->validate(Val))
|
||||
|
@ -8314,11 +8314,16 @@ static bool processLoopInVPlanNativePath(
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
|
||||
|
||||
// Get user vectorization factor.
|
||||
const unsigned UserVF = Hints.getWidth();
|
||||
ElementCount UserVF = Hints.getWidth();
|
||||
if (UserVF.isScalable()) {
|
||||
// TODO: Use scalable UserVF once we've added initial support for scalable
|
||||
// vectorization. For now we convert it to fixed width, but this will be
|
||||
// removed in a later patch.
|
||||
UserVF = ElementCount::getFixed(UserVF.getKnownMinValue());
|
||||
}
|
||||
|
||||
// Plan how to best vectorize, return the best VF and its cost.
|
||||
const VectorizationFactor VF =
|
||||
LVP.planInVPlanNativePath(ElementCount::getFixed(UserVF));
|
||||
const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF);
|
||||
|
||||
// If we are stress testing VPlan builds, do not attempt to generate vector
|
||||
// code. Masked vector code generation support will follow soon.
|
||||
@ -8480,12 +8485,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
||||
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
|
||||
|
||||
// Get user vectorization factor and interleave count.
|
||||
unsigned UserVF = Hints.getWidth();
|
||||
ElementCount UserVF = Hints.getWidth();
|
||||
if (UserVF.isScalable()) {
|
||||
// TODO: Use scalable UserVF once we've added initial support for scalable
|
||||
// vectorization. For now we convert it to fixed width, but this will be
|
||||
// removed in a later patch.
|
||||
UserVF = ElementCount::getFixed(UserVF.getKnownMinValue());
|
||||
}
|
||||
|
||||
unsigned UserIC = Hints.getInterleave();
|
||||
|
||||
// Plan how to best vectorize, return the best VF and its cost.
|
||||
Optional<VectorizationFactor> MaybeVF =
|
||||
LVP.plan(ElementCount::getFixed(UserVF), UserIC);
|
||||
Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
|
||||
|
||||
VectorizationFactor VF = VectorizationFactor::Disabled();
|
||||
unsigned IC = 1;
|
||||
|
@ -24,7 +24,55 @@ for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @test2(
|
||||
; CHECK: store <8 x i32>
|
||||
; CHECK: ret void
|
||||
define void @test2(i32* nocapture %a, i32 %n) #0 {
|
||||
entry:
|
||||
%cmp4 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp4, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
store i32 %0, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: @test3(
|
||||
; CHECK: store <8 x i32>
|
||||
; CHECK: ret void
|
||||
define void @test3(i32* nocapture %a, i32 %n) #0 {
|
||||
entry:
|
||||
%cmp4 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp4, label %for.body, label %for.end
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
store i32 %0, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!0 = !{!0, !1}
|
||||
!1 = !{!"llvm.loop.vectorize.width", i32 8}
|
||||
!2 = !{!2, !1, !3}
|
||||
!3 = !{!"llvm.loop.vectorize.scalable.enable", i32 1}
|
||||
!4 = !{!4, !1, !5}
|
||||
!5 = !{!"llvm.loop.vectorize.scalable.enable", i32 0}
|
||||
|
69
test/Transforms/LoopVectorize/no_array_bounds_scalable.ll
Normal file
69
test/Transforms/LoopVectorize/no_array_bounds_scalable.ll
Normal file
@ -0,0 +1,69 @@
|
||||
; RUN: opt < %s -loop-vectorize -transform-warning -S 2>&1 | FileCheck %s
|
||||
|
||||
; Like no_array_bounds.ll we verify warnings are generated when vectorization/interleaving is
|
||||
; explicitly specified and fails to occur for both fixed and scalable vectorize.width loop hints.
|
||||
|
||||
; #pragma clang loop vectorize(enable)
|
||||
; for (int i = 0; i < number; i++) {
|
||||
; A[B[i]]++;
|
||||
; }
|
||||
|
||||
; CHECK: warning: <unknown>:0:0: loop not interleaved: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
|
||||
define dso_local void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
|
||||
entry:
|
||||
%cmp7 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp7, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
%wide.trip.count = zext i32 %N to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%idxprom1 = sext i32 %0 to i64
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
|
||||
%1 = load i32, i32* %arrayidx2, align 4
|
||||
%inc = add nsw i32 %1, 1
|
||||
store i32 %inc, i32* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: warning: <unknown>:0:0: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
|
||||
define dso_local void @foo2(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
|
||||
entry:
|
||||
%cmp7 = icmp sgt i32 %N, 0
|
||||
br i1 %cmp7, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
%wide.trip.count = zext i32 %N to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%idxprom1 = sext i32 %0 to i64
|
||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
|
||||
%1 = load i32, i32* %arrayidx2, align 4
|
||||
%inc = add nsw i32 %1, 1
|
||||
store i32 %inc, i32* %arrayidx2, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !3
|
||||
|
||||
for.end: ; preds = %for.body, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = distinct !{!0, !1, !2}
|
||||
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
!2 = !{!"llvm.loop.vectorize.width", i32 1}
|
||||
!3 = distinct !{!3, !1, !2, !4}
|
||||
!4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
|
Loading…
Reference in New Issue
Block a user