1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[SVE] Add support for scalable vectors with vectorize.scalable.enable loop attribute

In this patch I have added support for a new loop hint called
vectorize.scalable.enable that says whether we should enable scalable
vectorization or not. If a user wants to instruct the compiler to
vectorize a loop with scalable vectors they can now do this as
follows:

  br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
  ...
  !2 = !{!2, !3, !4}
  !3 = !{!"llvm.loop.vectorize.width", i32 8}
  !4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

Setting the hint to false simply reverts the behaviour back to the
default, using fixed width vectors.

Differential Revision: https://reviews.llvm.org/D88962
This commit is contained in:
David Sherwood 2020-10-07 09:21:39 +01:00
parent ae44f6b6df
commit 6d7c7dcc2b
9 changed files with 210 additions and 24 deletions

View File

@ -5956,6 +5956,21 @@ vectorization:
!0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0}
!1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1}
'``llvm.loop.vectorize.scalable.enable``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This metadata selectively enables or disables scalable vectorization for the
loop, and only has any effect if vectorization for the loop is already enabled.
The first operand is the string ``llvm.loop.vectorize.scalable.enable``
and the second operand is a bit. If the bit operand value is 1, scalable
vectorization is enabled, whereas a value of 0 reverts to the default
fixed-width vectorization:
.. code-block:: llvm
!0 = !{!"llvm.loop.vectorize.scalable.enable", i1 0}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 1}
'``llvm.loop.vectorize.width``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -213,6 +213,13 @@ Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
/// Find named metadata for a loop with an integer value.
llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
/// Find a combination of metadata ("llvm.loop.vectorize.width" and
/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct
/// an ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be
/// found then None is returned.
Optional<ElementCount>
getOptionalElementCountLoopAttribute(Loop *TheLoop);
/// Create a new loop identifier for a loop created from a loop transformation.
///
/// @param OrigLoopID The loop ID of the loop before the transformation.

View File

@ -29,6 +29,7 @@
#include "llvm/ADT/MapVector.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
namespace llvm {
@ -43,8 +44,14 @@ namespace llvm {
/// for example 'force', means a decision has been made. So, we need to be
/// careful NOT to add them if the user hasn't specifically asked so.
class LoopVectorizeHints {
enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED,
HK_PREDICATE };
enum HintKind {
HK_WIDTH,
HK_UNROLL,
HK_FORCE,
HK_ISVECTORIZED,
HK_PREDICATE,
HK_SCALABLE
};
/// Hint - associates name and validation with the hint value.
struct Hint {
@ -73,6 +80,9 @@ class LoopVectorizeHints {
/// Vector Predicate
Hint Predicate;
/// Says whether we should use fixed width or scalable vectorization.
Hint Scalable;
/// Return the loop metadata prefix.
static StringRef Prefix() { return "llvm.loop."; }
@ -98,7 +108,9 @@ public:
/// Dumps all the hint information.
void emitRemarkWithHints() const;
unsigned getWidth() const { return Width.Value; }
ElementCount getWidth() const {
return ElementCount::get(Width.Value, isScalable());
}
unsigned getInterleave() const { return Interleave.Value; }
unsigned getIsVectorized() const { return IsVectorized.Value; }
unsigned getPredicate() const { return Predicate.Value; }
@ -109,6 +121,8 @@ public:
return (ForceKind)Force.Value;
}
bool isScalable() const { return Scalable.Value; }
/// If hints are provided that force vectorization, use the AlwaysPrint
/// pass name to force the frontend to print the diagnostic.
const char *vectorizeAnalysisPassName() const;
@ -119,7 +133,9 @@ public:
// enabled by default because it can be unsafe or inefficient. For example,
// reordering floating-point operations will change the way round-off
// error accumulates in the loop.
return getForce() == LoopVectorizeHints::FK_Enabled || getWidth() > 1;
ElementCount EC = getWidth();
return getForce() == LoopVectorizeHints::FK_Enabled ||
EC.getKnownMinValue() > 1;
}
bool isPotentiallyUnsafe() const {

View File

@ -48,12 +48,12 @@ static void warnAboutLeftoverTransformations(Loop *L,
if (hasVectorizeTransformation(L) == TM_ForcedByUser) {
LLVM_DEBUG(dbgs() << "Leftover vectorization transformation\n");
Optional<int> VectorizeWidth =
getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
Optional<ElementCount> VectorizeWidth =
getOptionalElementCountLoopAttribute(L);
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
if (VectorizeWidth.getValueOr(0) != 1)
if (!VectorizeWidth || VectorizeWidth->isVector())
ORE->emit(
DiagnosticInfoOptimizationFailure(DEBUG_TYPE,
"FailedRequestedVectorization",

View File

@ -301,6 +301,21 @@ bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false);
}
/// Build an ElementCount from the loop's "llvm.loop.vectorize.width" and
/// "llvm.loop.vectorize.scalable.enable" metadata.
///
/// Returns None when no width hint is attached to \p TheLoop. When the width
/// hint is present but the scalable hint is absent, the count is built as
/// non-scalable.
Optional<ElementCount>
llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
  Optional<int> WidthHint =
      getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");

  // Without a width hint there is nothing to build an ElementCount from; the
  // scalable hint is not even looked up in that case.
  if (!WidthHint)
    return None;

  Optional<int> ScalableHint = getOptionalIntLoopAttribute(
      TheLoop, "llvm.loop.vectorize.scalable.enable");

  // Any non-zero scalable hint selects a scalable count; a missing hint
  // behaves like 0.
  return ElementCount::get(*WidthHint, ScalableHint.getValueOr(0) != 0);
}
llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
StringRef Name) {
const MDOperand *AttrMD =
@ -450,14 +465,15 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
if (Enable == false)
return TM_SuppressedByUser;
Optional<int> VectorizeWidth =
getOptionalIntLoopAttribute(L, "llvm.loop.vectorize.width");
Optional<ElementCount> VectorizeWidth =
getOptionalElementCountLoopAttribute(L);
Optional<int> InterleaveCount =
getOptionalIntLoopAttribute(L, "llvm.loop.interleave.count");
// 'Forcing' vector width and interleave count to one effectively disables
// this transformation.
if (Enable == true && VectorizeWidth == 1 && InterleaveCount == 1)
if (Enable == true && VectorizeWidth && VectorizeWidth->isScalar() &&
InterleaveCount == 1)
return TM_SuppressedByUser;
if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
@ -466,10 +482,10 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
if (Enable == true)
return TM_ForcedByUser;
if (VectorizeWidth == 1 && InterleaveCount == 1)
if ((VectorizeWidth && VectorizeWidth->isScalar()) && InterleaveCount == 1)
return TM_Disable;
if (VectorizeWidth > 1 || InterleaveCount > 1)
if ((VectorizeWidth && VectorizeWidth->isVector()) || InterleaveCount > 1)
return TM_Enable;
if (hasDisableAllTransformsHint(L))

View File

@ -66,6 +66,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
return (Val <= 1);
case HK_ISVECTORIZED:
case HK_PREDICATE:
case HK_SCALABLE:
return (Val == 0 || Val == 1);
}
return false;
@ -78,7 +79,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
Scalable("vectorize.scalable.enable", false, HK_SCALABLE), TheLoop(L),
ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@ -91,7 +93,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
// If the vectorization width and interleaving count are both 1 then
// consider the loop to have been already vectorized because there's
// nothing more that we can do.
IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
IsVectorized.Value =
getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
<< "LV: Interleaving disabled by the pass manager\n");
}
@ -164,7 +167,7 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
if (Force.Value == LoopVectorizeHints::FK_Enabled) {
R << " (Force=" << NV("Force", true);
if (Width.Value != 0)
R << ", Vector Width=" << NV("VectorWidth", Width.Value);
R << ", Vector Width=" << NV("VectorWidth", getWidth());
if (Interleave.Value != 0)
R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
R << ")";
@ -175,11 +178,11 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
}
const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
if (getWidth() == 1)
if (getWidth() == ElementCount::getFixed(1))
return LV_NAME;
if (getForce() == LoopVectorizeHints::FK_Disabled)
return LV_NAME;
if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
return LV_NAME;
return OptimizationRemarkAnalysis::AlwaysPrint;
}
@ -230,7 +233,8 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
return;
unsigned Val = C->getZExtValue();
Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
Hint *Hints[] = {&Width, &Interleave, &Force,
&IsVectorized, &Predicate, &Scalable};
for (auto H : Hints) {
if (Name == H->Name) {
if (H->validate(Val))

View File

@ -8314,11 +8314,16 @@ static bool processLoopInVPlanNativePath(
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, LVL, CM, IAI, PSE);
// Get user vectorization factor.
const unsigned UserVF = Hints.getWidth();
ElementCount UserVF = Hints.getWidth();
if (UserVF.isScalable()) {
// TODO: Use scalable UserVF once we've added initial support for scalable
// vectorization. For now we convert it to fixed width, but this will be
// removed in a later patch.
UserVF = ElementCount::getFixed(UserVF.getKnownMinValue());
}
// Plan how to best vectorize, return the best VF and its cost.
const VectorizationFactor VF =
LVP.planInVPlanNativePath(ElementCount::getFixed(UserVF));
const VectorizationFactor VF = LVP.planInVPlanNativePath(UserVF);
// If we are stress testing VPlan builds, do not attempt to generate vector
// code. Masked vector code generation support will follow soon.
@ -8480,12 +8485,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LoopVectorizationPlanner LVP(L, LI, TLI, TTI, &LVL, CM, IAI, PSE);
// Get user vectorization factor and interleave count.
unsigned UserVF = Hints.getWidth();
ElementCount UserVF = Hints.getWidth();
if (UserVF.isScalable()) {
// TODO: Use scalable UserVF once we've added initial support for scalable
// vectorization. For now we convert it to fixed width, but this will be
// removed in a later patch.
UserVF = ElementCount::getFixed(UserVF.getKnownMinValue());
}
unsigned UserIC = Hints.getInterleave();
// Plan how to best vectorize, return the best VF and its cost.
Optional<VectorizationFactor> MaybeVF =
LVP.plan(ElementCount::getFixed(UserVF), UserIC);
Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
VectorizationFactor VF = VectorizationFactor::Disabled();
unsigned IC = 1;

View File

@ -24,7 +24,55 @@ for.end: ; preds = %for.body, %entry
ret void
}
; CHECK-LABEL: @test2(
; CHECK: store <8 x i32>
; CHECK: ret void
; test2: stores the truncated induction variable into %a[i] for i in [0, %n).
; The loop back-edge carries !llvm.loop !2, i.e. vectorize.width = 8 together
; with vectorize.scalable.enable = 1. The FileCheck pattern preceding this
; function still expects a fixed <8 x i32> store.
; NOTE(review): presumably because a scalable user VF is currently converted
; to a fixed-width one by the vectorizer -- confirm against LoopVectorize.cpp.
define void @test2(i32* nocapture %a, i32 %n) #0 {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
; Loop hints under test are attached to the back-edge branch.
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
for.end: ; preds = %for.body, %entry
ret void
}
; CHECK-LABEL: @test3(
; CHECK: store <8 x i32>
; CHECK: ret void
; test3: same loop body as the other tests in this file (store of the
; truncated induction variable into %a[i]). The back-edge carries
; !llvm.loop !4, i.e. vectorize.width = 8 together with
; vectorize.scalable.enable = 0, so the FileCheck pattern preceding this
; function expects a fixed <8 x i32> store.
define void @test3(i32* nocapture %a, i32 %n) #0 {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body, label %for.end
for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = trunc i64 %indvars.iv to i32
store i32 %0, i32* %arrayidx, align 4
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
; Loop hints under test are attached to the back-edge branch.
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4
for.end: ; preds = %for.body, %entry
ret void
}
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
!0 = !{!0, !1}
!1 = !{!"llvm.loop.vectorize.width", i32 8}
!2 = !{!2, !1, !3}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i32 1}
!4 = !{!4, !1, !5}
!5 = !{!"llvm.loop.vectorize.scalable.enable", i32 0}

View File

@ -0,0 +1,69 @@
; RUN: opt < %s -loop-vectorize -transform-warning -S 2>&1 | FileCheck %s
; Like no_array_bounds.ll we verify warnings are generated when vectorization/interleaving is
; explicitly specified and fails to occur for both fixed and scalable vectorize.width loop hints.
; #pragma clang loop vectorize(enable)
; for (int i = 0; i < number; i++) {
; A[B[i]]++;
; }
; CHECK: warning: <unknown>:0:0: loop not interleaved: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
; foo: increments A[B[i]] for i in [0, N). The back-edge carries
; !llvm.loop !0, i.e. vectorize.enable = true with a fixed
; vectorize.width = 1. The warning expected just above this function is the
; "loop not interleaved" one.
define dso_local void @foo(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
entry:
%cmp7 = icmp sgt i32 %N, 0
br i1 %cmp7, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
%wide.trip.count = zext i32 %N to i64
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
; Load the index B[i], then read-modify-write A[B[i]].
%arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%idxprom1 = sext i32 %0 to i64
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
%1 = load i32, i32* %arrayidx2, align 4
%inc = add nsw i32 %1, 1
store i32 %inc, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
; Loop hints under test are attached to the back-edge branch.
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
for.end: ; preds = %for.body, %entry
ret void
}
; CHECK: warning: <unknown>:0:0: loop not vectorized: the optimizer was unable to perform the requested transformation; the transformation might be disabled or specified as part of an unsupported transformation ordering
; foo2: same body as @foo (increments A[B[i]] for i in [0, N)), but the
; back-edge carries !llvm.loop !3, which adds
; vectorize.scalable.enable = true to the enable + width = 1 hints. The
; warning expected just above this function is the "loop not vectorized" one.
; NOTE(review): presumably a scalable width of 1 still counts as a vector
; request, unlike the fixed width of 1 in @foo -- confirm against
; ElementCount::isVector.
define dso_local void @foo2(i32* nocapture %A, i32* nocapture readonly %B, i32 %N) {
entry:
%cmp7 = icmp sgt i32 %N, 0
br i1 %cmp7, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
%wide.trip.count = zext i32 %N to i64
br label %for.body
for.body: ; preds = %for.body.preheader, %for.body
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
; Load the index B[i], then read-modify-write A[B[i]].
%arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%idxprom1 = sext i32 %0 to i64
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1
%1 = load i32, i32* %arrayidx2, align 4
%inc = add nsw i32 %1, 1
store i32 %inc, i32* %arrayidx2, align 4
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
; Loop hints under test are attached to the back-edge branch.
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !3
for.end: ; preds = %for.body, %entry
ret void
}
!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
!2 = !{!"llvm.loop.vectorize.width", i32 1}
!3 = distinct !{!3, !1, !2, !4}
!4 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}