mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[LV] Consider Loop Unroll Hints When Making Interleave Decisions
This patch causes the loop vectorizer to not interleave loops that have nounroll loop hints (the `llvm.loop.unroll.disable` metadata, or `llvm.loop.unroll.count` set to 1). Note that if a particular interleave count is requested explicitly (through `llvm.loop.interleave.count`), it will still be honoured, regardless of the presence of nounroll hints.
Reviewed By: Meinersbur
Differential Revision: https://reviews.llvm.org/D101374
This commit is contained in:
parent
f4e32d4d07
commit
2c0197c261
@ -219,14 +219,15 @@ Optional<const MDOperand *> findStringMetadataForLoop(const Loop *TheLoop,
|
||||
StringRef Name);
|
||||
|
||||
/// Find named metadata for a loop with an integer value.
|
||||
llvm::Optional<int> getOptionalIntLoopAttribute(Loop *TheLoop, StringRef Name);
|
||||
llvm::Optional<int> getOptionalIntLoopAttribute(const Loop *TheLoop,
|
||||
StringRef Name);
|
||||
|
||||
/// Find a combination of metadata ("llvm.loop.vectorize.width" and
|
||||
/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a
|
||||
/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
|
||||
/// then None is returned.
|
||||
Optional<ElementCount>
|
||||
getOptionalElementCountLoopAttribute(Loop *TheLoop);
|
||||
getOptionalElementCountLoopAttribute(const Loop *TheLoop);
|
||||
|
||||
/// Create a new loop identifier for a loop created from a loop transformation.
|
||||
///
|
||||
@ -295,11 +296,11 @@ enum TransformationMode {
|
||||
|
||||
/// @{
|
||||
/// Get the mode for LLVM's supported loop transformations.
|
||||
TransformationMode hasUnrollTransformation(Loop *L);
|
||||
TransformationMode hasUnrollAndJamTransformation(Loop *L);
|
||||
TransformationMode hasVectorizeTransformation(Loop *L);
|
||||
TransformationMode hasDistributeTransformation(Loop *L);
|
||||
TransformationMode hasLICMVersioningTransformation(Loop *L);
|
||||
TransformationMode hasUnrollTransformation(const Loop *L);
|
||||
TransformationMode hasUnrollAndJamTransformation(const Loop *L);
|
||||
TransformationMode hasVectorizeTransformation(const Loop *L);
|
||||
TransformationMode hasDistributeTransformation(const Loop *L);
|
||||
TransformationMode hasLICMVersioningTransformation(const Loop *L);
|
||||
/// @}
|
||||
|
||||
/// Set input string into loop metadata by keeping other values intact.
|
||||
|
@ -46,7 +46,7 @@ namespace llvm {
|
||||
class LoopVectorizeHints {
|
||||
enum HintKind {
|
||||
HK_WIDTH,
|
||||
HK_UNROLL,
|
||||
HK_INTERLEAVE,
|
||||
HK_FORCE,
|
||||
HK_ISVECTORIZED,
|
||||
HK_PREDICATE,
|
||||
@ -111,7 +111,15 @@ public:
|
||||
/// Returns the vectorization width hint as an ElementCount, built from
/// Width.Value with the scalable flag supplied by isScalable().
ElementCount getWidth() const {
|
||||
return ElementCount::get(Width.Value, isScalable());
|
||||
}
|
||||
unsigned getInterleave() const { return Interleave.Value; }
|
||||
/// Returns the interleave count to report for this loop.
///
/// A non-zero Interleave.Value is returned unchanged, so an explicitly
/// requested count takes precedence over any unroll hint. Otherwise the
/// result is 1 when hasUnrollTransformation(TheLoop) has the TM_Disable bit
/// set, and 0 in every other case.
/// NOTE(review): 0 presumably means "no preference, let the cost model
/// decide" -- confirm against the callers of getInterleave().
unsigned getInterleave() const {
|
||||
// An explicit (non-zero) interleave value always wins.
if (Interleave.Value)
|
||||
return Interleave.Value;
|
||||
// If interleaving is not explicitly set, assume that if we do not want
|
||||
// unrolling, we also don't want any interleaving.
|
||||
// The bitwise test matches any transformation mode that carries TM_Disable.
if (llvm::hasUnrollTransformation(TheLoop) & TM_Disable)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
/// Returns the value currently stored in the IsVectorized hint.
unsigned getIsVectorized() const { return IsVectorized.Value; }
|
||||
/// Returns the value currently stored in the Predicate hint.
unsigned getPredicate() const { return Predicate.Value; }
|
||||
enum ForceKind getForce() const {
|
||||
|
@ -298,7 +298,7 @@ bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) {
|
||||
}
|
||||
|
||||
Optional<ElementCount>
|
||||
llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
|
||||
llvm::getOptionalElementCountLoopAttribute(const Loop *TheLoop) {
|
||||
Optional<int> Width =
|
||||
getOptionalIntLoopAttribute(TheLoop, "llvm.loop.vectorize.width");
|
||||
|
||||
@ -311,7 +311,7 @@ llvm::getOptionalElementCountLoopAttribute(Loop *TheLoop) {
|
||||
return None;
|
||||
}
|
||||
|
||||
llvm::Optional<int> llvm::getOptionalIntLoopAttribute(Loop *TheLoop,
|
||||
llvm::Optional<int> llvm::getOptionalIntLoopAttribute(const Loop *TheLoop,
|
||||
StringRef Name) {
|
||||
const MDOperand *AttrMD =
|
||||
findStringMetadataForLoop(TheLoop, Name).getValueOr(nullptr);
|
||||
@ -418,7 +418,7 @@ bool llvm::hasMustProgress(const Loop *L) {
|
||||
return getBooleanLoopAttribute(L, LLVMLoopMustProgress);
|
||||
}
|
||||
|
||||
TransformationMode llvm::hasUnrollTransformation(Loop *L) {
|
||||
TransformationMode llvm::hasUnrollTransformation(const Loop *L) {
|
||||
if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable"))
|
||||
return TM_SuppressedByUser;
|
||||
|
||||
@ -439,7 +439,7 @@ TransformationMode llvm::hasUnrollTransformation(Loop *L) {
|
||||
return TM_Unspecified;
|
||||
}
|
||||
|
||||
TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
|
||||
TransformationMode llvm::hasUnrollAndJamTransformation(const Loop *L) {
|
||||
if (getBooleanLoopAttribute(L, "llvm.loop.unroll_and_jam.disable"))
|
||||
return TM_SuppressedByUser;
|
||||
|
||||
@ -457,7 +457,7 @@ TransformationMode llvm::hasUnrollAndJamTransformation(Loop *L) {
|
||||
return TM_Unspecified;
|
||||
}
|
||||
|
||||
TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
|
||||
TransformationMode llvm::hasVectorizeTransformation(const Loop *L) {
|
||||
Optional<bool> Enable =
|
||||
getOptionalBoolLoopAttribute(L, "llvm.loop.vectorize.enable");
|
||||
|
||||
@ -493,7 +493,7 @@ TransformationMode llvm::hasVectorizeTransformation(Loop *L) {
|
||||
return TM_Unspecified;
|
||||
}
|
||||
|
||||
TransformationMode llvm::hasDistributeTransformation(Loop *L) {
|
||||
TransformationMode llvm::hasDistributeTransformation(const Loop *L) {
|
||||
if (getBooleanLoopAttribute(L, "llvm.loop.distribute.enable"))
|
||||
return TM_ForcedByUser;
|
||||
|
||||
@ -503,7 +503,7 @@ TransformationMode llvm::hasDistributeTransformation(Loop *L) {
|
||||
return TM_Unspecified;
|
||||
}
|
||||
|
||||
TransformationMode llvm::hasLICMVersioningTransformation(Loop *L) {
|
||||
TransformationMode llvm::hasLICMVersioningTransformation(const Loop *L) {
|
||||
if (getBooleanLoopAttribute(L, "llvm.loop.licm_versioning.disable"))
|
||||
return TM_SuppressedByUser;
|
||||
|
||||
|
@ -57,7 +57,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
|
||||
switch (Kind) {
|
||||
case HK_WIDTH:
|
||||
return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
|
||||
case HK_UNROLL:
|
||||
case HK_INTERLEAVE:
|
||||
return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
|
||||
case HK_FORCE:
|
||||
return (Val <= 1);
|
||||
@ -73,7 +73,7 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
|
||||
bool InterleaveOnlyWhenForced,
|
||||
OptimizationRemarkEmitter &ORE)
|
||||
: Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
|
||||
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
|
||||
Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
|
||||
Force("vectorize.enable", FK_Undefined, HK_FORCE),
|
||||
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
|
||||
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
|
||||
@ -91,8 +91,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
|
||||
// consider the loop to have been already vectorized because there's
|
||||
// nothing more that we can do.
|
||||
IsVectorized.Value =
|
||||
getWidth() == ElementCount::getFixed(1) && Interleave.Value == 1;
|
||||
LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
|
||||
getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
|
||||
LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
|
||||
<< "LV: Interleaving disabled by the pass manager\n");
|
||||
}
|
||||
|
||||
@ -165,8 +165,8 @@ void LoopVectorizeHints::emitRemarkWithHints() const {
|
||||
R << " (Force=" << NV("Force", true);
|
||||
if (Width.Value != 0)
|
||||
R << ", Vector Width=" << NV("VectorWidth", getWidth());
|
||||
if (Interleave.Value != 0)
|
||||
R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
|
||||
if (getInterleave() != 0)
|
||||
R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
|
||||
R << ")";
|
||||
}
|
||||
return R;
|
||||
|
@ -9735,7 +9735,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
|
||||
? "enabled"
|
||||
: "?"))
|
||||
<< " width=" << Hints.getWidth()
|
||||
<< " unroll=" << Hints.getInterleave() << "\n");
|
||||
<< " interleave=" << Hints.getInterleave() << "\n");
|
||||
|
||||
// Function containing loop
|
||||
Function *F = L->getHeader()->getParent();
|
||||
|
@ -35,7 +35,7 @@ for.body: ; preds = %for.body.preheader,
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !8
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !8
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n, !dbg !8
|
||||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8, !llvm.loop !17
|
||||
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !dbg !8
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readonly ssp uwtable
|
||||
@ -92,8 +92,6 @@ attributes #0 = { nounwind }
|
||||
!14 = !{!"Simple C/C++ TBAA"}
|
||||
!15 = !DILocation(line: 6, column: 19, scope: !4)
|
||||
!16 = !DILocation(line: 6, column: 11, scope: !4)
|
||||
!17 = distinct !{!17, !18}
|
||||
!18 = !{!"llvm.loop.unroll.disable"}
|
||||
!19 = !DILocation(line: 16, column: 20, scope: !20)
|
||||
!20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, unit: !28, retainedNodes: !7)
|
||||
!21 = !DILocation(line: 16, column: 3, scope: !20)
|
||||
@ -101,7 +99,7 @@ attributes #0 = { nounwind }
|
||||
!23 = !DILocation(line: 20, column: 3, scope: !20)
|
||||
!24 = !DILocation(line: 17, column: 19, scope: !20)
|
||||
!25 = !DILocation(line: 17, column: 11, scope: !20)
|
||||
!26 = distinct !{!26, !27, !18}
|
||||
!26 = distinct !{!26, !27}
|
||||
!27 = !{!"llvm.loop.vectorize.enable", i1 true}
|
||||
!28 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
|
||||
file: !5,
|
||||
|
@ -19,7 +19,7 @@
|
||||
; Case 1: Annotated outer loop WITH vector width information must be collected.
|
||||
|
||||
; CHECK-LABEL: vector_width
|
||||
; CHECK: LV: Loop hints: force=enabled width=4 unroll=0
|
||||
; CHECK: LV: Loop hints: force=enabled width=4 interleave=0
|
||||
; CHECK: LV: We can vectorize this outer loop!
|
||||
; CHECK: LV: Using user VF 4 to build VPlans.
|
||||
; CHECK-NOT: LV: Loop hints: force=?
|
||||
@ -71,7 +71,7 @@ for.end15: ; preds = %outer.inc, %entry
|
||||
; Case 2: Annotated outer loop WITHOUT vector width information must be collected.
|
||||
|
||||
; CHECK-LABEL: case2
|
||||
; CHECK: LV: Loop hints: force=enabled width=0 unroll=0
|
||||
; CHECK: LV: Loop hints: force=enabled width=0 interleave=0
|
||||
; CHECK: LV: We can vectorize this outer loop!
|
||||
; CHECK: LV: Using VF 1 to build VPlans.
|
||||
|
||||
|
93
test/Transforms/LoopVectorize/nounroll.ll
Normal file
93
test/Transforms/LoopVectorize/nounroll.ll
Normal file
@ -0,0 +1,93 @@
|
||||
; RUN: opt < %s -passes='loop-vectorize' -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
|
||||
|
||||
target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512"
|
||||
|
||||
; CHECK: LV: Checking a loop in "f1"
|
||||
; CHECK: LV: Loop hints: force=? width=0 interleave=1
|
||||
; f1: stores the truncated induction variable into A[i] for i in [0, n).
; The loop latch is tagged with !llvm.loop !1, which carries only
; "llvm.loop.unroll.disable"; the FileCheck expectation placed before this
; function wants the vectorizer to report interleave=1 for it.
define dso_local void @f1(i32 signext %n, i32* %A) {
|
||||
entry:
|
||||
%cmp1 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp1, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
%wide.trip.count = zext i32 %n to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
store i32 %0, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
|
||||
; The loop hint metadata under test is attached to this back-edge branch.
br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !1
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: LV: Checking a loop in "f2"
|
||||
; CHECK: LV: Loop hints: force=? width=0 interleave=4
|
||||
; f2: same loop body as the other functions in this test (A[i] = i).
; The loop latch is tagged with !llvm.loop !3, which carries both
; "llvm.loop.unroll.disable" and "llvm.loop.interleave.count" = 4; the
; FileCheck expectation placed before this function wants interleave=4,
; i.e. the explicit interleave count is reported despite the nounroll hint.
define dso_local void @f2(i32 signext %n, i32* %A) {
|
||||
entry:
|
||||
%cmp1 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp1, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
%wide.trip.count = zext i32 %n to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
store i32 %0, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
|
||||
; The loop hint metadata under test is attached to this back-edge branch.
br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !3
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: LV: Checking a loop in "f3"
|
||||
; CHECK: LV: Loop hints: force=? width=0 interleave=1
|
||||
; f3: same loop body as the other functions in this test (A[i] = i).
; The loop latch is tagged with !llvm.loop !6, which carries
; "llvm.loop.mustprogress" and "llvm.loop.unroll.count" = 1; the FileCheck
; expectation placed before this function wants interleave=1, so an unroll
; count of 1 is expected to be treated like a nounroll hint.
define dso_local void @f3(i32 signext %n, i32* %A) {
|
||||
entry:
|
||||
%cmp1 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp1, label %for.body.preheader, label %for.end
|
||||
|
||||
for.body.preheader: ; preds = %entry
|
||||
%wide.trip.count = zext i32 %n to i64
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.body.preheader, %for.body
|
||||
%indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
|
||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
|
||||
%0 = trunc i64 %indvars.iv to i32
|
||||
store i32 %0, i32* %arrayidx, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
|
||||
; The loop hint metadata under test is attached to this back-edge branch.
br i1 %exitcond, label %for.body, label %for.end.loopexit, !llvm.loop !6
|
||||
|
||||
for.end.loopexit: ; preds = %for.body
|
||||
br label %for.end
|
||||
|
||||
for.end: ; preds = %for.end.loopexit, %entry
|
||||
ret void
|
||||
}
|
||||
|
||||
!1 = distinct !{!1, !2}
|
||||
!2 = !{!"llvm.loop.unroll.disable"}
|
||||
!3 = distinct !{!3, !4, !5}
|
||||
!4 = !{!"llvm.loop.unroll.disable"}
|
||||
!5 = !{!"llvm.loop.interleave.count", i32 4}
|
||||
!6 = distinct !{!6, !7, !8}
|
||||
!7 = !{!"llvm.loop.mustprogress"}
|
||||
!8 = !{!"llvm.loop.unroll.count", i32 1}
|
Loading…
Reference in New Issue
Block a user