mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
Do not restrict interleaved unrolling to small loops, depending on the target.
llvm-svn: 231528
This commit is contained in:
parent
96e4689440
commit
09fc2f3b93
@ -331,6 +331,9 @@ public:
|
||||
/// target.
|
||||
bool shouldBuildLookupTables() const;
|
||||
|
||||
/// \brief Don't restrict interleaved unrolling to small loops.
///
/// \p LoopHasReductions tells the target whether the loop under consideration
/// has reduction variables. When this returns true the loop vectorizer keeps
/// its computed unroll factor ("unrolling to expose ILP") instead of falling
/// back to an unroll factor of 1.
|
||||
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
|
||||
|
||||
/// \brief Return hardware support for population count.
|
||||
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
|
||||
|
||||
@ -531,6 +534,7 @@ public:
|
||||
virtual unsigned getJumpBufAlignment() = 0;
|
||||
virtual unsigned getJumpBufSize() = 0;
|
||||
virtual bool shouldBuildLookupTables() = 0;
|
||||
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
|
||||
virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
|
||||
virtual bool haveFastSqrt(Type *Ty) = 0;
|
||||
virtual unsigned getFPOpCost(Type *Ty) = 0;
|
||||
@ -648,6 +652,9 @@ public:
|
||||
// Forwards the query unchanged to the wrapped Impl object.
bool shouldBuildLookupTables() override {
|
||||
return Impl.shouldBuildLookupTables();
|
||||
}
|
||||
// Forwards the query, including the LoopHasReductions flag, to the wrapped
// Impl object and returns its answer as-is.
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
|
||||
return Impl.enableAggressiveInterleaving(LoopHasReductions);
|
||||
}
|
||||
// Forwards the population-count support query for the given integer bit
// width to the wrapped Impl object.
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
|
||||
return Impl.getPopcntSupport(IntTyWidthInBit);
|
||||
}
|
||||
|
@ -235,6 +235,8 @@ public:
|
||||
|
||||
bool shouldBuildLookupTables() {
  // Baseline answer: unconditionally true.
  const bool BuildTables = true;
  return BuildTables;
}
|
||||
|
||||
bool enableAggressiveInterleaving(bool LoopHasReductions) {
  // Baseline answer: never request aggressive interleaving, whether or not
  // the loop has reductions. The flag is deliberately not consulted.
  (void)LoopHasReductions;
  return false;
}
|
||||
|
||||
// Baseline answer: reports TTI::PSK_Software for every query; the
// IntTyWidthInBit argument is not consulted.
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
|
||||
return TTI::PSK_Software;
|
||||
}
|
||||
|
@ -143,6 +143,10 @@ bool TargetTransformInfo::shouldBuildLookupTables() const {
|
||||
return TTIImpl->shouldBuildLookupTables();
|
||||
}
|
||||
|
||||
// Public entry point: delegates to the implementation object held in TTIImpl,
// passing LoopHasReductions through untouched.
bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
|
||||
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
|
||||
}
|
||||
|
||||
TargetTransformInfo::PopcntSupportKind
|
||||
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
|
||||
return TTIImpl->getPopcntSupport(IntTyWidthInBit);
|
||||
|
@ -192,6 +192,10 @@ void PPCTTIImpl::getUnrollingPreferences(Loop *L,
|
||||
BaseT::getUnrollingPreferences(L, UP);
|
||||
}
|
||||
|
||||
// PowerPC answer: aggressive interleaving is requested exactly when the loop
// has reductions, so the caller's flag is returned unchanged.
bool PPCTTIImpl::enableAggressiveInterleaving(bool LoopHasReductions) {
|
||||
return LoopHasReductions;
|
||||
}
|
||||
|
||||
unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
|
||||
if (Vector && !ST->hasAltivec() && !ST->hasQPX())
|
||||
return 0;
|
||||
|
@ -78,6 +78,7 @@ public:
|
||||
/// \name Vector TTI Implementations
|
||||
/// @{
|
||||
|
||||
bool enableAggressiveInterleaving(bool LoopHasReductions);
|
||||
unsigned getNumberOfRegisters(bool Vector);
|
||||
unsigned getRegisterBitWidth(bool Vector);
|
||||
unsigned getMaxInterleaveFactor();
|
||||
|
@ -4564,6 +4564,14 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
|
||||
return SmallUF;
|
||||
}
|
||||
|
||||
// Unroll if this is a large loop (small loops are already dealt with by this
|
||||
// point) that could benefit from interleaved unrolling.
|
||||
bool HasReductions = (Legal->getReductionVars()->size() > 0);
|
||||
if (TTI.enableAggressiveInterleaving(HasReductions)) {
|
||||
DEBUG(dbgs() << "LV: Unrolling to expose ILP.\n");
|
||||
return UF;
|
||||
}
|
||||
|
||||
DEBUG(dbgs() << "LV: Not Unrolling.\n");
|
||||
return 1;
|
||||
}
|
||||
|
73
test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
Normal file
73
test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
Normal file
@ -0,0 +1,73 @@
|
||||
; Runs the loop vectorizer over the function below and expects the first fadd
; in the output to begin a run of twelve consecutive fadd lines, with the
; thirteenth line not being an fadd.
; RUN: opt < %s -loop-vectorize -S | FileCheck %s
|
||||
|
||||
; CHECK: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; CHECK-NEXT: fadd
|
||||
; FileCheck has no directive that combines NEXT with NOT, so the "next line
; is not an fadd" requirement is spelled with three supported directives:
; match the '=' of the following line, forbid fadd, then match the first '>'
; on that same line. This assumes the following line is a vector-typed
; assignment - TODO confirm against actual opt output.
; CHECK-NEXT: =
; CHECK-NOT: fadd
; CHECK-SAME: >
|
||||
|
||||
target datalayout = "e-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64le-ibm-linux-gnu"
|
||||
|
||||
; Test input for the loop vectorizer. When %n > 0, a single loop walks %a (an
; array of [3 x { float, float }] elements) until the truncated induction
; variable equals %n. Each iteration loads both floats of the three pairs,
; squares and adds them as float, extends each sum to double, and chains the
; three results into the running reduction %sum.026. After the loop the total
; is truncated to float and stored to %r; when %n <= 0 the stored value is 0.0.
define void @QLA_F3_r_veq_norm2_V(float* noalias nocapture %r, [3 x { float, float }]* noalias nocapture readonly %a, i32 signext %n) #0 {
|
||||
entry:
|
||||
%cmp24 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp24, label %for.cond1.preheader.preheader, label %for.end13
|
||||
|
||||
for.cond1.preheader.preheader: ; preds = %entry
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.cond1.preheader
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.cond1.preheader ], [ 0, %for.cond1.preheader.preheader ]
|
||||
%sum.026 = phi double [ %add10.2, %for.cond1.preheader ], [ 0.000000e+00, %for.cond1.preheader.preheader ]
|
||||
%arrayidx5.realp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 0
|
||||
%arrayidx5.real = load float, float* %arrayidx5.realp, align 8
|
||||
%arrayidx5.imagp = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 0, i32 1
|
||||
%arrayidx5.imag = load float, float* %arrayidx5.imagp, align 8
|
||||
%mul = fmul fast float %arrayidx5.real, %arrayidx5.real
|
||||
%mul9 = fmul fast float %arrayidx5.imag, %arrayidx5.imag
|
||||
%add = fadd fast float %mul9, %mul
|
||||
%conv = fpext float %add to double
|
||||
%add10 = fadd fast double %conv, %sum.026
|
||||
%arrayidx5.realp.1 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 0
|
||||
%arrayidx5.real.1 = load float, float* %arrayidx5.realp.1, align 8
|
||||
%arrayidx5.imagp.1 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 1, i32 1
|
||||
%arrayidx5.imag.1 = load float, float* %arrayidx5.imagp.1, align 8
|
||||
%mul.1 = fmul fast float %arrayidx5.real.1, %arrayidx5.real.1
|
||||
%mul9.1 = fmul fast float %arrayidx5.imag.1, %arrayidx5.imag.1
|
||||
%add.1 = fadd fast float %mul9.1, %mul.1
|
||||
%conv.1 = fpext float %add.1 to double
|
||||
%add10.1 = fadd fast double %conv.1, %add10
|
||||
%arrayidx5.realp.2 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 0
|
||||
%arrayidx5.real.2 = load float, float* %arrayidx5.realp.2, align 8
|
||||
%arrayidx5.imagp.2 = getelementptr inbounds [3 x { float, float }], [3 x { float, float }]* %a, i64 %indvars.iv, i64 2, i32 1
|
||||
%arrayidx5.imag.2 = load float, float* %arrayidx5.imagp.2, align 8
|
||||
%mul.2 = fmul fast float %arrayidx5.real.2, %arrayidx5.real.2
|
||||
%mul9.2 = fmul fast float %arrayidx5.imag.2, %arrayidx5.imag.2
|
||||
%add.2 = fadd fast float %mul9.2, %mul.2
|
||||
%conv.2 = fpext float %add.2 to double
|
||||
%add10.2 = fadd fast double %conv.2, %add10.1
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, %n
|
||||
br i1 %exitcond, label %for.cond.for.end13_crit_edge, label %for.cond1.preheader
|
||||
|
||||
for.cond.for.end13_crit_edge: ; preds = %for.cond1.preheader
|
||||
%add10.2.lcssa = phi double [ %add10.2, %for.cond1.preheader ]
|
||||
%phitmp = fptrunc double %add10.2.lcssa to float
|
||||
br label %for.end13
|
||||
|
||||
for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry
|
||||
%sum.0.lcssa = phi float [ %phitmp, %for.cond.for.end13_crit_edge ], [ 0.000000e+00, %entry ]
|
||||
store float %sum.0.lcssa, float* %r, align 4
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user