mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
SLPVectorizer: Add support for trees that don't start at binary operators, and add the cost of extracting values from the roots of the tree.
llvm-svn: 179475
This commit is contained in:
parent
8fafe8cd31
commit
e380208d4f
@ -85,14 +85,16 @@ struct SLPVectorizer : public BasicBlockPass {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool tryToVectorizePair(BinaryOperator *A, BinaryOperator *B, BoUpSLP &R) {
|
||||
bool tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
|
||||
if (!A || !B) return false;
|
||||
BoUpSLP::ValueList VL;
|
||||
VL.push_back(A);
|
||||
VL.push_back(B);
|
||||
int Cost = R.getTreeCost(VL);
|
||||
DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost << ".\n");
|
||||
if (Cost >= -SLPCostThreshold) return false;
|
||||
int ExtrCost = R.getScalarizationCost(VL);
|
||||
DEBUG(dbgs()<<"SLP: Cost of pair:" << Cost <<
|
||||
" Cost of extract:" << ExtrCost << ".\n");
|
||||
if ((Cost+ExtrCost) >= -SLPCostThreshold) return false;
|
||||
DEBUG(dbgs()<<"SLP: Vectorizing pair.\n");
|
||||
R.vectorizeArith(VL);
|
||||
return true;
|
||||
@ -100,11 +102,12 @@ struct SLPVectorizer : public BasicBlockPass {
|
||||
|
||||
bool tryToVectorizeCandidate(BinaryOperator *V, BoUpSLP &R) {
|
||||
if (!V) return false;
|
||||
// Try to vectorize V.
|
||||
if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
|
||||
return true;
|
||||
|
||||
BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
|
||||
BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
|
||||
// Try to vectorize V.
|
||||
if (tryToVectorizePair(A, B, R)) return true;
|
||||
|
||||
// Try to skip B.
|
||||
if (B && B->hasOneUse()) {
|
||||
BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
|
||||
|
@ -173,6 +173,16 @@ bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// Computes the scalarization cost for the values in \p VL by forming a vector
/// type with one lane per entry of \p VL and forwarding it to the Type
/// overload of getScalarizationCost.
/// NOTE(review): VL[0] is read unconditionally, so callers presumably must
/// pass a non-empty list -- confirm against call sites.
int BoUpSLP::getScalarizationCost(ValueList &VL) {
|
||||
// Start with the type of the first value in the list as the element type.
Type *ScalarTy = VL[0]->getType();
|
||||
|
||||
// If the first value is a store, use the type of the value being stored
// as the element type instead.
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
|
||||
ScalarTy = SI->getValueOperand()->getType();
|
||||
|
||||
// The vector type has as many elements as there are values in VL.
VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
|
||||
return getScalarizationCost(VecTy);
|
||||
}
|
||||
|
||||
int BoUpSLP::getScalarizationCost(Type *Ty) {
|
||||
int Cost = 0;
|
||||
for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
|
||||
|
@ -61,6 +61,11 @@ struct BoUpSLP {
|
||||
/// A negative number means that this is profitable.
|
||||
int getTreeCost(ValueList &VL);
|
||||
|
||||
/// \returns the scalarization cost for this ValueList. Assuming that this
|
||||
/// subtree gets vectorized, we may need to extract the values from the
|
||||
/// roots. This method calculates the cost of extracting the values.
|
||||
int getScalarizationCost(ValueList &VL);
|
||||
|
||||
/// \brief Attempts to order and vectorize a sequence of stores. This
|
||||
/// function does a quadratic scan of the given stores.
|
||||
/// \returns true if the basic block was modified.
|
||||
@ -118,7 +123,7 @@ private:
|
||||
/// by multiple lanes, or by users outside the tree.
|
||||
/// NOTICE: The vectorization methods also use this set.
|
||||
ValueSet MustScalarize;
|
||||
|
||||
|
||||
// Contains a list of values that are used outside the current tree. This
|
||||
// set must be reset between runs.
|
||||
ValueSet MultiUserVals;
|
||||
|
37
test/Transforms/SLPVectorizer/X86/reduction2.ll
Normal file
37
test/Transforms/SLPVectorizer/X86/reduction2.ll
Normal file
@ -0,0 +1,37 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
|
||||
target triple = "i386-apple-macosx10.8.0"
|
||||
|
||||
;CHECK: @foo
|
||||
;CHECK: load <2 x double>
|
||||
;CHECK: ret
|
||||
; Test input: a 100-iteration loop that accumulates D[2*i]^2 + D[2*i+1]^2 into
; a running double sum and returns that sum. The CHECK lines that precede this
; function expect a <2 x double> load in the output.
define double @foo(double* nocapture %D) #0 {
|
||||
br label %1
|
||||
|
||||
; <label>:1 ; preds = %1, %0
|
||||
; %i.02 is the loop counter (starts at 0); %sum.01 is the running sum.
%i.02 = phi i32 [ 0, %0 ], [ %10, %1 ]
|
||||
%sum.01 = phi double [ 0.000000e+00, %0 ], [ %9, %1 ]
|
||||
; %2 = 2*i: index of the first element of the pair.
%2 = shl nsw i32 %i.02, 1
|
||||
%3 = getelementptr inbounds double* %D, i32 %2
|
||||
%4 = load double* %3, align 4, !tbaa !0
|
||||
; Square of D[2*i].
%A4 = fmul double %4, %4
|
||||
; %5 = 2*i | 1: index of the second element of the pair (2*i is even, so
; this equals 2*i + 1).
%5 = or i32 %2, 1
|
||||
%6 = getelementptr inbounds double* %D, i32 %5
|
||||
%7 = load double* %6, align 4, !tbaa !0
|
||||
; Square of D[2*i+1].
%A7 = fmul double %7, %7
|
||||
; Add the two squares, then add the result to the running sum.
%8 = fadd double %A4, %A7
|
||||
%9 = fadd double %sum.01, %8
|
||||
; Advance the counter and exit once it reaches 100.
%10 = add nsw i32 %i.02, 1
|
||||
%exitcond = icmp eq i32 %10, 100
|
||||
br i1 %exitcond, label %11, label %1
|
||||
|
||||
; <label>:11 ; preds = %1
|
||||
; Return the accumulated sum.
ret double %9
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readonly ssp "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
!0 = metadata !{metadata !"double", metadata !1}
|
||||
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
|
Loading…
x
Reference in New Issue
Block a user