1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 19:23:23 +01:00

SLPVectorizer: Fix PR16777. PHI nodes may use multiple extracted values that come from different blocks.

Thanks Alexey Samsonov.

llvm-svn: 187663
This commit is contained in:
Nadav Rotem 2013-08-02 18:40:24 +00:00
parent 05068436ba
commit eef986f7a3
2 changed files with 48 additions and 10 deletions

View File

@ -1401,30 +1401,33 @@ void BoUpSLP::vectorizeTree() {
Value *Vec = E->VectorizedValue;
assert(Vec && "Can't find vectorizable value");
Value *Lane = Builder.getInt32(it->Lane);
// Generate extracts for out-of-tree users.
// Find the insertion point for the extractelement lane.
Instruction *Loc = 0;
if (PHINode *PN = dyn_cast<PHINode>(Vec)) {
Loc = PN->getParent()->getFirstInsertionPt();
Builder.SetInsertPoint(PN->getParent()->getFirstInsertionPt());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
User->replaceUsesOfWith(Scalar, Ex);
} else if (isa<Instruction>(Vec)){
if (PHINode *PH = dyn_cast<PHINode>(User)) {
for (int i = 0, e = PH->getNumIncomingValues(); i != e; ++i) {
if (PH->getIncomingValue(i) == Scalar) {
Loc = PH->getIncomingBlock(i)->getTerminator();
break;
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
PH->setOperand(i, Ex);
}
}
assert(Loc && "Unable to find incoming value for the PHI");
} else {
Loc = cast<Instruction>(User);
Builder.SetInsertPoint(cast<Instruction>(User));
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
Loc = F->getEntryBlock().begin();
Builder.SetInsertPoint(F->getEntryBlock().begin());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
User->replaceUsesOfWith(Scalar, Ex);
}
Builder.SetInsertPoint(Loc);
Value *Ex = Builder.CreateExtractElement(Vec, Builder.getInt32(it->Lane));
User->replaceUsesOfWith(Scalar, Ex);
DEBUG(dbgs() << "SLP: Replaced:" << *User << ".\n");
}

View File

@ -0,0 +1,35 @@
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
; Regression input for the SLP vectorizer (PR16777). The RUN line above does
; not pipe into FileCheck, so no output is matched; it only runs the
; basicaa / slp-vectorizer / dce pipeline over this module.
; The datalayout and triple below describe the same x86_64 macOS target that
; the RUN line selects with -mtriple.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
; Struct type that @Rf_gpptr returns a pointer to.
%struct.GPar.0.16.26 = type { [0 x double], double }
; External double; loaded in the entry block of @Rf_GReset.
@d = external global double, align 8
; Varargs declaration. In this file it is never called; only its address is
; used, inside the constant icmp that guards the branch in @Rf_GReset.
declare %struct.GPar.0.16.26* @Rf_gpptr(...)
; @Rf_GReset: reduced reproducer for PR16777.
; Shape of the input: %sub and %sub1 are defined next to each other in entry
; and feed two parallel fsub -> fdiv chains in if.then, while the PHI %g.0 in
; if.end7 also takes the scalar %sub from two different predecessors
; (%if.then and %entry).
; NOTE(review): presumably the SLP vectorizer bundles {%sub, %sub1} and the
; chains below, making %g.0 an out-of-tree user that needs one extracted value
; per incoming block -- confirm against the vectorizer's debug output.
define void @Rf_GReset() {
entry:
; Two "-0.0 - x" subtractions: one on undef, one on the value loaded from @d.
%sub = fsub double -0.000000e+00, undef
%0 = load double* @d, align 8
%sub1 = fsub double -0.000000e+00, %0
; The branch condition is a constant expression comparing inttoptr(i64 115)
; with the address of @Rf_gpptr; no instruction computes it.
br i1 icmp eq (%struct.GPar.0.16.26* (...)* inttoptr (i64 115 to %struct.GPar.0.16.26* (...)*), %struct.GPar.0.16.26* (...)* @Rf_gpptr), label %if.then, label %if.end7
if.then: ; preds = %entry
; Parallel chains: (%sub - undef) / undef and (%sub1 - undef) / undef.
%sub2 = fsub double %sub, undef
%div.i = fdiv double %sub2, undef
%sub4 = fsub double %sub1, undef
%div.i16 = fdiv double %sub4, undef
; The two chain results are only consumed by this comparison.
%cmp = fcmp ogt double %div.i, %div.i16
br i1 %cmp, label %if.then6, label %if.end7
if.then6: ; preds = %if.then
br label %if.end7
if.end7: ; preds = %if.then6, %if.then, %entry
; %sub appears as the incoming value for both %if.then and %entry, so the same
; scalar is used on edges from two different blocks. %g.0 itself has no users
; in this function.
%g.0 = phi double [ 0.000000e+00, %if.then6 ], [ %sub, %if.then ], [ %sub, %entry ]
ret void
}