mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[SLP][NFC] Avoid repetitive calls to getSameOpcode()
We can avoid repetitive calls getSameOpcode() for already known tree elements by keeping MainOp and AltOp in TreeEntry. Differential Revision: https://reviews.llvm.org/D64700 llvm-svn: 369315
This commit is contained in:
parent
f12628ef2a
commit
3a0d7be9f3
@ -1154,8 +1154,7 @@ private:
|
||||
|
||||
/// Set the Builder insert point to one after the last instruction in
|
||||
/// the bundle
|
||||
void setInsertPointAfterBundle(ArrayRef<Value *> VL,
|
||||
const InstructionsState &S);
|
||||
void setInsertPointAfterBundle(TreeEntry *E);
|
||||
|
||||
/// \returns a vector from a collection of scalars in \p VL.
|
||||
Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
|
||||
@ -1221,6 +1220,10 @@ private:
|
||||
/// reordering of operands during buildTree_rec() and vectorizeTree().
|
||||
SmallVector<ValueList, 2> Operands;
|
||||
|
||||
/// The main/alternate instruction.
|
||||
Instruction *MainOp = nullptr;
|
||||
Instruction *AltOp = nullptr;
|
||||
|
||||
public:
|
||||
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
|
||||
void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
|
||||
@ -1266,6 +1269,58 @@ private:
|
||||
return Operands[OpIdx][0];
|
||||
}
|
||||
|
||||
/// Some of the instructions in the list have alternate opcodes.
|
||||
bool isAltShuffle() const {
|
||||
return getOpcode() != getAltOpcode();
|
||||
}
|
||||
|
||||
bool isOpcodeOrAlt(Instruction *I) const {
|
||||
unsigned CheckedOpcode = I->getOpcode();
|
||||
return (getOpcode() == CheckedOpcode ||
|
||||
getAltOpcode() == CheckedOpcode);
|
||||
}
|
||||
|
||||
/// Chooses the correct key for scheduling data. If \p Op has the same (or
|
||||
/// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is
|
||||
/// \p OpValue.
|
||||
Value *isOneOf(Value *Op) const {
|
||||
auto *I = dyn_cast<Instruction>(Op);
|
||||
if (I && isOpcodeOrAlt(I))
|
||||
return Op;
|
||||
return MainOp;
|
||||
}
|
||||
|
||||
void setOperations(const InstructionsState &S) {
|
||||
MainOp = S.MainOp;
|
||||
AltOp = S.AltOp;
|
||||
}
|
||||
|
||||
Instruction *getMainOp() const {
|
||||
return MainOp;
|
||||
}
|
||||
|
||||
Instruction *getAltOp() const {
|
||||
return AltOp;
|
||||
}
|
||||
|
||||
/// The main/alternate opcodes for the list of instructions.
|
||||
unsigned getOpcode() const {
|
||||
return MainOp ? MainOp->getOpcode() : 0;
|
||||
}
|
||||
|
||||
unsigned getAltOpcode() const {
|
||||
return AltOp ? AltOp->getOpcode() : 0;
|
||||
}
|
||||
|
||||
/// Update operations state of this entry if reorder occurred.
|
||||
bool updateStateIfReorder() {
|
||||
if (ReorderIndices.empty())
|
||||
return false;
|
||||
InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front());
|
||||
setOperations(S);
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// Debug printer.
|
||||
LLVM_DUMP_METHOD void dump() const {
|
||||
@ -1279,6 +1334,8 @@ private:
|
||||
for (Value *V : Scalars)
|
||||
dbgs().indent(2) << *V << "\n";
|
||||
dbgs() << "NeedToGather: " << NeedToGather << "\n";
|
||||
dbgs() << "MainOp: " << *MainOp << "\n";
|
||||
dbgs() << "AltOp: " << *AltOp << "\n";
|
||||
dbgs() << "VectorizedValue: ";
|
||||
if (VectorizedValue)
|
||||
dbgs() << *VectorizedValue;
|
||||
@ -1305,8 +1362,8 @@ private:
|
||||
};
|
||||
|
||||
/// Create a new VectorizableTree entry.
|
||||
TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
|
||||
Optional<ScheduleData *> Bundle,
|
||||
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, Optional<ScheduleData *> Bundle,
|
||||
const InstructionsState &S,
|
||||
const EdgeInfo &UserTreeIdx,
|
||||
ArrayRef<unsigned> ReuseShuffleIndices = None,
|
||||
ArrayRef<unsigned> ReorderIndices = None) {
|
||||
@ -1319,6 +1376,7 @@ private:
|
||||
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
|
||||
ReuseShuffleIndices.end());
|
||||
Last->ReorderIndices = ReorderIndices;
|
||||
Last->setOperations(S);
|
||||
if (Vectorized) {
|
||||
for (int i = 0, e = VL.size(); i != e; ++i) {
|
||||
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
|
||||
@ -2075,28 +2133,28 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
InstructionsState S = getSameOpcode(VL);
|
||||
if (Depth == RecursionMaxDepth) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
|
||||
// Don't handle vectors.
|
||||
if (S.OpValue->getType()->isVectorTy()) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
|
||||
if (SI->getValueOperand()->getType()->isVectorTy()) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
|
||||
// If all of the operands are identical or constant we have a simple solution.
|
||||
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2108,7 +2166,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
if (EphValues.count(VL[i])) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
|
||||
<< ") is ephemeral.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -2118,7 +2176,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
|
||||
if (!E->isSame(VL)) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
// Record the reuse of the tree node. FIXME, currently this is only used to
|
||||
@ -2137,7 +2195,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
if (getTreeEntry(I)) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
|
||||
<< ") is already in tree.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -2148,7 +2206,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
|
||||
if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
@ -2162,7 +2220,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
// Don't go into unreachable blocks. They may contain instructions with
|
||||
// dependency cycles which confuse the final scheduling.
|
||||
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2184,7 +2242,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
if (NumUniqueScalarValues <= 1 ||
|
||||
!llvm::isPowerOf2_32(NumUniqueScalarValues)) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx);
|
||||
return;
|
||||
}
|
||||
VL = UniqueValues;
|
||||
@ -2202,7 +2260,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
assert((!BS.getScheduleData(VL0) ||
|
||||
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
|
||||
"tryScheduleBundle should cancelScheduling on failure");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
return;
|
||||
}
|
||||
@ -2224,14 +2282,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
TreeEntry *TE =
|
||||
newTreeEntry(VL, Bundle, UserTreeIdx, ReuseShuffleIndicies);
|
||||
newTreeEntry(VL, Bundle, S, UserTreeIdx, ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
|
||||
|
||||
// Keeps the reordered operands to avoid code duplication.
|
||||
@ -2256,7 +2314,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
if (Reuse) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
|
||||
++NumOpsWantToKeepOriginalOrder;
|
||||
newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
// This is a special case, as it does not gather, but at the same time
|
||||
// we are not extending buildTree_rec() towards the operands.
|
||||
@ -2278,7 +2336,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
auto StoredCurrentOrderAndNum =
|
||||
NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
|
||||
++StoredCurrentOrderAndNum->getSecond();
|
||||
newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies,
|
||||
StoredCurrentOrderAndNum->getFirst());
|
||||
// This is a special case, as it does not gather, but at the same time
|
||||
@ -2289,7 +2347,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
return;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
return;
|
||||
@ -2306,7 +2364,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
if (DL->getTypeSizeInBits(ScalarTy) !=
|
||||
DL->getTypeAllocSizeInBits(ScalarTy)) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
|
||||
return;
|
||||
@ -2320,7 +2378,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
auto *L = cast<LoadInst>(V);
|
||||
if (!L->isSimple()) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
|
||||
return;
|
||||
@ -2351,16 +2409,17 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
if (CurrentOrder.empty()) {
|
||||
// Original loads are consecutive and does not require reordering.
|
||||
++NumOpsWantToKeepOriginalOrder;
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
|
||||
UserTreeIdx, ReuseShuffleIndicies);
|
||||
TE->setOperandsInOrder();
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
|
||||
} else {
|
||||
// Need to reorder.
|
||||
auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
|
||||
++I->getSecond();
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
ReuseShuffleIndicies, I->getFirst());
|
||||
TreeEntry *TE =
|
||||
newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies, I->getFirst());
|
||||
TE->setOperandsInOrder();
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
|
||||
}
|
||||
@ -2370,7 +2429,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
return;
|
||||
}
|
||||
@ -2391,14 +2450,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
|
||||
if (Ty != SrcTy || !isValidElementType(Ty)) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "SLP: Gathering casts with different src types.\n");
|
||||
return;
|
||||
}
|
||||
}
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
|
||||
|
||||
@ -2424,7 +2483,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
|
||||
Cmp->getOperand(0)->getType() != ComparedTy) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "SLP: Gathering cmp with different predicate.\n");
|
||||
@ -2432,7 +2491,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
}
|
||||
}
|
||||
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
|
||||
|
||||
@ -2480,7 +2539,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
|
||||
|
||||
@ -2513,7 +2572,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
|
||||
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
return;
|
||||
}
|
||||
@ -2528,7 +2587,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "SLP: not-vectorizable GEP (different types).\n");
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
return;
|
||||
}
|
||||
@ -2541,13 +2600,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
LLVM_DEBUG(dbgs()
|
||||
<< "SLP: not-vectorizable GEP (non-constant indexes).\n");
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
|
||||
TE->setOperandsInOrder();
|
||||
@ -2566,13 +2625,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
|
||||
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
|
||||
|
||||
@ -2591,7 +2650,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
|
||||
if (!isTriviallyVectorizable(ID)) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
|
||||
return;
|
||||
@ -2608,7 +2667,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
|
||||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
|
||||
<< "\n");
|
||||
@ -2621,7 +2680,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
Value *A1J = CI2->getArgOperand(j);
|
||||
if (ScalarArgs[j] != A1J) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
|
||||
<< " argument " << ScalarArgs[j] << "!=" << A1J
|
||||
@ -2636,7 +2695,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
CI->op_begin() + CI->getBundleOperandsEndIndex(),
|
||||
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
|
||||
<< *CI << "!=" << *VL[i] << '\n');
|
||||
@ -2644,7 +2703,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
}
|
||||
}
|
||||
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
TE->setOperandsInOrder();
|
||||
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
|
||||
@ -2663,12 +2722,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
// then do not vectorize this instruction.
|
||||
if (!S.isAltShuffle()) {
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
|
||||
return;
|
||||
}
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
|
||||
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
|
||||
|
||||
@ -2696,7 +2755,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
|
||||
}
|
||||
default:
|
||||
BS.cancelScheduling(VL, VL0);
|
||||
newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
|
||||
newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
|
||||
ReuseShuffleIndicies);
|
||||
LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
|
||||
return;
|
||||
@ -2832,7 +2891,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
return ReuseShuffleCost +
|
||||
TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
|
||||
}
|
||||
if (getSameOpcode(VL).getOpcode() == Instruction::ExtractElement &&
|
||||
if (E->getOpcode() == Instruction::ExtractElement &&
|
||||
allSameType(VL) && allSameBlock(VL)) {
|
||||
Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
|
||||
if (ShuffleKind.hasValue()) {
|
||||
@ -2855,11 +2914,10 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
}
|
||||
return ReuseShuffleCost + getGatherCost(VL);
|
||||
}
|
||||
InstructionsState S = getSameOpcode(VL);
|
||||
assert(S.getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
|
||||
Instruction *VL0 = cast<Instruction>(S.OpValue);
|
||||
unsigned ShuffleOrOp = S.isAltShuffle() ?
|
||||
(unsigned) Instruction::ShuffleVector : S.getOpcode();
|
||||
assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
|
||||
Instruction *VL0 = E->getMainOp();
|
||||
unsigned ShuffleOrOp =
|
||||
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
|
||||
switch (ShuffleOrOp) {
|
||||
case Instruction::PHI:
|
||||
return 0;
|
||||
@ -2945,7 +3003,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
case Instruction::BitCast: {
|
||||
Type *SrcTy = VL0->getOperand(0)->getType();
|
||||
int ScalarEltCost =
|
||||
TTI->getCastInstrCost(S.getOpcode(), ScalarTy, SrcTy, VL0);
|
||||
TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
@ -2958,7 +3016,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
// Check if the values are candidates to demote.
|
||||
if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
|
||||
VecCost = ReuseShuffleCost +
|
||||
TTI->getCastInstrCost(S.getOpcode(), VecTy, SrcVecTy, VL0);
|
||||
TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0);
|
||||
}
|
||||
return VecCost - ScalarCost;
|
||||
}
|
||||
@ -2966,14 +3024,14 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
case Instruction::ICmp:
|
||||
case Instruction::Select: {
|
||||
// Calculate the cost of this instruction.
|
||||
int ScalarEltCost = TTI->getCmpSelInstrCost(S.getOpcode(), ScalarTy,
|
||||
int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
|
||||
Builder.getInt1Ty(), VL0);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
|
||||
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
|
||||
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0);
|
||||
return ReuseShuffleCost + VecCost - ScalarCost;
|
||||
}
|
||||
case Instruction::FNeg:
|
||||
@ -3034,12 +3092,12 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
|
||||
SmallVector<const Value *, 4> Operands(VL0->operand_values());
|
||||
int ScalarEltCost = TTI->getArithmeticInstrCost(
|
||||
S.getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
|
||||
E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
|
||||
if (NeedToShuffleReuses) {
|
||||
ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
|
||||
}
|
||||
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
|
||||
int VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy, Op1VK,
|
||||
int VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, Op1VK,
|
||||
Op2VK, Op1VP, Op2VP, Operands);
|
||||
return ReuseShuffleCost + VecCost - ScalarCost;
|
||||
}
|
||||
@ -3121,11 +3179,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
return ReuseShuffleCost + VecCallCost - ScalarCallCost;
|
||||
}
|
||||
case Instruction::ShuffleVector: {
|
||||
assert(S.isAltShuffle() &&
|
||||
((Instruction::isBinaryOp(S.getOpcode()) &&
|
||||
Instruction::isBinaryOp(S.getAltOpcode())) ||
|
||||
(Instruction::isCast(S.getOpcode()) &&
|
||||
Instruction::isCast(S.getAltOpcode()))) &&
|
||||
assert(E->isAltShuffle() &&
|
||||
((Instruction::isBinaryOp(E->getOpcode()) &&
|
||||
Instruction::isBinaryOp(E->getAltOpcode())) ||
|
||||
(Instruction::isCast(E->getOpcode()) &&
|
||||
Instruction::isCast(E->getAltOpcode()))) &&
|
||||
"Invalid Shuffle Vector Operand");
|
||||
int ScalarCost = 0;
|
||||
if (NeedToShuffleReuses) {
|
||||
@ -3142,23 +3200,23 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
|
||||
}
|
||||
for (Value *i : VL) {
|
||||
Instruction *I = cast<Instruction>(i);
|
||||
assert(S.isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
|
||||
assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
|
||||
ScalarCost += TTI->getInstructionCost(
|
||||
I, TargetTransformInfo::TCK_RecipThroughput);
|
||||
}
|
||||
// VecCost is equal to sum of the cost of creating 2 vectors
|
||||
// and the cost of creating shuffle.
|
||||
int VecCost = 0;
|
||||
if (Instruction::isBinaryOp(S.getOpcode())) {
|
||||
VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy);
|
||||
VecCost += TTI->getArithmeticInstrCost(S.getAltOpcode(), VecTy);
|
||||
if (Instruction::isBinaryOp(E->getOpcode())) {
|
||||
VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy);
|
||||
VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy);
|
||||
} else {
|
||||
Type *Src0SclTy = S.MainOp->getOperand(0)->getType();
|
||||
Type *Src1SclTy = S.AltOp->getOperand(0)->getType();
|
||||
Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
|
||||
Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
|
||||
VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
|
||||
VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
|
||||
VecCost = TTI->getCastInstrCost(S.getOpcode(), VecTy, Src0Ty);
|
||||
VecCost += TTI->getCastInstrCost(S.getAltOpcode(), VecTy, Src1Ty);
|
||||
VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty);
|
||||
VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty);
|
||||
}
|
||||
VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
|
||||
return ReuseShuffleCost + VecCost - ScalarCost;
|
||||
@ -3413,16 +3471,16 @@ void BoUpSLP::reorderInputsAccordingToOpcode(
|
||||
Right = Ops.getVL(1);
|
||||
}
|
||||
|
||||
void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
|
||||
const InstructionsState &S) {
|
||||
void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) {
|
||||
// Get the basic block this bundle is in. All instructions in the bundle
|
||||
// should be in this block.
|
||||
auto *Front = cast<Instruction>(S.OpValue);
|
||||
auto *Front = E->getMainOp();
|
||||
auto *BB = Front->getParent();
|
||||
assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool {
|
||||
auto *I = cast<Instruction>(V);
|
||||
return !S.isOpcodeOrAlt(I) || I->getParent() == BB;
|
||||
}));
|
||||
assert(llvm::all_of(make_range(E->Scalars.begin(), E->Scalars.end()),
|
||||
[=](Value *V) -> bool {
|
||||
auto *I = cast<Instruction>(V);
|
||||
return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
|
||||
}));
|
||||
|
||||
// The last instruction in the bundle in program order.
|
||||
Instruction *LastInst = nullptr;
|
||||
@ -3433,7 +3491,7 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
|
||||
// bundle. The end of the bundle is marked by null ScheduleData.
|
||||
if (BlocksSchedules.count(BB)) {
|
||||
auto *Bundle =
|
||||
BlocksSchedules[BB]->getScheduleData(isOneOf(S, VL.back()));
|
||||
BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back()));
|
||||
if (Bundle && Bundle->isPartOfBundle())
|
||||
for (; Bundle; Bundle = Bundle->NextInBundle)
|
||||
if (Bundle->OpValue == Bundle->Inst)
|
||||
@ -3459,9 +3517,9 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
|
||||
// we both exit early from buildTree_rec and that the bundle be out-of-order
|
||||
// (causing us to iterate all the way to the end of the block).
|
||||
if (!LastInst) {
|
||||
SmallPtrSet<Value *, 16> Bundle(VL.begin(), VL.end());
|
||||
SmallPtrSet<Value *, 16> Bundle(E->Scalars.begin(), E->Scalars.end());
|
||||
for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
|
||||
if (Bundle.erase(&I) && S.isOpcodeOrAlt(&I))
|
||||
if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I))
|
||||
LastInst = &I;
|
||||
if (Bundle.empty())
|
||||
break;
|
||||
@ -3588,8 +3646,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
return E->VectorizedValue;
|
||||
}
|
||||
|
||||
InstructionsState S = getSameOpcode(E->Scalars);
|
||||
Instruction *VL0 = cast<Instruction>(S.OpValue);
|
||||
Instruction *VL0 = E->getMainOp();
|
||||
Type *ScalarTy = VL0->getType();
|
||||
if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
|
||||
ScalarTy = SI->getValueOperand()->getType();
|
||||
@ -3598,7 +3655,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
|
||||
|
||||
if (E->NeedToGather) {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
auto *V = Gather(E->Scalars, VecTy);
|
||||
if (NeedToShuffleReuses) {
|
||||
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
|
||||
@ -3612,8 +3669,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
return V;
|
||||
}
|
||||
|
||||
unsigned ShuffleOrOp = S.isAltShuffle() ?
|
||||
(unsigned) Instruction::ShuffleVector : S.getOpcode();
|
||||
unsigned ShuffleOrOp =
|
||||
E->isAltShuffle() ? (unsigned)Instruction::ShuffleVector : E->getOpcode();
|
||||
switch (ShuffleOrOp) {
|
||||
case Instruction::PHI: {
|
||||
PHINode *PH = dyn_cast<PHINode>(VL0);
|
||||
@ -3671,7 +3728,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
E->VectorizedValue = V;
|
||||
return V;
|
||||
}
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
auto *V = Gather(E->Scalars, VecTy);
|
||||
if (NeedToShuffleReuses) {
|
||||
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
|
||||
@ -3706,7 +3763,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
E->VectorizedValue = NewV;
|
||||
return NewV;
|
||||
}
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
auto *V = Gather(E->Scalars, VecTy);
|
||||
if (NeedToShuffleReuses) {
|
||||
V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
|
||||
@ -3731,7 +3788,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
case Instruction::Trunc:
|
||||
case Instruction::FPTrunc:
|
||||
case Instruction::BitCast: {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
Value *InVec = vectorizeTree(E->getOperand(0));
|
||||
|
||||
@ -3752,7 +3809,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
}
|
||||
case Instruction::FCmp:
|
||||
case Instruction::ICmp: {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
Value *L = vectorizeTree(E->getOperand(0));
|
||||
Value *R = vectorizeTree(E->getOperand(1));
|
||||
@ -3764,7 +3821,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
|
||||
CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
|
||||
Value *V;
|
||||
if (S.getOpcode() == Instruction::FCmp)
|
||||
if (E->getOpcode() == Instruction::FCmp)
|
||||
V = Builder.CreateFCmp(P0, L, R);
|
||||
else
|
||||
V = Builder.CreateICmp(P0, L, R);
|
||||
@ -3779,7 +3836,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
return V;
|
||||
}
|
||||
case Instruction::Select: {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
Value *Cond = vectorizeTree(E->getOperand(0));
|
||||
Value *True = vectorizeTree(E->getOperand(1));
|
||||
@ -3800,7 +3857,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
return V;
|
||||
}
|
||||
case Instruction::FNeg: {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
Value *Op = vectorizeTree(E->getOperand(0));
|
||||
|
||||
@ -3810,7 +3867,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
}
|
||||
|
||||
Value *V = Builder.CreateUnOp(
|
||||
static_cast<Instruction::UnaryOps>(S.getOpcode()), Op);
|
||||
static_cast<Instruction::UnaryOps>(E->getOpcode()), Op);
|
||||
propagateIRFlags(V, E->Scalars, VL0);
|
||||
if (auto *I = dyn_cast<Instruction>(V))
|
||||
V = propagateMetadata(I, E->Scalars);
|
||||
@ -3842,7 +3899,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
case Instruction::And:
|
||||
case Instruction::Or:
|
||||
case Instruction::Xor: {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
Value *LHS = vectorizeTree(E->getOperand(0));
|
||||
Value *RHS = vectorizeTree(E->getOperand(1));
|
||||
@ -3853,7 +3910,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
}
|
||||
|
||||
Value *V = Builder.CreateBinOp(
|
||||
static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS);
|
||||
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
|
||||
RHS);
|
||||
propagateIRFlags(V, E->Scalars, VL0);
|
||||
if (auto *I = dyn_cast<Instruction>(V))
|
||||
V = propagateMetadata(I, E->Scalars);
|
||||
@ -3870,12 +3928,10 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
case Instruction::Load: {
|
||||
// Loads are inserted at the head of the tree because we don't want to
|
||||
// sink them all the way down past store instructions.
|
||||
bool IsReorder = !E->ReorderIndices.empty();
|
||||
if (IsReorder) {
|
||||
S = getSameOpcode(E->Scalars, E->ReorderIndices.front());
|
||||
VL0 = cast<Instruction>(S.OpValue);
|
||||
}
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
bool IsReorder = E->updateStateIfReorder();
|
||||
if (IsReorder)
|
||||
VL0 = E->getMainOp();
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
LoadInst *LI = cast<LoadInst>(VL0);
|
||||
Type *ScalarLoadTy = LI->getType();
|
||||
@ -3918,7 +3974,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
unsigned Alignment = SI->getAlignment();
|
||||
unsigned AS = SI->getPointerAddressSpace();
|
||||
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
Value *VecValue = vectorizeTree(E->getOperand(0));
|
||||
Value *ScalarPtr = SI->getPointerOperand();
|
||||
@ -3945,7 +4001,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
return V;
|
||||
}
|
||||
case Instruction::GetElementPtr: {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
Value *Op0 = vectorizeTree(E->getOperand(0));
|
||||
|
||||
@ -3972,7 +4028,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
}
|
||||
case Instruction::Call: {
|
||||
CallInst *CI = cast<CallInst>(VL0);
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
|
||||
Intrinsic::ID IID = Intrinsic::not_intrinsic;
|
||||
if (Function *FI = CI->getCalledFunction())
|
||||
@ -4020,20 +4076,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
return V;
|
||||
}
|
||||
case Instruction::ShuffleVector: {
|
||||
assert(S.isAltShuffle() &&
|
||||
((Instruction::isBinaryOp(S.getOpcode()) &&
|
||||
Instruction::isBinaryOp(S.getAltOpcode())) ||
|
||||
(Instruction::isCast(S.getOpcode()) &&
|
||||
Instruction::isCast(S.getAltOpcode()))) &&
|
||||
assert(E->isAltShuffle() &&
|
||||
((Instruction::isBinaryOp(E->getOpcode()) &&
|
||||
Instruction::isBinaryOp(E->getAltOpcode())) ||
|
||||
(Instruction::isCast(E->getOpcode()) &&
|
||||
Instruction::isCast(E->getAltOpcode()))) &&
|
||||
"Invalid Shuffle Vector Operand");
|
||||
|
||||
Value *LHS = nullptr, *RHS = nullptr;
|
||||
if (Instruction::isBinaryOp(S.getOpcode())) {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
if (Instruction::isBinaryOp(E->getOpcode())) {
|
||||
setInsertPointAfterBundle(E);
|
||||
LHS = vectorizeTree(E->getOperand(0));
|
||||
RHS = vectorizeTree(E->getOperand(1));
|
||||
} else {
|
||||
setInsertPointAfterBundle(E->Scalars, S);
|
||||
setInsertPointAfterBundle(E);
|
||||
LHS = vectorizeTree(E->getOperand(0));
|
||||
}
|
||||
|
||||
@ -4043,16 +4099,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
}
|
||||
|
||||
Value *V0, *V1;
|
||||
if (Instruction::isBinaryOp(S.getOpcode())) {
|
||||
if (Instruction::isBinaryOp(E->getOpcode())) {
|
||||
V0 = Builder.CreateBinOp(
|
||||
static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS);
|
||||
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
|
||||
V1 = Builder.CreateBinOp(
|
||||
static_cast<Instruction::BinaryOps>(S.getAltOpcode()), LHS, RHS);
|
||||
static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
|
||||
} else {
|
||||
V0 = Builder.CreateCast(
|
||||
static_cast<Instruction::CastOps>(S.getOpcode()), LHS, VecTy);
|
||||
static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
|
||||
V1 = Builder.CreateCast(
|
||||
static_cast<Instruction::CastOps>(S.getAltOpcode()), LHS, VecTy);
|
||||
static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
|
||||
}
|
||||
|
||||
// Create shuffle to take alternate operations from the vector.
|
||||
@ -4063,8 +4119,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
|
||||
SmallVector<Constant *, 8> Mask(e);
|
||||
for (unsigned i = 0; i < e; ++i) {
|
||||
auto *OpInst = cast<Instruction>(E->Scalars[i]);
|
||||
assert(S.isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
|
||||
if (OpInst->getOpcode() == S.getAltOpcode()) {
|
||||
assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
|
||||
if (OpInst->getOpcode() == E->getAltOpcode()) {
|
||||
Mask[i] = Builder.getInt32(e + i);
|
||||
AltScalars.push_back(E->Scalars[i]);
|
||||
} else {
|
||||
|
Loading…
x
Reference in New Issue
Block a user