[SLPVectorizer] Merge reorderAltShuffleOperands into reorderInputsAccordingToOpcode
As discussed on D59738, this generalizes reorderInputsAccordingToOpcode to handle multiple + non-commutative instructions, so we can get rid of reorderAltShuffleOperands and make use of the extra canonicalizations that reorderInputsAccordingToOpcode brings.

Differential Revision: https://reviews.llvm.org/D59784

llvm-svn: 356939
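To make the motivation concrete, here is a minimal, hypothetical IR sketch (the function name, arguments and values are made up, not taken from the patch or its tests) of the alternating sub/add over consecutive loads described in the source comments below. The add in the second lane is commutative, so swapping its operands lines both lanes up as a[i] OP b[i], letting the SLP vectorizer emit one vector sub, one vector add and a shufflevector.

; Hypothetical example of the "load a[0] - load b[0] / load b[1] + load a[1]" pattern.
define void @alt_shuffle_example(i32* %a, i32* %b, i32* %c) {
entry:
  %a0 = load i32, i32* %a
  %b0 = load i32, i32* %b
  %a1.gep = getelementptr inbounds i32, i32* %a, i64 1
  %b1.gep = getelementptr inbounds i32, i32* %b, i64 1
  %a1 = load i32, i32* %a1.gep
  %b1 = load i32, i32* %b1.gep
  %r0 = sub i32 %a0, %b0   ; lane 0: a[0] - b[0]
  %r1 = add i32 %b1, %a1   ; lane 1: b[1] + a[1]; add is commutative, so the operands
                           ; can be reordered to a[1] + b[1] to match lane 0
  store i32 %r0, i32* %c
  %c1.gep = getelementptr inbounds i32, i32* %c, i64 1
  store i32 %r1, i32* %c1.gep
  ret void
}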
commit 6b2a982e08
parent 51fa2dddea
@@ -674,13 +674,6 @@ private:
   /// be beneficial even the tree height is tiny.
   bool isFullyVectorizableTinyTree();
 
-  /// \reorder commutative operands in alt shuffle if they result in
-  /// vectorized code.
-  void reorderAltShuffleOperands(const InstructionsState &S,
-                                 ArrayRef<Value *> VL,
-                                 SmallVectorImpl<Value *> &Left,
-                                 SmallVectorImpl<Value *> &Right);
-
   /// \reorder commutative operands to get better probability of
   /// generating vectorized code.
   void reorderInputsAccordingToOpcode(const InstructionsState &S,
@@ -2072,7 +2065,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       // Reorder operands if reordering would enable vectorization.
       if (isa<BinaryOperator>(VL0)) {
         ValueList Left, Right;
-        reorderAltShuffleOperands(S, VL, Left, Right);
+        reorderInputsAccordingToOpcode(S, VL, Left, Right);
         UserTreeIdx.EdgeIdx = 0;
         buildTree_rec(Left, Depth + 1, UserTreeIdx);
         UserTreeIdx.EdgeIdx = 1;
@@ -2787,63 +2780,6 @@ int BoUpSLP::getGatherCost(ArrayRef<Value *> VL) {
   return getGatherCost(VecTy, ShuffledElements);
 }
 
-// Reorder commutative operations in alternate shuffle if the resulting vectors
-// are consecutive loads. This would allow us to vectorize the tree.
-// If we have something like-
-// load a[0] - load b[0]
-// load b[1] + load a[1]
-// load a[2] - load b[2]
-// load a[3] + load b[3]
-// Reordering the second load b[1] load a[1] would allow us to vectorize this
-// code.
-void BoUpSLP::reorderAltShuffleOperands(const InstructionsState &S,
-                                        ArrayRef<Value *> VL,
-                                        SmallVectorImpl<Value *> &Left,
-                                        SmallVectorImpl<Value *> &Right) {
-  // Push left and right operands of binary operation into Left and Right
-  for (Value *V : VL) {
-    auto *I = cast<Instruction>(V);
-    assert(S.isOpcodeOrAlt(I) && "Incorrect instruction in vector");
-    Left.push_back(I->getOperand(0));
-    Right.push_back(I->getOperand(1));
-  }
-
-  // Reorder if we have a commutative operation and consecutive access
-  // are on either side of the alternate instructions.
-  for (unsigned j = 0, e = VL.size() - 1; j < e; ++j) {
-    if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
-      if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
-        Instruction *VL1 = cast<Instruction>(VL[j]);
-        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j], Right[j]);
-          continue;
-        } else if (VL2->isCommutative() &&
-                   isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
-        }
-        // else unchanged
-      }
-    }
-    if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
-      if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
-        Instruction *VL1 = cast<Instruction>(VL[j]);
-        Instruction *VL2 = cast<Instruction>(VL[j + 1]);
-        if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j], Right[j]);
-          continue;
-        } else if (VL2->isCommutative() &&
-                   isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
-        }
-        // else unchanged
-      }
-    }
-  }
-}
-
 // Return true if the i'th left and right operands can be commuted.
 //
 // The vectorizer is trying to either have all elements one side being
@@ -2918,10 +2854,13 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S,
                                              ArrayRef<Value *> VL,
                                              SmallVectorImpl<Value *> &Left,
                                              SmallVectorImpl<Value *> &Right) {
-  if (!VL.empty()) {
-    // Peel the first iteration out of the loop since there's nothing
-    // interesting to do anyway and it simplifies the checks in the loop.
-    auto *I = cast<Instruction>(VL[0]);
+  assert(!VL.empty() && Left.empty() && Right.empty() &&
+         "Unexpected instruction/operand lists");
+
+  // Push left and right operands of binary operation into Left and Right
+  for (Value *V : VL) {
+    auto *I = cast<Instruction>(V);
+    assert(S.isOpcodeOrAlt(I) && "Incorrect instruction in vector");
     Left.push_back(I->getOperand(0));
     Right.push_back(I->getOperand(1));
   }
@@ -2935,15 +2874,10 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S,
 
   for (unsigned i = 1, e = VL.size(); i != e; ++i) {
     Instruction *I = cast<Instruction>(VL[i]);
-    assert(((I->getOpcode() == S.getOpcode() && I->isCommutative()) ||
-            (I->getOpcode() != S.getOpcode() &&
-             Instruction::isCommutative(S.getOpcode()))) &&
-           "Can only process commutative instruction");
     // Commute to favor either a splat or maximizing having the same opcodes on
     // one side.
-    Left.push_back(I->getOperand(0));
-    Right.push_back(I->getOperand(1));
-    if (shouldReorderOperands(i, Left, Right, AllSameOpcodeLeft,
+    if (I->isCommutative() &&
+        shouldReorderOperands(i, Left, Right, AllSameOpcodeLeft,
                               AllSameOpcodeRight, SplatLeft, SplatRight))
       std::swap(Left[i], Right[i]);
 
@@ -2965,11 +2899,11 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S,
   // Finally check if we can get longer vectorizable chain by reordering
   // without breaking the good operand order detected above.
   // E.g. If we have something like-
-  // load a[0]  load b[0]
-  // load b[1]  load a[1]
-  // load a[2]  load b[2]
-  // load a[3]  load b[3]
-  // Reordering the second load b[1]  load a[1] would allow us to vectorize
+  // load a[0] - load b[0]
+  // load b[1] + load a[1]
+  // load a[2] - load b[2]
+  // load a[3] + load b[3]
+  // Reordering the second load b[1] + load a[1] would allow us to vectorize
   // this code and we still retain AllSameOpcode property.
   // FIXME: This load reordering might break AllSameOpcode in some rare cases
   // such as-
@@ -2981,16 +2915,32 @@ void BoUpSLP::reorderInputsAccordingToOpcode(const InstructionsState &S,
     if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) {
       if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) {
         if (isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
+          auto *VL1 = cast<Instruction>(VL[j]);
+          auto *VL2 = cast<Instruction>(VL[j + 1]);
+          if (VL2->isCommutative()) {
+            std::swap(Left[j + 1], Right[j + 1]);
+            continue;
+          }
+          if (VL1->isCommutative()) {
+            std::swap(Left[j], Right[j]);
+            continue;
+          }
         }
       }
     }
     if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) {
       if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) {
         if (isConsecutiveAccess(L, L1, *DL, *SE)) {
-          std::swap(Left[j + 1], Right[j + 1]);
-          continue;
+          auto *VL1 = cast<Instruction>(VL[j]);
+          auto *VL2 = cast<Instruction>(VL[j + 1]);
+          if (VL2->isCommutative()) {
+            std::swap(Left[j + 1], Right[j + 1]);
+            continue;
+          }
+          if (VL1->isCommutative()) {
+            std::swap(Left[j], Right[j]);
+            continue;
+          }
         }
       }
     }
@@ -81,9 +81,9 @@ define void @Test(i32) {
 ; CHECK-NEXT: [[OP_EXTRA30:%.*]] = and i32 [[OP_EXTRA29]], [[TMP0]]
 ; CHECK-NEXT: [[VAL_42:%.*]] = and i32 [[VAL_40]], undef
 ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> undef, i32 [[OP_EXTRA30]], i32 0
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP2]], i32 1
+; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 14910, i32 1
 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 14910, i32 1
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP2]], i32 1
 ; CHECK-NEXT: [[TMP9:%.*]] = and <2 x i32> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT: [[TMP10:%.*]] = add <2 x i32> [[TMP6]], [[TMP8]]
 ; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> [[TMP10]], <2 x i32> <i32 0, i32 3>