mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
Revert "[SLP]Fix costs calculations."
This reverts commit a053afed49897aa34e08287f91c5255efa4e5131 to fix buildbots.
This commit is contained in:
parent
7e3be1e953
commit
7d6020c9b9
@ -3654,6 +3654,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||
else if (auto *IE = dyn_cast<InsertElementInst>(VL[0]))
|
||||
ScalarTy = IE->getOperand(1)->getType();
|
||||
auto *VecTy = FixedVectorType::get(ScalarTy, VL.size());
|
||||
auto *FinalVecTy = VecTy;
|
||||
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
|
||||
|
||||
// If we have computed a smaller type for the expression, update VecTy so
|
||||
@ -3661,7 +3662,6 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||
if (MinBWs.count(VL[0]))
|
||||
VecTy = FixedVectorType::get(
|
||||
IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
|
||||
auto *FinalVecTy = VecTy;
|
||||
|
||||
unsigned ReuseShuffleNumbers = E->ReuseShuffleIndices.size();
|
||||
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
|
||||
@ -3838,6 +3838,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||
case Instruction::ExtractElement: {
|
||||
// The common cost of removing ExtractElement/ExtractValue instructions +
|
||||
// the cost of shuffles, if required to reshuffle the original vector.
|
||||
InstructionCost CommonCost = 0;
|
||||
if (NeedToShuffleReuses) {
|
||||
unsigned Idx = 0;
|
||||
for (unsigned I : E->ReuseShuffleIndices) {
|
||||
@ -4132,7 +4133,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
|
||||
commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
|
||||
VecLdCost = TTI->getGatherScatterOpCost(
|
||||
Instruction::Load, VecTy, cast<LoadInst>(VL0)->getPointerOperand(),
|
||||
/*VariableMask=*/false, CommonAlignment, CostKind, VL0);
|
||||
/*VariableMask=*/false, Alignment, CostKind, VL0);
|
||||
}
|
||||
LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecLdCost, ScalarLdCost));
|
||||
return CommonCost + VecLdCost - ScalarLdCost;
|
||||
@ -4470,6 +4471,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
|
||||
|
||||
SmallPtrSet<Value *, 16> ExtractCostCalculated;
|
||||
InstructionCost ExtractCost = 0;
|
||||
SmallBitVector IsIdentity;
|
||||
SmallVector<unsigned> VF;
|
||||
SmallVector<SmallVector<int>> ShuffleMask;
|
||||
SmallVector<Value *> FirstUsers;
|
||||
@ -4526,12 +4528,15 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
|
||||
ShuffleMask.emplace_back(VF.back(), UndefMaskElem);
|
||||
FirstUsers.push_back(EU.User);
|
||||
DemandedElts.push_back(APInt::getNullValue(VF.back()));
|
||||
IsIdentity.push_back(true);
|
||||
VecId = FirstUsers.size() - 1;
|
||||
} else {
|
||||
VecId = std::distance(FirstUsers.begin(), It);
|
||||
}
|
||||
int Idx = *InsertIdx;
|
||||
ShuffleMask[VecId][Idx] = EU.Lane;
|
||||
IsIdentity.set(IsIdentity.test(VecId) &
|
||||
(EU.Lane == Idx || EU.Lane == UndefMaskElem));
|
||||
DemandedElts[VecId].setBit(Idx);
|
||||
}
|
||||
}
|
||||
@ -4557,8 +4562,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
|
||||
InstructionCost SpillCost = getSpillCost();
|
||||
Cost += SpillCost + ExtractCost;
|
||||
for (int I = 0, E = FirstUsers.size(); I < E; ++I) {
|
||||
// For the very first element - simple shuffle of the source vector.
|
||||
if (I == 0 && !ShuffleVectorInst::isIdentityMask(ShuffleMask[I])) {
|
||||
if (!IsIdentity.test(I)) {
|
||||
InstructionCost C = TTI->getShuffleCost(
|
||||
TTI::SK_PermuteSingleSrc,
|
||||
cast<FixedVectorType>(FirstUsers[I]->getType()), ShuffleMask[I]);
|
||||
@ -4567,15 +4571,10 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
|
||||
<< *VectorizableTree.front()->Scalars.front() << ".\n"
|
||||
<< "SLP: Current total cost = " << Cost << "\n");
|
||||
Cost += C;
|
||||
continue;
|
||||
}
|
||||
// Other elements - permutation of 2 vectors (the initial one and the next
|
||||
// Ith incoming vector).
|
||||
unsigned VF = ShuffleMask[I].size();
|
||||
for (unsigned Idx = 0; Idx < VF; ++Idx) {
|
||||
int &Mask = ShuffleMask[I][Idx];
|
||||
Mask = Mask == UndefMaskElem ? Idx : VF + Mask;
|
||||
}
|
||||
for (int &Mask : ShuffleMask[I])
|
||||
Mask = (Mask == UndefMaskElem ? 0 : VF) + Mask;
|
||||
InstructionCost C = TTI->getShuffleCost(
|
||||
TTI::SK_PermuteTwoSrc, cast<FixedVectorType>(FirstUsers[I]->getType()),
|
||||
ShuffleMask[I]);
|
||||
|
@ -40,22 +40,22 @@ define void @test(i32* nocapture %t2) {
|
||||
; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819
|
||||
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
|
||||
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
|
||||
; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T15]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T40]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T9]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T48]], i32 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T40]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T15]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T47]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T9]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[T50:%.*]] = add nsw i32 [[T40]], [[T48]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[T65:%.*]] = insertelement <8 x i32> poison, i32 [[TMP6]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[T66:%.*]] = insertelement <8 x i32> [[T65]], i32 [[TMP7]], i32 1
|
||||
; CHECK-NEXT: [[T66:%.*]] = insertelement <8 x i32> [[T65]], i32 [[T50]], i32 1
|
||||
; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[T66]], i32 [[T32]], i32 2
|
||||
; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
|
||||
; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[T691:%.*]] = shufflevector <8 x i32> [[T67]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T691]], i32 [[T50]], i32 5
|
||||
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP8]], i32 7
|
||||
; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
|
||||
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
|
||||
|
@ -40,22 +40,22 @@ define void @test(i32* nocapture %t2) {
|
||||
; CHECK-NEXT: [[T42:%.*]] = mul nsw i32 [[T17]], 16819
|
||||
; CHECK-NEXT: [[T47:%.*]] = mul nsw i32 [[T37]], -16069
|
||||
; CHECK-NEXT: [[T48:%.*]] = mul nsw i32 [[T38]], -3196
|
||||
; CHECK-NEXT: [[T49:%.*]] = add nsw i32 [[T40]], [[T47]]
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T15]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T40]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T9]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T48]], i32 1
|
||||
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[T40]], i32 0
|
||||
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[T15]], i32 1
|
||||
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> poison, i32 [[T47]], i32 0
|
||||
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> [[TMP3]], i32 [[T9]], i32 1
|
||||
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <2 x i32> [[TMP2]], [[TMP4]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[T50:%.*]] = add nsw i32 [[T40]], [[T48]]
|
||||
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[T65:%.*]] = insertelement <8 x i32> undef, i32 [[TMP6]], i32 0
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
|
||||
; CHECK-NEXT: [[T66:%.*]] = insertelement <8 x i32> [[T65]], i32 [[TMP7]], i32 1
|
||||
; CHECK-NEXT: [[T66:%.*]] = insertelement <8 x i32> [[T65]], i32 [[T50]], i32 1
|
||||
; CHECK-NEXT: [[T67:%.*]] = insertelement <8 x i32> [[T66]], i32 [[T32]], i32 2
|
||||
; CHECK-NEXT: [[T68:%.*]] = insertelement <8 x i32> [[T67]], i32 [[T49]], i32 3
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[T701:%.*]] = shufflevector <8 x i32> [[T68]], <8 x i32> [[TMP8]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T701]], i32 [[T34]], i32 6
|
||||
; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[T49]], i32 7
|
||||
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
|
||||
; CHECK-NEXT: [[T691:%.*]] = shufflevector <8 x i32> [[T67]], <8 x i32> [[TMP7]], <8 x i32> <i32 0, i32 1, i32 2, i32 8, i32 9, i32 5, i32 6, i32 7>
|
||||
; CHECK-NEXT: [[T70:%.*]] = insertelement <8 x i32> [[T691]], i32 [[T50]], i32 5
|
||||
; CHECK-NEXT: [[T71:%.*]] = insertelement <8 x i32> [[T70]], i32 [[T34]], i32 6
|
||||
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
|
||||
; CHECK-NEXT: [[T72:%.*]] = insertelement <8 x i32> [[T71]], i32 [[TMP8]], i32 7
|
||||
; CHECK-NEXT: [[T76:%.*]] = shl <8 x i32> [[T72]], <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
|
||||
; CHECK-NEXT: [[T79:%.*]] = bitcast i32* [[T2]] to <8 x i32>*
|
||||
; CHECK-NEXT: store <8 x i32> [[T76]], <8 x i32>* [[T79]], align 4
|
||||
|
Loading…
Reference in New Issue
Block a user