1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[LV] NFCI: Do cost comparison on InstructionCost directly.

Instead of performing the isMoreProfitable() operation on
InstructionCost::CostType, the operation is performed on InstructionCost
directly, so that it can handle the case where one of the costs is
Invalid.

This patch also changes CostType to be int64_t, so that the type is
wide enough to deal with multiplications with e.g. `unsigned MaxTripCount`.

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D105113
This commit is contained in:
Sander de Smalen 2021-07-10 11:57:12 +01:00
parent 5d8e9991c3
commit b4ab982f78
2 changed files with 12 additions and 8 deletions

View File

@ -28,7 +28,7 @@ class raw_ostream;
class InstructionCost {
public:
using CostType = int;
using CostType = int64_t;
/// CostState describes the state of a cost.
enum CostState {

View File

@ -6014,8 +6014,8 @@ ElementCount LoopVectorizationCostModel::getMaximizedVFForTarget(
bool LoopVectorizationCostModel::isMoreProfitable(
const VectorizationFactor &A, const VectorizationFactor &B) const {
InstructionCost::CostType CostA = *A.Cost.getValue();
InstructionCost::CostType CostB = *B.Cost.getValue();
InstructionCost CostA = A.Cost;
InstructionCost CostB = B.Cost;
unsigned MaxTripCount = PSE.getSE()->getSmallConstantMaxTripCount(TheLoop);
@ -6028,8 +6028,8 @@ bool LoopVectorizationCostModel::isMoreProfitable(
// be PerIterationCost*floor(TC/VF) + Scalar remainder cost, and so is
// approximated with the per-lane cost below instead of using the tripcount
// as here.
int64_t RTCostA = CostA * divideCeil(MaxTripCount, A.Width.getFixedValue());
int64_t RTCostB = CostB * divideCeil(MaxTripCount, B.Width.getFixedValue());
auto RTCostA = CostA * divideCeil(MaxTripCount, A.Width.getFixedValue());
auto RTCostB = CostB * divideCeil(MaxTripCount, B.Width.getFixedValue());
return RTCostA < RTCostB;
}
@ -6064,7 +6064,7 @@ VectorizationFactor LoopVectorizationCostModel::selectVectorizationFactor(
// Ignore scalar width, because the user explicitly wants vectorization.
// Initialize cost to max so that VF = 2 is, at least, chosen during cost
// evaluation.
ChosenFactor.Cost = std::numeric_limits<InstructionCost::CostType>::max();
ChosenFactor.Cost = InstructionCost::getMax();
}
for (const auto &i : VFCandidates) {
@ -6600,10 +6600,14 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<ElementCount> VFs) {
// A lambda that gets the register usage for the given type and VF.
const auto &TTICapture = TTI;
auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) {
auto GetRegUsage = [&TTICapture](Type *Ty, ElementCount VF) -> unsigned {
if (Ty->isTokenTy() || !VectorType::isValidElementType(Ty))
return 0;
return *TTICapture.getRegUsageForType(VectorType::get(Ty, VF)).getValue();
InstructionCost::CostType RegUsage =
*TTICapture.getRegUsageForType(VectorType::get(Ty, VF)).getValue();
assert(RegUsage >= 0 && RegUsage <= std::numeric_limits<unsigned>::max() &&
"Nonsensical values for register usage.");
return RegUsage;
};
for (unsigned int i = 0, s = IdxToInstr.size(); i < s; ++i) {