[Hexagon] Rudimentary support for auto-vectorization for HVX
This implements a set of TTI functions that the loop vectorizer uses. Its only purpose at this point is to enable testing: auto-vectorization remains disabled by default and can be enabled with -hexagon-autohvx.

llvm-svn: 328639
commit e5c6fcdb65 (parent cff8c7276f)
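A usage sketch, not from the commit itself: the new hooks can be exercised through opt. The -loop-vectorize and -mtriple spellings are the standard ones; the exact -mattr string for turning on HVX is an assumption here and differs across LLVM versions.

    opt -mtriple=hexagon -mattr=+hvx -loop-vectorize -hexagon-autohvx -S loop.ll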
lib/Target/Hexagon/HexagonTargetTransformInfo.cpp:

@@ -26,6 +26,9 @@ using namespace llvm;
 
 #define DEBUG_TYPE "hexagontti"
 
+static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
+  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
+
 static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
   cl::init(true), cl::Hidden,
   cl::desc("Control lookup table emission on Hexagon target"));
@@ -47,8 +50,41 @@ bool HexagonTTIImpl::shouldFavorPostInc() const {
   return true;
 }
 
-unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
-  return vector ? 0 : 32;
+unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
+  if (Vector)
+    return HexagonAutoHVX && getST()->useHVXOps() ? 32 : 0;
+  return 32;
+}
+
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+  return HexagonAutoHVX && getST()->useHVXOps() ? 64 : 0;
+}
+
+unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
+  return Vector ? getMinVectorRegisterBitWidth() : 32;
+}
+
+unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
+  return getST()->useHVXOps() ? getST()->getVectorLength()*8 : 0;
+}
+
+unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+      unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
+  if (Opcode == Instruction::Load && Src->isVectorTy()) {
+    VectorType *VecTy = cast<VectorType>(Src);
+    unsigned VecWidth = VecTy->getBitWidth();
+    if (VecWidth > 64) {
+      // Assume that vectors longer than 64 bits are meant for HVX.
+      if (getNumberOfRegisters(true) > 0) {
+        if (VecWidth % getRegisterBitWidth(true) == 0)
+          return 1;
+      }
+      unsigned AlignWidth = 8 * std::max(1u, Alignment);
+      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
+      return 3*NumLoads;
+    }
+  }
+  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
 }
 
 unsigned HexagonTTIImpl::getPrefetchDistance() const {
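To make the new load cost concrete, here is a small standalone sketch (hypothetical values; alignTo mirrors the helper from llvm/Support/MathExtras.h). It reproduces the fallback path for a vector load whose width is not a multiple of the HVX register width, which on a 64-byte subtarget is getVectorLength()*8 = 512 bits:

    // A minimal sketch, not part of the patch.
    #include <algorithm>
    #include <cstdio>

    // Round Value up to the nearest multiple of Align, as LLVM's alignTo does.
    static unsigned alignTo(unsigned Value, unsigned Align) {
      return (Value + Align - 1) / Align * Align;
    }

    int main() {
      // Hypothetical query: a 256-bit vector load, 4-byte aligned, on a
      // 512-bit HVX target. 256 % 512 != 0, so the "cost 1" fast path is
      // skipped and the load is priced per aligned chunk instead.
      unsigned VecWidth = 256;                           // bits
      unsigned Alignment = 4;                            // bytes
      unsigned AlignWidth = 8 * std::max(1u, Alignment); // 32 bits
      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth; // 8
      printf("cost = %u\n", 3 * NumLoads);               // prints: cost = 24
    }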
@@ -61,21 +97,22 @@ unsigned HexagonTTIImpl::getCacheLineSize() const {
 
 int HexagonTTIImpl::getUserCost(const User *U,
                                 ArrayRef<const Value *> Operands) {
-  auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {
+  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
     if (!CI->isIntegerCast())
       return false;
+    // Only extensions from an integer type shorter than 32-bit to i32
+    // can be folded into the load.
+    const DataLayout &DL = getDataLayout();
+    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
+    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
+    if (DBW != 32 || SBW >= DBW)
+      return false;
+
     const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
     // Technically, this code could allow multiple uses of the load, and
     // check if all the uses are the same extension operation, but this
     // should be sufficient for most cases.
-    if (!LI || !LI->hasOneUse())
-      return false;
-
-    // Only extensions from an integer type shorter than 32-bit to i32
-    // can be folded into the load.
-    unsigned SBW = CI->getSrcTy()->getIntegerBitWidth();
-    unsigned DBW = CI->getDestTy()->getIntegerBitWidth();
-    return DBW == 32 && (SBW < DBW);
+    return LI && LI->hasOneUse();
   };
 
   if (const CastInst *CI = dyn_cast<const CastInst>(U))
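The check above accepts a widening integer cast whose operand is a single-use load of a type narrower than 32 bits; on Hexagon such extensions fold into the extending load instructions (memb/memub, memh/memuh), so the cast is reported as free. A minimal C example of the shape being recognized (hypothetical, not from the patch):

    // The i16 load has exactly one use, a sign extension to i32; Hexagon's
    // memh load already sign-extends, so getUserCost treats the sext as free.
    int first(const short *p) {
      return p[0];   // IR: %v = load i16, ... ; %e = sext i16 %v to i32
    }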
lib/Target/Hexagon/HexagonTargetTransformInfo.h:

@@ -70,6 +70,113 @@ public:
   /// @{
 
   unsigned getNumberOfRegisters(bool vector) const;
+  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getRegisterBitWidth(bool Vector) const;
+  unsigned getMinVectorRegisterBitWidth() const;
+
+  bool supportsEfficientVectorElementLoadStore() {
+    return false;
+  }
+
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    return 0;
+  }
+
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
+      unsigned VF) {
+    return 0;
+  }
+
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys) {
+    return 1;
+  }
+
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+      ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
+    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+  }
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+      ArrayRef<Type*> Tys, FastMathFlags FMF,
+      unsigned ScalarizationCostPassed = UINT_MAX) {
+    return 1;
+  }
+
+  bool hasBranchDivergence() {
+    return false;
+  }
+
+  bool enableAggressiveInterleaving(bool LoopHasReductions) {
+    return false;
+  }
+
+  unsigned getCFInstrCost(unsigned Opcode) {
+    return 1;
+  }
+
+  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
+      const SCEV *) {
+    return 0;
+  }
+
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+      unsigned AddressSpace, const Instruction *I = nullptr);
+
+  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+      unsigned AddressSpace) {
+    return 1;
+  }
+
+  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+      Type *SubTp) {
+    return 1;
+  }
+
+  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+      bool VariableMask,
+      unsigned Alignment) {
+    return 1;
+  }
+
+  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+      unsigned Factor,
+      ArrayRef<unsigned> Indices,
+      unsigned Alignment,
+      unsigned AddressSpace) {
+    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+        Alignment, AddressSpace);
+  }
+
+  unsigned getNumberOfParts(Type *Tp) {
+    return BaseT::getNumberOfParts(Tp);
+  }
+
+  bool prefersVectorizedAddressing() {
+    return true;
+  }
+
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+      const Instruction *I) {
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  }
+
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+        Opd1PropInfo, Opd2PropInfo, Args);
+  }
+
+  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+      const Instruction *I = nullptr) {
+    return 1;
+  }
+
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+    return 1;
+  }
+
   /// @}
 
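Most of the overrides above are placeholders: fixed costs of 0 or 1, or direct deferrals to BaseT, which keeps the cost model neutral while the vectorizer is being tested. As a rough, hypothetical sketch of how these hooks get consumed (not code from this patch), the vectorizer's width selection boils down to queries like:

    #include "llvm/Analysis/TargetTransformInfo.h"

    // If the target reports zero vector registers (no HVX, or
    // -hexagon-autohvx off), vectorization is not attempted at all.
    unsigned pickVectorizationFactor(const llvm::TargetTransformInfo &TTI,
                                     unsigned SmallestTypeBits) {
      if (TTI.getNumberOfRegisters(/*Vector=*/true) == 0)
        return 1;  // scalar only
      unsigned WidestRegister = TTI.getRegisterBitWidth(/*Vector=*/true);
      return WidestRegister / SmallestTypeBits;
    }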
@@ -80,5 +187,4 @@ public:
 };
 
 } // end namespace llvm
-
 #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H