From e5c6fcdb65f26e1597feb69b698face20e1016a1 Mon Sep 17 00:00:00 2001
From: Krzysztof Parzyszek
Date: Tue, 27 Mar 2018 17:07:52 +0000
Subject: [PATCH] [Hexagon] Rudimentary support for auto-vectorization for HVX

This implements a set of TTI functions that the loop vectorizer uses.
The only purpose of this is to enable testing. Auto-vectorization is
disabled by default, enabled by -hexagon-autohvx.

llvm-svn: 328639
---
 .../Hexagon/HexagonTargetTransformInfo.cpp    |  59 ++++++++--
 .../Hexagon/HexagonTargetTransformInfo.h      | 108 +++++++++++++++++-
 2 files changed, 155 insertions(+), 12 deletions(-)

diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 4feaca32b45..3290b5931ad 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -26,6 +26,9 @@ using namespace llvm;
 
 #define DEBUG_TYPE "hexagontti"
 
+static cl::opt<bool> HexagonAutoHVX("hexagon-autohvx", cl::init(false),
+  cl::Hidden, cl::desc("Enable loop vectorizer for HVX"));
+
 static cl::opt<bool> EmitLookupTables("hexagon-emit-lookup-tables",
   cl::init(true), cl::Hidden,
   cl::desc("Control lookup table emission on Hexagon target"));
@@ -47,8 +50,41 @@ bool HexagonTTIImpl::shouldFavorPostInc() const {
   return true;
 }
 
-unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
-  return vector ? 0 : 32;
+unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
+  if (Vector)
+    return HexagonAutoHVX && getST()->useHVXOps() ? 32 : 0;
+  return 32;
+}
+
+unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
+  return HexagonAutoHVX && getST()->useHVXOps() ? 64 : 0;
+}
+
+unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
+  return Vector ? getMinVectorRegisterBitWidth() : 32;
+}
+
+unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
+  return getST()->useHVXOps() ? getST()->getVectorLength()*8 : 0;
+}
+
+unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
+      unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
+  if (Opcode == Instruction::Load && Src->isVectorTy()) {
+    VectorType *VecTy = cast<VectorType>(Src);
+    unsigned VecWidth = VecTy->getBitWidth();
+    if (VecWidth > 64) {
+      // Assume that vectors longer than 64 bits are meant for HVX.
+      if (getNumberOfRegisters(true) > 0) {
+        if (VecWidth % getRegisterBitWidth(true) == 0)
+          return 1;
+      }
+      unsigned AlignWidth = 8 * std::max(1u, Alignment);
+      unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
+      return 3*NumLoads;
+    }
+  }
+  return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
 }
 
 unsigned HexagonTTIImpl::getPrefetchDistance() const {
@@ -61,21 +97,22 @@ unsigned HexagonTTIImpl::getCacheLineSize() const {
 
 int HexagonTTIImpl::getUserCost(const User *U,
                                 ArrayRef<const Value *> Operands) {
-  auto isCastFoldedIntoLoad = [](const CastInst *CI) -> bool {
+  auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
     if (!CI->isIntegerCast())
       return false;
+    // Only extensions from an integer type shorter than 32-bit to i32
+    // can be folded into the load.
+    const DataLayout &DL = getDataLayout();
+    unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
+    unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
+    if (DBW != 32 || SBW >= DBW)
+      return false;
+
     const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
     // Technically, this code could allow multiple uses of the load, and
     // check if all the uses are the same extension operation, but this
     // should be sufficient for most cases.
-    if (!LI || !LI->hasOneUse())
-      return false;
-
-    // Only extensions from an integer type shorter than 32-bit to i32
-    // can be folded into the load.
-    unsigned SBW = CI->getSrcTy()->getIntegerBitWidth();
-    unsigned DBW = CI->getDestTy()->getIntegerBitWidth();
-    return DBW == 32 && (SBW < DBW);
+    return LI && LI->hasOneUse();
   };
 
   if (const CastInst *CI = dyn_cast<const CastInst>(U))
diff --git a/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 58e23b2e1dc..7adacafa11c 100644
--- a/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -70,6 +70,113 @@ public:
   /// @{
 
   unsigned getNumberOfRegisters(bool vector) const;
+  unsigned getMaxInterleaveFactor(unsigned VF);
+  unsigned getRegisterBitWidth(bool Vector) const;
+  unsigned getMinVectorRegisterBitWidth() const;
+
+  bool supportsEfficientVectorElementLoadStore() {
+    return false;
+  }
+
+  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
+    return 0;
+  }
+
+  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value*> Args,
+            unsigned VF) {
+    return 0;
+  }
+
+  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type*> Tys) {
+    return 1;
+  }
+
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+            ArrayRef<Value*> Args, FastMathFlags FMF, unsigned VF) {
+    return BaseT::getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+  }
+  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
+            ArrayRef<Type*> Tys, FastMathFlags FMF,
+            unsigned ScalarizationCostPassed = UINT_MAX) {
+    return 1;
+  }
+
+  bool hasBranchDivergence() {
+    return false;
+  }
+
+  bool enableAggressiveInterleaving(bool LoopHasReductions) {
+    return false;
+  }
+
+  unsigned getCFInstrCost(unsigned Opcode) {
+    return 1;
+  }
+
+  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
+            const SCEV *) {
+    return 0;
+  }
+
+  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+            unsigned AddressSpace, const Instruction *I = nullptr);
+
+  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
+            unsigned AddressSpace) {
+    return 1;
+  }
+
+  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
+            Type *SubTp) {
+    return 1;
+  }
+
+  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
+            bool VariableMask,
+            unsigned Alignment) {
+    return 1;
+  }
+
+  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+            unsigned Factor,
+            ArrayRef<unsigned> Indices,
+            unsigned Alignment,
+            unsigned AddressSpace) {
+    return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+                                             Alignment, AddressSpace);
+  }
+
+  unsigned getNumberOfParts(Type *Tp) {
+    return BaseT::getNumberOfParts(Tp);
+  }
+
+  bool prefersVectorizedAddressing() {
+    return true;
+  }
+
+  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+            const Instruction *I) {
+    return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+  }
+
+  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+            TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
+            TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
+            TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
+            TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
+            ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
+    return BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
+                                         Opd1PropInfo, Opd2PropInfo, Args);
+  }
+
+  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+            const Instruction *I = nullptr) {
+    return 1;
+  }
+
+  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
+    return 1;
+  }
 
   /// @}
 
@@ -80,5 +187,4 @@ public:
 };
 
 } // end namespace llvm
-
 #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONTARGETTRANSFORMINFO_H