
[SVE] Remove calls to VectorType::getNumElements from AArch64

Reviewers: efriedma, paquette, david-arm, kmclaughlin

Reviewed By: david-arm

Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, danielkiss, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82214
Christopher Tetreault 2020-06-30 11:07:24 -07:00
parent b0b0a7801d
commit 4a8eb5f3d3
4 changed files with 37 additions and 30 deletions
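The pattern applied throughout the diff below is uniform: where a code path can only ever see fixed-width NEON vectors, cast to FixedVectorType before querying the element count, because a plain VectorType may be scalable and then has no single compile-time element count (and cast<FixedVectorType> will assert if a scalable type ever reaches such a path). The following is a minimal standalone sketch of that distinction, assuming LLVM 11+ headers; the helper fixedElementCountOrZero is illustrative only and not part of this commit:

// Sketch only: contrasts fixed-width and scalable vector types.
// fixedElementCountOrZero is a hypothetical helper, not part of this commit.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Return the element count only when it is a compile-time constant,
// i.e. the type is a FixedVectorType; scalable vectors report 0 here.
static unsigned fixedElementCountOrZero(Type *Ty) {
  if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
    return FVTy->getNumElements();
  return 0;
}

int main() {
  LLVMContext Ctx;
  Type *I32 = Type::getInt32Ty(Ctx);

  VectorType *Fixed = FixedVectorType::get(I32, 4);        // <4 x i32>
  VectorType *Scalable = ScalableVectorType::get(I32, 4);  // <vscale x 4 x i32>

  outs() << "fixed:    " << fixedElementCountOrZero(Fixed) << "\n";    // prints 4
  outs() << "scalable: " << fixedElementCountOrZero(Scalable) << "\n"; // prints 0
  return 0;
}

In the patch itself the cast<FixedVectorType> form is used directly, so a scalable type reaching one of these NEON-only paths fails loudly instead of silently returning a wrong element count.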


@@ -9486,8 +9486,8 @@ static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
};
auto extractHalf = [](Value *FullV, Value *HalfV) {
-auto *FullVT = cast<VectorType>(FullV->getType());
-auto *HalfVT = cast<VectorType>(HalfV->getType());
+auto *FullVT = cast<FixedVectorType>(FullV->getType());
+auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
};
@@ -9507,7 +9507,7 @@ static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
// elements.
int M1Start = -1;
int M2Start = -1;
-int NumElements = cast<VectorType>(Op1->getType())->getNumElements() * 2;
+int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
!ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
@@ -9639,7 +9639,7 @@ bool AArch64TargetLowering::isLegalInterleavedAccessType(
unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
// Ensure the number of vector elements is greater than 1.
-if (VecTy->getNumElements() < 2)
+if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
return false;
// Ensure the element type is legal.
@@ -9673,22 +9673,24 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
const DataLayout &DL = LI->getModule()->getDataLayout();
-VectorType *VecTy = Shuffles[0]->getType();
+VectorType *VTy = Shuffles[0]->getType();
// Skip if we do not have NEON and skip illegal vector types. We can
// "legalize" wide vector types into multiple interleaved accesses as long as
// the vector types are divisible by 128.
-if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
+if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
return false;
-unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
+unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
+auto *FVTy = cast<FixedVectorType>(VTy);
// A pointer vector can not be the return type of the ldN intrinsics. Need to
// load integer vectors first and then convert to pointer vectors.
-Type *EltTy = VecTy->getElementType();
+Type *EltTy = FVTy->getElementType();
if (EltTy->isPointerTy())
-VecTy =
-FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy->getNumElements());
+FVTy =
+FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
IRBuilder<> Builder(LI);
@@ -9698,19 +9700,19 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
if (NumLoads > 1) {
// If we're going to generate more than one load, reset the sub-vector type
// to something legal.
-VecTy = FixedVectorType::get(VecTy->getElementType(),
-VecTy->getNumElements() / NumLoads);
+FVTy = FixedVectorType::get(FVTy->getElementType(),
+FVTy->getNumElements() / NumLoads);
// We will compute the pointer operand of each load from the original base
// address using GEPs. Cast the base address to a pointer to the scalar
// element type.
BaseAddr = Builder.CreateBitCast(
BaseAddr,
-VecTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
+FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
}
-Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
-Type *Tys[2] = {VecTy, PtrTy};
+Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
+Type *Tys[2] = {FVTy, PtrTy};
static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
Intrinsic::aarch64_neon_ld3,
Intrinsic::aarch64_neon_ld4};
@@ -9727,8 +9729,8 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
// If we're generating more than one load, compute the base address of
// subsequent loads as an offset from the previous.
if (LoadCount > 0)
-BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
-VecTy->getNumElements() * Factor);
+BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
+FVTy->getNumElements() * Factor);
CallInst *LdN = Builder.CreateCall(
LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
@@ -9744,7 +9746,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad(
if (EltTy->isPointerTy())
SubVec = Builder.CreateIntToPtr(
SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
-VecTy->getNumElements()));
+FVTy->getNumElements()));
SubVecs[SVI].push_back(SubVec);
}
}
@@ -9795,7 +9797,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
"Invalid interleave factor");
-VectorType *VecTy = SVI->getType();
+auto *VecTy = cast<FixedVectorType>(SVI->getType());
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
unsigned LaneLen = VecTy->getNumElements() / Factor;
@@ -9820,7 +9822,8 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
// vectors to integer vectors.
if (EltTy->isPointerTy()) {
Type *IntTy = DL.getIntPtrType(EltTy);
-unsigned NumOpElts = cast<VectorType>(Op0->getType())->getNumElements();
+unsigned NumOpElts =
+cast<FixedVectorType>(Op0->getType())->getNumElements();
// Convert to the corresponding integer vector.
auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);


@@ -265,8 +265,9 @@ public:
Type *EltTy = VecTy->getElementType();
if (EltTy->isPointerTy()) {
uint32_t EltSize = DL->getTypeSizeInBits(EltTy);
-auto *NewTy = FixedVectorType::get(IntegerType::get(Ctx, EltSize),
-VecTy->getNumElements());
+auto *NewTy = FixedVectorType::get(
+IntegerType::get(Ctx, EltSize),
+cast<FixedVectorType>(VecTy)->getNumElements());
V = IRB.CreatePointerCast(V, NewTy);
}
}


@@ -212,7 +212,7 @@ bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
// elements in type Ty determine the vector width.
auto toVectorTy = [&](Type *ArgTy) {
return FixedVectorType::get(ArgTy->getScalarType(),
-cast<VectorType>(DstTy)->getNumElements());
+cast<FixedVectorType>(DstTy)->getNumElements());
};
// Exit early if DstTy is not a vector type whose elements are at least
@@ -724,8 +724,8 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
// have to promote the elements to v.2.
ProfitableNumElements = 8;
-if (cast<VectorType>(Ty)->getNumElements() < ProfitableNumElements) {
-unsigned NumVecElts = cast<VectorType>(Ty)->getNumElements();
+if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
+unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
// We generate 2 instructions per vector element.
return NumVectorizableInstsToAmortize * NumVecElts * 2;
@@ -740,7 +740,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
bool UseMaskForCond, bool UseMaskForGaps) {
assert(Factor >= 2 && "Invalid interleave factor");
-auto *VecVTy = cast<VectorType>(VecTy);
+auto *VecVTy = cast<FixedVectorType>(VecTy);
if (!UseMaskForCond && !UseMaskForGaps &&
Factor <= TLI->getMaxSupportedInterleaveFactor()) {
@@ -767,7 +767,8 @@ int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
for (auto *I : Tys) {
if (!I->isVectorTy())
continue;
-if (I->getScalarSizeInBits() * cast<VectorType>(I)->getNumElements() == 128)
+if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
+128)
Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
}
@@ -970,9 +971,10 @@ bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
case Instruction::Mul:
return false;
case Instruction::Add:
-return ScalarBits * VTy->getNumElements() >= 128;
+return ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128;
case Instruction::ICmp:
-return (ScalarBits < 64) && (ScalarBits * VTy->getNumElements() >= 128);
+return (ScalarBits < 64) &&
+(ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128);
case Instruction::FCmp:
return Flags.NoNaN;
default:


@@ -189,7 +189,8 @@ public:
// the element type fits into a register and the number of elements is a
// power of 2 > 1.
if (auto *DataTypeVTy = dyn_cast<VectorType>(DataType)) {
-unsigned NumElements = DataTypeVTy->getNumElements();
+unsigned NumElements =
+cast<FixedVectorType>(DataTypeVTy)->getNumElements();
unsigned EltSize = DataTypeVTy->getElementType()->getScalarSizeInBits();
return NumElements > 1 && isPowerOf2_64(NumElements) && EltSize >= 8 &&
EltSize <= 128 && isPowerOf2_64(EltSize);