
[SVE][CodeGen] Fix scalable vector issues in DAGTypeLegalizer::GenWidenVectorLoads

In DAGTypeLegalizer::GenWidenVectorLoads the algorithm assumes it only
ever deals with fixed-width types, hence the offsets for each individual
load never take 'vscale' into account. I've changed the code in that
function to use TypeSize instead of unsigned for tracking the remaining
load amount. In addition, I've changed the load loop to use the new
IncrementPointer helper function for updating the addresses in each
iteration, since this handles scalable vector types.
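To illustrate why plain unsigned bookkeeping breaks here, below is a
minimal standalone sketch (SimpleTypeSize is a simplified stand-in, not
the real llvm::TypeSize; the example widths are taken from the nxv6f16
test added below):

  #include <cassert>
  #include <cstdint>
  #include <iostream>

  // Simplified stand-in for llvm::TypeSize: a bit-width that is either
  // fixed or a multiple of the runtime value 'vscale'.
  struct SimpleTypeSize {
    uint64_t MinSize; // known-minimum size in bits
    bool IsScalable;  // true => actual size is MinSize * vscale

    friend SimpleTypeSize operator-(SimpleTypeSize LHS, SimpleTypeSize RHS) {
      // Subtracting a fixed width from a scalable one (or vice versa) has
      // no meaningful answer, so insist that both operands agree.
      assert(LHS.IsScalable == RHS.IsScalable &&
             "Arithmetic using mixed scalable and fixed types");
      return {LHS.MinSize - RHS.MinSize, LHS.IsScalable};
    }
  };

  int main() {
    // Widening a load of <vscale x 6 x half> (96 bits per vscale) and
    // pulling it in as a <vscale x 4 x half> chunk (64 bits per vscale).
    SimpleTypeSize Remaining{96, /*IsScalable=*/true};
    SimpleTypeSize Chunk{64, /*IsScalable=*/true};

    unsigned NumLoads = 0;
    while (Chunk.MinSize != 0 && Remaining.MinSize >= Chunk.MinSize) {
      Remaining = Remaining - Chunk; // stays per-vscale, never a raw byte count
      ++NumLoads;
    }
    std::cout << "emitted " << NumLoads << " chunk load(s); "
              << Remaining.MinSize << " bits per vscale still to load\n";
    return 0;
  }

The real code additionally folds the per-vscale offset into the base
pointer via IncrementPointer rather than adding a fixed byte offset.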

Also, I've added report_fatal_error calls in GenWidenVectorExtLoads,
TargetLowering::scalarizeVectorLoad and TargetLowering::scalarizeVectorStore,
since these functions currently lower the operation as a sequence of
element-by-element scalar loads/stores, which cannot be expressed when the
element count is only known as a multiple of vscale. In a similar vein, I've
also added a fatal error report in FindMemType for the case when we decide to
return the element type for a scalable vector type.
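For background on those fatal errors, here is a rough sketch of the
limitation: the element-by-element fallback enumerates one scalar access
per element, which requires a compile-time element count (scalarizeLoadOffsets
and VecType below are hypothetical illustrations, not DAG APIs):

  #include <cstdio>
  #include <cstdlib>
  #include <vector>

  // Hypothetical, simplified type descriptor (not llvm::EVT).
  struct VecType {
    unsigned MinNumElts; // known-minimum element count
    bool Scalable;       // true for <vscale x N x ...> types
  };

  // Element-by-element fallback: one offset per element. This only works
  // when the element count is fully known at compile time.
  std::vector<unsigned> scalarizeLoadOffsets(const VecType &VT,
                                             unsigned EltBytes) {
    if (VT.Scalable) {
      // Mirrors the report_fatal_error added in this patch: a scalable
      // vector has no fixed number of elements to iterate over.
      std::fprintf(stderr, "Cannot scalarize scalable vector loads\n");
      std::exit(1);
    }
    std::vector<unsigned> Offsets;
    for (unsigned I = 0; I < VT.MinNumElts; ++I)
      Offsets.push_back(I * EltBytes); // one scalar load per element
    return Offsets;
  }

  int main() {
    VecType Fixed{4, /*Scalable=*/false}; // e.g. <4 x i32>
    for (unsigned Off : scalarizeLoadOffsets(Fixed, 4))
      std::printf("load 4 bytes at offset %u\n", Off);

    VecType Scalable{4, /*Scalable=*/true}; // e.g. <vscale x 4 x i32>
    scalarizeLoadOffsets(Scalable, 4); // bails out with a diagnostic
    return 0;
  }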

I've added new tests in

  CodeGen/AArch64/sve-split-load.ll
  CodeGen/AArch64/sve-ld-addressing-mode-reg-imm.ll

for the changes in GenWidenVectorLoads.

Differential Revision: https://reviews.llvm.org/D85909
David Sherwood 2020-08-12 14:16:22 +01:00
parent 540752542a
commit f7a1832d69
5 changed files with 116 additions and 33 deletions

llvm/include/llvm/Support/TypeSize.h

@@ -145,6 +145,24 @@ public:
return *this;
}
friend TypeSize operator-(const TypeSize &LHS, const TypeSize &RHS) {
assert(LHS.IsScalable == RHS.IsScalable &&
"Arithmetic using mixed scalable and fixed types");
return {LHS.MinSize - RHS.MinSize, LHS.IsScalable};
}
friend TypeSize operator/(const TypeSize &LHS, const TypeSize &RHS) {
assert(LHS.IsScalable == RHS.IsScalable &&
"Arithmetic using mixed scalable and fixed types");
return {LHS.MinSize / RHS.MinSize, LHS.IsScalable};
}
friend TypeSize operator%(const TypeSize &LHS, const TypeSize &RHS) {
assert(LHS.IsScalable == RHS.IsScalable &&
"Arithmetic using mixed scalable and fixed types");
return {LHS.MinSize % RHS.MinSize, LHS.IsScalable};
}
// Return the minimum size with the assumption that the size is exact.
// Use in places where a scalable size doesn't make sense (e.g. non-vector
// types, or vectors in backends which don't support scalable vectors).

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

@@ -4895,11 +4895,14 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
if (RetVT.getSizeInBits().getFixedSize() < MemVTWidth || MemVT == WidenVT)
return MemVT;
}
}
if (Scalable)
report_fatal_error("Using element-wise loads and stores for widening "
"operations is not supported for scalable vectors");
return RetVT;
}
@@ -4942,10 +4945,10 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// element type or scalar loads and then recombines it to the widen vector
// type.
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
unsigned WidenWidth = WidenVT.getSizeInBits();
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());
// Load information
@@ -4954,15 +4957,17 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
TypeSize LdWidth = LdVT.getSizeInBits();
TypeSize WidenWidth = WidenVT.getSizeInBits();
TypeSize WidthDiff = WidenWidth - LdWidth;
// Allow wider loads if they are sufficiently aligned to avoid memory faults
// and if the original load is simple.
unsigned LdAlign = (!LD->isSimple()) ? 0 : LD->getAlignment();
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
EVT NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
WidthDiff.getKnownMinSize());
TypeSize NewVTWidth = NewVT.getSizeInBits();
SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(LdOp.getValue(1));
@@ -4970,7 +4975,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
// Check if we can load the element with one instruction.
if (LdWidth <= NewVTWidth) {
if (!NewVT.isVector()) {
unsigned NumElts = WidenWidth / NewVTWidth;
unsigned NumElts = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
@@ -4978,8 +4983,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
if (NewVT == WidenVT)
return LdOp;
assert(WidenWidth % NewVTWidth == 0);
unsigned NumConcat = WidenWidth / NewVTWidth;
// TODO: We don't currently have any tests that exercise this code path.
assert(WidenWidth.getFixedSize() % NewVTWidth.getFixedSize() == 0);
unsigned NumConcat = WidenWidth.getFixedSize() / NewVTWidth.getFixedSize();
SmallVector<SDValue, 16> ConcatOps(NumConcat);
SDValue UndefVal = DAG.getUNDEF(NewVT);
ConcatOps[0] = LdOp;
@@ -4992,35 +4998,30 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
SmallVector<SDValue, 16> LdOps;
LdOps.push_back(LdOp);
LdWidth -= NewVTWidth;
unsigned Offset = 0;
uint64_t ScaledOffset = 0;
MachinePointerInfo MPI = LD->getPointerInfo();
do {
LdWidth -= NewVTWidth;
IncrementPointer(cast<LoadSDNode>(LdOp), NewVT, MPI, BasePtr,
&ScaledOffset);
while (LdWidth > 0) {
unsigned Increment = NewVTWidth / 8;
Offset += Increment;
BasePtr = DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::Fixed(Increment));
SDValue L;
if (LdWidth < NewVTWidth) {
// The current type we are using is too large. Find a better size.
NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
NewVT = FindMemType(DAG, TLI, LdWidth.getKnownMinSize(), WidenVT, LdAlign,
WidthDiff.getKnownMinSize());
NewVTWidth = NewVT.getSizeInBits();
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
}
Align NewAlign = ScaledOffset == 0
? LD->getOriginalAlign()
: commonAlignment(LD->getAlign(), ScaledOffset);
SDValue L =
DAG.getLoad(NewVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
LdOps.push_back(L);
LdOp = L;
LdWidth -= NewVTWidth;
}
} while (LdWidth > NewVTWidth);
// Build the vector from the load operations.
unsigned End = LdOps.size();
@@ -5044,13 +5045,17 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
}
ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
}
ConcatOps[--Idx] = LdOps[i];
for (--i; i >= 0; --i) {
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector.
unsigned NumOps = NewLdTy.getSizeInBits() / LdTy.getSizeInBits();
assert(NewLdTy.getSizeInBits() % LdTy.getSizeInBits() == 0);
unsigned NumOps =
(NewLdTy.getSizeInBits() / LdTy.getSizeInBits()).getKnownMinSize();
assert(
(NewLdTy.getSizeInBits() % LdTy.getSizeInBits()).getKnownMinSize() ==
0);
SmallVector<SDValue, 16> WidenOps(NumOps);
unsigned j = 0;
for (; j != End-Idx; ++j)
@@ -5071,7 +5076,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
makeArrayRef(&ConcatOps[Idx], End - Idx));
// We need to fill the rest with undefs to build the vector.
unsigned NumOps = WidenWidth / LdTy.getSizeInBits();
unsigned NumOps = (WidenWidth / LdTy.getSizeInBits()).getKnownMinSize();
SmallVector<SDValue, 16> WidenOps(NumOps);
SDValue UndefVal = DAG.getUNDEF(LdTy);
{
@@ -5094,6 +5099,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
EVT LdVT = LD->getMemoryVT();
SDLoc dl(LD);
assert(LdVT.isVector() && WidenVT.isVector());
assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
// Load information
SDValue Chain = LD->getChain();
@@ -5101,6 +5107,10 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
AAMDNodes AAInfo = LD->getAAInfo();
if (LdVT.isScalableVector())
report_fatal_error("Generating widen scalable extending vector loads is "
"not yet supported");
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
unsigned NumElts = LdVT.getVectorNumElements();

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

@@ -6724,6 +6724,9 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
EVT DstVT = LD->getValueType(0);
ISD::LoadExtType ExtType = LD->getExtensionType();
if (SrcVT.isScalableVector())
report_fatal_error("Cannot scalarize scalable vector loads");
unsigned NumElem = SrcVT.getVectorNumElements();
EVT SrcEltVT = SrcVT.getScalarType();
@@ -6811,6 +6814,9 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
SDValue Value = ST->getValue();
EVT StVT = ST->getMemoryVT();
if (StVT.isScalableVector())
report_fatal_error("Cannot scalarize scalable vector stores");
// The type of the data we want to save
EVT RegVT = Value.getValueType();
EVT RegSclVT = RegVT.getScalarType();

llvm/test/CodeGen/AArch64/sve-ld-addressing-mode-reg-imm.ll

@@ -104,3 +104,40 @@ define <vscale x 2 x i64> @ld1d_inbound(<vscale x 2 x i64>* %a) {
%load = load <vscale x 2 x i64>, <vscale x 2 x i64>* %base
ret <vscale x 2 x i64> %load
}
define void @load_nxv6f16(<vscale x 6 x half>* %a) {
; CHECK-LABEL: load_nxv6f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1h { z0.s }, p1/z, [x0]
; CHECK-NEXT: ret
%val = load volatile <vscale x 6 x half>, <vscale x 6 x half>* %a
ret void
}
define void @load_nxv6f32(<vscale x 6 x float>* %a) {
; CHECK-LABEL: load_nxv6f32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0]
; CHECK-NEXT: ret
%val = load volatile <vscale x 6 x float>, <vscale x 6 x float>* %a
ret void
}
define void @load_nxv12f16(<vscale x 12 x half>* %a) {
; CHECK-LABEL: load_nxv12f16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0]
; CHECK-NEXT: ret
%val = load volatile <vscale x 12 x half>, <vscale x 12 x half>* %a
ret void
}

llvm/test/CodeGen/AArch64/sve-split-load.ll

@@ -24,6 +24,18 @@ define <vscale x 16 x i16> @load_split_i16(<vscale x 16 x i16>* %a) {
ret <vscale x 16 x i16> %load
}
define <vscale x 24 x i16> @load_split_24i16(<vscale x 24 x i16>* %a) {
; CHECK-LABEL: load_split_24i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT: ret
%load = load <vscale x 24 x i16>, <vscale x 24 x i16>* %a
ret <vscale x 24 x i16> %load
}
define <vscale x 32 x i16> @load_split_32i16(<vscale x 32 x i16>* %a) {
; CHECK-LABEL: load_split_32i16:
; CHECK: // %bb.0: