
[CodeGen] Fix issues with scalable-vector INSERT/EXTRACT_SUBVECTORs

This patch addresses a few issues when dealing with scalable-vector
INSERT_SUBVECTOR and EXTRACT_SUBVECTOR nodes.

When legalizing in DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR, we
store the low and high halves to the stack separately. The offset for
the high half was calculated incorrectly: it was computed as a fixed
byte count, which does not account for vscale when the split type is
scalable.
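
To illustrate, here is a standalone sketch (plain C++, not the LLVM
API; the shapes are the nxv16i64 -> 2 x nxv8i64 split exercised by the
tests below). The high half begins vscale * 64 bytes into the stack
slot, so no single compile-time constant is correct for every vscale:

    #include <cstdio>

    int main() {
      // One nxv8i64 half holds vscale * 8 elements of 8 bytes each.
      for (unsigned VScale = 1; VScale <= 4; ++VScale) {
        unsigned HighHalfOffset = VScale * 8 * 8; // bytes
        std::printf("vscale=%u -> high half begins at byte %u\n", VScale,
                    HighHalfOffset);
      }
      // A vscale-independent increment (the old TypeSize::Fixed path) is only
      // right for vscale == 1; the pointer must instead advance by a
      // vscale-scaled amount, which is what IncrementPointer does.
      return 0;
    }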

Additionally, we can optimize this process when we can detect that the
subvector is contained entirely within the low/high split vector type.
While this optimization is also valid for scalable vectors, when
performing the 'high' optimization the subvector must itself be a
scalable vector, since we cannot tell whether a fixed-length subvector
lies entirely within the high half of a scalable vector.
Note that the 'low' optimization is still conservative: it may be
possible to insert v2i32 into the low half of a split nxv1i32/nxv1i32,
but we can't guarantee it. It is always possible to insert v2i32 into
nxv2i32 or v2i32 into nxv4i32+2 as we know vscale is at least 1.
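
As a rough illustration, the following standalone sketch (plain C++,
not the LLVM API; the helper names are made up) mirrors the containment
checks using minimum element counts, i.e. the counts at vscale = 1:

    #include <cstdio>

    // Low half: safe for fixed and scalable halves alike, because the half
    // only grows with vscale, so fitting at vscale = 1 means fitting for
    // every vscale >= 1.
    bool fitsInLowHalf(unsigned Idx, unsigned SubElems, unsigned LoElems) {
      return Idx + SubElems <= LoElems;
    }

    // High half: with a fixed-length subvector and a scalable vector, the
    // low/high boundary depends on vscale, so require matching scalability
    // before trusting the index arithmetic.
    bool fitsInHighHalf(bool VecScalable, bool SubScalable, unsigned Idx,
                        unsigned SubElems, unsigned LoElems,
                        unsigned VecElems) {
      return VecScalable == SubScalable && Idx >= LoElems &&
             Idx + SubElems <= VecElems;
    }

    int main() {
      // v2i32 at index 0 into an nxv2i32 low half: always fits (vscale >= 1).
      std::printf("%d\n", fitsInLowHalf(0, 2, 2)); // 1
      // v2i32 into an nxv1i32 low half: might fit, but we cannot guarantee it.
      std::printf("%d\n", fitsInLowHalf(0, 2, 1)); // 0 (conservative)
      // v2i64 at index 8 of nxv16i64 (see insert_v2i64_nxv16i64_hi below):
      // rejected because the subvector is fixed but the vector is scalable.
      std::printf("%d\n", fitsInHighHalf(true, false, 8, 2, 8, 16)); // 0
      return 0;
    }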

Lastly, in SelectionDAG::isSplatValue, we were early-exiting only when
the extracted subvector's value type was a scalable vector, forgetting
that a fixed-length vector can also be extracted from a scalable one;
the source vector type must be checked as well.
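
As a hedged sketch of that distinction (plain C++, not the LLVM API;
names are illustrative), checking only the extract's result type misses
exactly that case, whereas the fix below checks the source type:

    #include <cstdio>

    struct VecTy { bool Scalable; };

    // Bailing only when the *result* of the extract is scalable lets a
    // scalable source slip through when the extracted value is fixed-length.
    bool bailsOnResult(VecTy Result) { return Result.Scalable; }

    // The patch instead bails whenever the *source* is scalable, since the
    // demanded-elements bookkeeping needs a fixed source element count.
    bool bailsOnSource(VecTy Src) { return Src.Scalable; }

    int main() {
      VecTy Src{true};     // e.g. a scalable nxv4i32 source
      VecTy Result{false}; // e.g. a fixed v2i32 extracted from it
      std::printf("result-only check bails: %d, source check bails: %d\n",
                  bailsOnResult(Result), bailsOnSource(Src));
      return 0;
    }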

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D98495
Author: Fraser Cormack
Date:   2021-03-12 11:22:29 +00:00
Parent: 4f01a02f9f
Commit: 33c1eeca58

4 changed files with 149 additions and 10 deletions


@@ -1273,20 +1273,24 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
   EVT VecVT = Vec.getValueType();
   EVT LoVT = Lo.getValueType();
-  unsigned VecElems = VecVT.getVectorNumElements();
-  unsigned SubElems = SubVec.getValueType().getVectorNumElements();
-  unsigned LoElems = LoVT.getVectorNumElements();
+  EVT SubVecVT = SubVec.getValueType();
+  unsigned VecElems = VecVT.getVectorMinNumElements();
+  unsigned SubElems = SubVecVT.getVectorMinNumElements();
+  unsigned LoElems = LoVT.getVectorMinNumElements();
 
   // If we know the index is in the first half, and we know the subvector
   // doesn't cross the boundary between the halves, we can avoid spilling the
   // vector, and insert into the lower half of the split vector directly.
-  // Similarly if the subvector is fully in the high half.
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
   if (IdxVal + SubElems <= LoElems) {
     Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
     return;
   }
 
-  if (IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
+  // Similarly if the subvector is fully in the high half, but mind that we
+  // can't tell whether a fixed-length subvector is fully within the high half
+  // of a scalable vector.
+  if (VecVT.isScalableVector() == SubVecVT.isScalableVector() &&
+      IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
     Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec,
                      DAG.getVectorIdxConstant(IdxVal - LoElems, dl));
     return;
@@ -1315,13 +1319,12 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
                                SmallestAlign);
 
   // Increment the pointer to the other part.
-  unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
-  StackPtr =
-      DAG.getMemBasePlusOffset(StackPtr, TypeSize::Fixed(IncrementSize), dl);
+  auto *Load = cast<LoadSDNode>(Lo);
+  MachinePointerInfo MPI = Load->getPointerInfo();
+  IncrementPointer(Load, LoVT, MPI, StackPtr);
 
   // Load the Hi part from the stack slot.
-  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr,
-                   PtrInfo.getWithOffset(IncrementSize), SmallestAlign);
+  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
 }
 
 void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,


@@ -2524,6 +2524,9 @@ bool SelectionDAG::isSplatValue(SDValue V, const APInt &DemandedElts,
   case ISD::EXTRACT_SUBVECTOR: {
     // Offset the demanded elts by the subvector index.
     SDValue Src = V.getOperand(0);
+    // We don't support scalable vectors at the moment.
+    if (Src.getValueType().isScalableVector())
+      return false;
     uint64_t Idx = V.getConstantOperandVal(1);
     unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
     APInt UndefSrcElts;


@@ -553,6 +553,99 @@ define <vscale x 8 x i1> @insert_nxv8i1_v8i1_16(<vscale x 8 x i1> %v, <8 x i1>*
ret <vscale x 8 x i1> %c
}
declare <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64>, <2 x i64>, i64)
define void @insert_v2i64_nxv16i64(<2 x i64>* %psv0, <2 x i64>* %psv1, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a3, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v16, (a1)
; CHECK-NEXT: vsetivli a0, 6, e64,m8,tu,mu
; CHECK-NEXT: vslideup.vi v8, v16, 4
; CHECK-NEXT: vs8r.v v8, (a2)
; CHECK-NEXT: ret
%sv0 = load <2 x i64>, <2 x i64>* %psv0
%sv1 = load <2 x i64>, <2 x i64>* %psv1
%v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv0, i64 0)
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> %v0, <2 x i64> %sv1, i64 4)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
define void @insert_v2i64_nxv16i64_lo0(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: ret
%sv = load <2 x i64>, <2 x i64>* %psv
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 0)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
define void @insert_v2i64_nxv16i64_lo2(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_lo2:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vsetivli a0, 4, e64,m8,ta,mu
; CHECK-NEXT: vslideup.vi v16, v8, 2
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: ret
%sv = load <2 x i64>, <2 x i64>* %psv
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 2)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
; Check we don't mistakenly optimize this: we don't know whether this is
; inserted into the low or high split vector.
define void @insert_v2i64_nxv16i64_hi(<2 x i64>* %psv, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_v2i64_nxv16i64_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: srli a0, a0, 3
; CHECK-NEXT: slli a2, a0, 4
; CHECK-NEXT: addi a2, a2, -1
; CHECK-NEXT: addi a3, zero, 8
; CHECK-NEXT: bltu a2, a3, .LBB29_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: addi a2, zero, 8
; CHECK-NEXT: .LBB29_2:
; CHECK-NEXT: slli a2, a2, 3
; CHECK-NEXT: addi a3, sp, 16
; CHECK-NEXT: add a2, a3, a2
; CHECK-NEXT: vsetivli a4, 2, e64,m1,ta,mu
; CHECK-NEXT: vse64.v v25, (a2)
; CHECK-NEXT: slli a0, a0, 6
; CHECK-NEXT: add a2, a3, a0
; CHECK-NEXT: vl8re64.v v8, (a2)
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vl8re64.v v16, (a2)
; CHECK-NEXT: add a0, a1, a0
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%sv = load <2 x i64>, <2 x i64>* %psv
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.v2i64.nxv16i64(<vscale x 16 x i64> undef, <2 x i64> %sv, i64 8)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
declare <8 x i1> @llvm.experimental.vector.insert.v4i1.v8i1(<8 x i1>, <4 x i1>, i64)
declare <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1>, <8 x i1>, i64)


@@ -458,6 +458,46 @@ define <vscale x 4 x i1> @insert_nxv4i1_nxv1i1_2(<vscale x 4 x i1> %v, <vscale x
ret <vscale x 4 x i1> %vec
}
declare <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64>, <vscale x 8 x i64>, i64)
define void @insert_nxv8i64_nxv16i64(<vscale x 8 x i64> %sv0, <vscale x 8 x i64> %sv1, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_nxv8i64_nxv16i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs8r.v v16, (a0)
; CHECK-NEXT: ret
%v0 = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 0)
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> %v0, <vscale x 8 x i64> %sv1, i64 8)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
define void @insert_nxv8i64_nxv16i64_lo(<vscale x 8 x i64> %sv0, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_nxv8i64_nxv16i64_lo:
; CHECK: # %bb.0:
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: ret
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 0)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
define void @insert_nxv8i64_nxv16i64_hi(<vscale x 8 x i64> %sv0, <vscale x 16 x i64>* %out) {
; CHECK-LABEL: insert_nxv8i64_nxv16i64_hi:
; CHECK: # %bb.0:
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vs8r.v v8, (a0)
; CHECK-NEXT: ret
%v = call <vscale x 16 x i64> @llvm.experimental.vector.insert.nxv8i64.nxv16i64(<vscale x 16 x i64> undef, <vscale x 8 x i64> %sv0, i64 8)
store <vscale x 16 x i64> %v, <vscale x 16 x i64>* %out
ret void
}
declare <vscale x 4 x i1> @llvm.experimental.vector.insert.nxv1i1.nxv4i1(<vscale x 4 x i1>, <vscale x 1 x i1>, i64)
declare <vscale x 32 x i1> @llvm.experimental.vector.insert.nxv8i1.nxv32i1(<vscale x 32 x i1>, <vscale x 8 x i1>, i64)