1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[SVE] Custom ISel for fixed length extract/insert_subvector.

We use extract_subvector and insert_subvector to "cast" between
fixed length and scalable vectors.  This patch adds custom c++
based ISel for the following cases:

  fixed_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
  scalable_vector = ISD::INSERT_SUBVECTOR undef(scalable_vector), fixed_vector, 0

Which result in either EXTRACT_SUBREG/INSERT_SUBREG for NEON sized
vectors or COPY_TO_REGCLASS otherwise.

Differential Revision: https://reviews.llvm.org/D82871
This commit is contained in:
Paul Walker 2020-07-08 09:10:16 +00:00
parent 5ea70edf9e
commit f7143dfdb4
4 changed files with 246 additions and 21 deletions

View File

@ -3240,6 +3240,63 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
ReplaceNode(N, N3);
}
// Builds the machine node that produces the fixed length vector VT from the
// packed scalable vector V.
//
// NOTE: EXTRACT_SUBREG is only used for 64-bit (dsub) and 128-bit (zsub)
// results. Fixed length vector types larger than NEON don't have a matching
// SubRegIndex, so every other size is handled with a COPY_TO_REGCLASS that
// names the ZPR register class.
static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
  assert(V.getValueType().isScalableVector() &&
         V.getValueType().getSizeInBits().getKnownMinSize() ==
             AArch64::SVEBitsPerBlock &&
         "Expected to extract from a packed scalable vector!");
  assert(VT.isFixedLengthVector() &&
         "Expected to extract a fixed length vector!");

  SDLoc DL(V);

  // Pick the sub-register index for NEON sized results; anything else has no
  // sub-register and is returned straight from the default case.
  unsigned SubRegIdx;
  switch (VT.getSizeInBits()) {
  case 64:
    SubRegIdx = AArch64::dsub;
    break;
  case 128:
    SubRegIdx = AArch64::zsub;
    break;
  default: {
    SDValue RegClass =
        DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V,
                               RegClass);
  }
  }

  SDValue SubRegOp = DAG->getTargetConstant(SubRegIdx, DL, MVT::i32);
  return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V,
                             SubRegOp);
}
// Builds the machine node that places the fixed length vector V into a packed
// scalable vector of type VT.
//
// NOTE: INSERT_SUBREG (into an IMPLICIT_DEF container) is only used for 64-bit
// (dsub) and 128-bit (zsub) inputs. Fixed length vector types larger than NEON
// don't have a matching SubRegIndex, so every other size is handled with a
// COPY_TO_REGCLASS that names the ZPR register class.
static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
  assert(VT.isScalableVector() &&
         VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
         "Expected to insert into a packed scalable vector!");
  assert(V.getValueType().isFixedLengthVector() &&
         "Expected to insert a fixed length vector!");

  SDLoc DL(V);

  // Pick the sub-register index for NEON sized inputs; anything else has no
  // sub-register and is returned straight from the default case.
  unsigned SubRegIdx;
  switch (V.getValueType().getSizeInBits()) {
  case 64:
    SubRegIdx = AArch64::dsub;
    break;
  case 128:
    SubRegIdx = AArch64::zsub;
    break;
  default: {
    SDValue RegClass =
        DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V,
                               RegClass);
  }
  }

  // The sub-register constant is created before the container, matching the
  // order in which the nodes were originally built.
  SDValue SubRegOp = DAG->getTargetConstant(SubRegIdx, DL, MVT::i32);
  SDNode *UndefContainer =
      DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
  return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
                             SDValue(UndefContainer, 0), V, SubRegOp);
}
void AArch64DAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
@ -3313,6 +3370,52 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
return;
break;
case ISD::EXTRACT_SUBVECTOR: {
// Bail when not a "cast" like extract_subvector.
if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
break;
// Bail when normal isel can do the job.
EVT InVT = Node->getOperand(0).getValueType();
if (VT.isScalableVector() || InVT.isFixedLengthVector())
break;
// NOTE: We can only get here when doing fixed length SVE code generation.
// We do manual selection because the types involved are not linked to real
// registers (despite being legal) and must be coerced into SVE registers.
//
// NOTE: If the above changes, be aware that selection will still not work
// because the td definition of extract_vector does not support extracting
// a fixed length vector from a scalable vector.
ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
return;
}
case ISD::INSERT_SUBVECTOR: {
// Bail when not a "cast" like insert_subvector.
if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
break;
if (!Node->getOperand(0).isUndef())
break;
// Bail when normal isel should do the job.
EVT InVT = Node->getOperand(1).getValueType();
if (VT.isFixedLengthVector() || InVT.isScalableVector())
break;
// NOTE: We can only get here when doing fixed length SVE code generation.
// We do manual selection because the types involved are not linked to real
// registers (despite being legal) and must be coerced into SVE registers.
//
// NOTE: If the above changes, be aware that selection will still not work
// because the td definition of insert_vector does not support inserting a
// fixed length vector into a scalable vector.
ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
return;
}
case ISD::Constant: {
// Materialize zero constants as copies from WZR/XZR. This allows
// the coalescer to propagate these into other instructions.

View File

@ -116,6 +116,18 @@ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
/// Reports whether VT is a "packed" vector, i.e. one whose elements sit in the
/// lowest bit positions of its register class with no gaps between them.
///
/// Every fixed length vector counts as packed. A scalable vector is packed
/// only when its known minimum size equals AArch64::SVEBitsPerBlock; for
/// example nxv2f16, nxv4f16 and nxv8f16 are all legal and share a register
/// class, yet only nxv8f16 is packed.
///
/// VT must be a legal vector type (asserted).
static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
  assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
         "Expected legal vector type!");

  if (VT.isFixedLengthVector())
    return true;

  return VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
}
AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
const AArch64Subtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@ -908,6 +920,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// D68877 for more details.
for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
if (isTypeLegal(VT)) {
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SDIV, VT, Custom);
@ -921,16 +934,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA, VT, Custom);
if (VT.getScalarType() == MVT::i1)
setOperationAction(ISD::SETCC, VT, Custom);
} else {
for (auto VT : { MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32 })
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
}
}
for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32})
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
if (isTypeLegal(VT)) {
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
setOperationAction(ISD::SELECT, VT, Custom);
}
@ -1037,9 +1052,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
setOperationAction(Op, VT, Expand);
// EXTRACT_SUBVECTOR/INSERT_SUBVECTOR are used to "cast" between scalable
// and fixed length vector types, although with the current level of support
// only the former is exercised.
// We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
// Lower fixed length vector operations to scalable equivalents.
@ -3469,6 +3482,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerSPLAT_VECTOR(Op, DAG);
case ISD::EXTRACT_SUBVECTOR:
return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INSERT_SUBVECTOR:
return LowerINSERT_SUBVECTOR(Op, DAG);
case ISD::SDIV:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
case ISD::UDIV:
@ -8679,29 +8694,47 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
// Custom lowering hook for ISD::EXTRACT_SUBVECTOR nodes that produce a fixed
// length vector. Returns Op itself for the forms that are left for instruction
// selection to match, and an empty SDValue for everything else.
//
// NOTE(review): this listing appears to interleave the pre-patch and
// post-patch lines of the function (the diff markers are not visible). As
// written, VT and InVT both hold operand 0's type and Val and Idx both hold
// operand 1's constant, so each nested `if` pair below tests the same
// condition twice, and `dl` is not used anywhere in the visible body. Confirm
// against the real file before relying on the exact statement sequence.
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(!Op.getValueType().isScalableVector() &&
"Unexpected scalable type for custom lowering EXTRACT_SUBVECTOR");
EVT VT = Op.getOperand(0).getValueType();
SDLoc dl(Op);
// Just in case...
if (!VT.isVector())
return SDValue();
// Bail out when the index operand is not a constant.
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
if (!Cst)
return SDValue();
unsigned Val = Cst->getZExtValue();
assert(Op.getValueType().isFixedLengthVector() &&
"Only cases that extract a fixed length vector are supported!");
// InVT: type of the vector being extracted from. Idx: constant index operand.
// Size: size in bits of the extracted value.
EVT InVT = Op.getOperand(0).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
unsigned Size = Op.getValueSizeInBits();
// Scalable source: only an index-0 extract from a packed scalable vector is
// kept as-is; every other scalable case yields an empty SDValue.
if (InVT.isScalableVector()) {
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG))
return Op;
return SDValue();
}
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
if (Val == 0)
if (Idx == 0 && InVT.getSizeInBits() <= 128)
return Op;
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64)
return Op;
return SDValue();
}
SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
assert(Op.getValueType().isScalableVector() &&
"Only expect to lower inserts into scalable vectors!");
EVT InVT = Op.getOperand(1).getValueType();
unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
// We don't have any patterns for scalable vector yet.
if (InVT.isScalableVector() || !useSVEForFixedLengthVectorVT(InVT))
return SDValue();
// This will be matched by custom code during ISelDAGToDAG.
if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
return Op;
return SDValue();

View File

@ -850,6 +850,7 @@ private:
SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
unsigned NewOp) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;

View File

@ -0,0 +1,88 @@
; RUN: llc -aarch64-sve-vector-bits-min=128 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefix=NO_SVE
; RUN: llc -aarch64-sve-vector-bits-min=256 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=384 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK
; RUN: llc -aarch64-sve-vector-bits-min=512 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=640 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=768 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=896 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=1024 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1152 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1280 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1408 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1536 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1664 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1792 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=1920 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
; Test we can code generate patterns of the form:
; fixed_length_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
; scalable_vector = ISD::INSERT_SUBVECTOR scalable_vector, fixed_length_vector, 0
;
; NOTE: Currently shufflevector does not support scalable vectors so it cannot
; be used to model the above operations. Instead these tests rely on knowing
; how fixed length operations are lowered to scalable ones, with multiple blocks
; ensuring insert/extract sequences are not folded away.
target triple = "aarch64-unknown-linux-gnu"
; Don't use SVE when its registers are no bigger than NEON.
; NO_SVE-NOT: ptrue
; <8 x i32> load/store round trip. Every run whose minimum SVE vector width is
; at least 256 bits expects a single ld1w/st1w pair governed by a vl8 predicate.
; The load and the store live in different basic blocks (see the NOTE above).
define void @subvector_v8i32(<8 x i32> *%in, <8 x i32>* %out) #0 {
; CHECK-LABEL: subvector_v8i32:
; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
; CHECK: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
; CHECK: st1w { [[DATA]] }, [[PG]], [x1]
; CHECK: ret
%a = load <8 x i32>, <8 x i32>* %in
br label %bb1
bb1:
store <8 x i32> %a, <8 x i32>* %out
ret void
}
; <16 x i32> load/store round trip. Runs whose minimum SVE vector width is at
; least 512 bits expect a single ld1w/st1w pair governed by a vl16 predicate;
; the narrower SVE runs only verify the function label and the final ret.
; The load and the store live in different basic blocks (see the NOTE above).
define void @subvector_v16i32(<16 x i32> *%in, <16 x i32>* %out) #0 {
; CHECK-LABEL: subvector_v16i32:
; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
; VBITS_GE_512: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
; VBITS_GE_512: st1w { [[DATA]] }, [[PG]], [x1]
; CHECK: ret
%a = load <16 x i32>, <16 x i32>* %in
br label %bb1
bb1:
store <16 x i32> %a, <16 x i32>* %out
ret void
}
; <32 x i32> load/store round trip. Runs whose minimum SVE vector width is at
; least 1024 bits expect a single ld1w/st1w pair governed by a vl32 predicate;
; the narrower SVE runs only verify the function label and the final ret.
; The load and the store live in different basic blocks (see the NOTE above).
define void @subvector_v32i32(<32 x i32> *%in, <32 x i32>* %out) #0 {
; CHECK-LABEL: subvector_v32i32:
; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
; VBITS_GE_1024: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
; VBITS_GE_1024: st1w { [[DATA]] }, [[PG]], [x1]
; CHECK: ret
%a = load <32 x i32>, <32 x i32>* %in
br label %bb1
bb1:
store <32 x i32> %a, <32 x i32>* %out
ret void
}
; <64 x i32> load/store round trip. Only the run with a minimum SVE vector
; width of 2048 bits expects a single ld1w/st1w pair governed by a vl64
; predicate; the narrower SVE runs only verify the function label and the
; final ret.
; The load and the store live in different basic blocks (see the NOTE above).
define void @subvector_v64i32(<64 x i32> *%in, <64 x i32>* %out) #0 {
; CHECK-LABEL: subvector_v64i32:
; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
; VBITS_GE_2048: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
; VBITS_GE_2048: st1w { [[DATA]] }, [[PG]], [x1]
; CHECK: ret
%a = load <64 x i32>, <64 x i32>* %in
br label %bb1
bb1:
store <64 x i32> %a, <64 x i32>* %out
ret void
}
attributes #0 = { "target-features"="+sve" }