[SVE] Custom ISel for fixed length extract/insert_subvector.

We use extract_subvector and insert_subvector to "cast" between fixed
length and scalable vectors. This patch adds custom C++ based ISel for
the following cases:

  fixed_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
  scalable_vector = ISD::INSERT_SUBVECTOR undef(scalable_vector), fixed_vector, 0

which result in either EXTRACT_SUBREG/INSERT_SUBREG for NEON sized
vectors or COPY_TO_REGCLASS otherwise.

Differential Revision: https://reviews.llvm.org/D82871
commit f7143dfdb4 (parent 5ea70edf9e)
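To make the two "cast" patterns concrete, here is a minimal hand-written sketch (not part of the patch) of how a fixed length v8i32 add might be lowered for SVE: both operands are wrapped into the packed scalable container nxv4i32, the operation happens there, and the result is extracted back out. The helper name is illustrative only.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Illustration: lower a fixed length v8i32 ISD::ADD by "casting" the
// operands into the packed scalable container nxv4i32, doing the add
// there, and "casting" the result back to a fixed length vector.
static SDValue lowerFixedAddToSVE(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();     // v8i32 (SVE vector length >= 256 bits).
  EVT ContainerVT = MVT::nxv4i32; // Packed container for i32 elements.
  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);

  // scalable_vector = ISD::INSERT_SUBVECTOR undef(scalable_vector), fixed_vector, 0
  SDValue Op0 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                            DAG.getUNDEF(ContainerVT), Op.getOperand(0), Zero);
  SDValue Op1 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                            DAG.getUNDEF(ContainerVT), Op.getOperand(1), Zero);
  SDValue Add = DAG.getNode(ISD::ADD, DL, ContainerVT, Op0, Op1);

  // fixed_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Add, Zero);
}

The custom ISel added below turns each such cast into a sub-register copy or a register class change, which the register allocator can usually coalesce away.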
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -3240,6 +3240,63 @@ void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
   ReplaceNode(N, N3);
 }
 
+// NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+  assert(V.getValueType().isScalableVector() &&
+         V.getValueType().getSizeInBits().getKnownMinSize() ==
+             AArch64::SVEBitsPerBlock &&
+         "Expected to extract from a packed scalable vector!");
+  assert(VT.isFixedLengthVector() &&
+         "Expected to extract a fixed length vector!");
+
+  SDLoc DL(V);
+  switch (VT.getSizeInBits()) {
+  case 64: {
+    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+  }
+  case 128: {
+    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+    return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
+  }
+  default: {
+    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+  }
+  }
+}
+
+// NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
+// vector types larger than NEON don't have a matching SubRegIndex.
+static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
+  assert(VT.isScalableVector() &&
+         VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
+         "Expected to insert into a packed scalable vector!");
+  assert(V.getValueType().isFixedLengthVector() &&
+         "Expected to insert a fixed length vector!");
+
+  SDLoc DL(V);
+  switch (V.getValueType().getSizeInBits()) {
+  case 64: {
+    auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
+    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+                               SDValue(Container, 0), V, SubReg);
+  }
+  case 128: {
+    auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
+    auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
+    return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
+                               SDValue(Container, 0), V, SubReg);
+  }
+  default: {
+    auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
+    return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
+  }
+  }
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
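The dsub/zsub cases work because NEON's D and Q registers are sub-registers covering the low 64 and 128 bits of the SVE Z registers. A hypothetical helper (illustration only, not in the patch; the include path for the generated AArch64 enums is an assumption) summarising the mapping the two functions above rely on:

#include "MCTargetDesc/AArch64MCTargetDesc.h" // Assumed home of AArch64::dsub / AArch64::zsub.
#include "llvm/CodeGen/ValueTypes.h"

// Pick the sub-register index for a NEON-sized fixed length vector held in
// the low bits of a Z register. Returning 0 means no index exists, which is
// why the default cases above fall back to COPY_TO_REGCLASS instead.
static unsigned subRegIndexForFixedVector(llvm::EVT VT) {
  switch (VT.getSizeInBits()) {
  case 64:
    return llvm::AArch64::dsub; // D register: low 64 bits of a Z register.
  case 128:
    return llvm::AArch64::zsub; // Q register: low 128 bits of a Z register.
  default:
    return 0; // Larger-than-NEON fixed vectors: no matching SubRegIndex.
  }
}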
@@ -3313,6 +3370,52 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       return;
     break;
 
+  case ISD::EXTRACT_SUBVECTOR: {
+    // Bail when not a "cast" like extract_subvector.
+    if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
+      break;
+
+    // Bail when normal isel can do the job.
+    EVT InVT = Node->getOperand(0).getValueType();
+    if (VT.isScalableVector() || InVT.isFixedLengthVector())
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+    //
+    // NOTE: If the above changes, be aware that selection will still not work
+    // because the td definition of extract_vector does not support extracting
+    // a fixed length vector from a scalable vector.
+
+    ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
+    return;
+  }
+
+  case ISD::INSERT_SUBVECTOR: {
+    // Bail when not a "cast" like insert_subvector.
+    if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
+      break;
+    if (!Node->getOperand(0).isUndef())
+      break;
+
+    // Bail when normal isel should do the job.
+    EVT InVT = Node->getOperand(1).getValueType();
+    if (VT.isFixedLengthVector() || InVT.isScalableVector())
+      break;
+
+    // NOTE: We can only get here when doing fixed length SVE code generation.
+    // We do manual selection because the types involved are not linked to real
+    // registers (despite being legal) and must be coerced into SVE registers.
+    //
+    // NOTE: If the above changes, be aware that selection will still not work
+    // because the td definition of insert_vector does not support inserting a
+    // fixed length vector into a scalable vector.
+
+    ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
+    return;
+  }
+
   case ISD::Constant: {
     // Materialize zero constants as copies from WZR/XZR. This allows
     // the coalescer to propagate these into other instructions.
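For reference, the guards at the top of the two cases boil down to one question, sketched here as a standalone predicate (an illustration, not code from the patch): is this an index-0 extract/insert that crosses the fixed/scalable boundary?

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Sketch of the EXTRACT_SUBVECTOR guard above. Because the result of an
// extract is always a vector, !isScalableVector() implies a fixed length
// result, so this is equivalent to the two "bail" checks in Select().
static bool isCastLikeExtract(SDNode *N) {
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(0).getValueType();
  uint64_t Idx = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  return Idx == 0 && VT.isFixedLengthVector() && InVT.isScalableVector();
}

The INSERT_SUBVECTOR case additionally requires operand 0 to be undef, since inserting into a live destination would be a genuine blend rather than a cast.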
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -116,6 +116,18 @@ EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
 /// Value type used for condition codes.
 static const MVT MVT_CC = MVT::i32;
 
+/// Returns true if VT's elements occupy the lowest bit positions of its
+/// associated register class without any intervening space.
+///
+/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
+/// same register class, but only nxv8f16 can be treated as a packed vector.
+static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
+  assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
+         "Expected legal vector type!");
+  return VT.isFixedLengthVector() ||
+         VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
+}
+
 AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                                              const AArch64Subtarget &STI)
     : TargetLowering(TM), Subtarget(&STI) {
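Worked examples may help here. The comment block below is an annotation, not part of the patch, and assumes all listed types are legal and AArch64::SVEBitsPerBlock == 128:

// isPackedVectorType(VT, DAG) for common SVE types:
//   nxv16i8, nxv8i16, nxv4i32, nxv2i64 -> true  (known minimum size is 128)
//   nxv8f16, nxv4f32, nxv2f64          -> true
//   nxv8i8, nxv4i16, nxv2i32, nxv4f16  -> false (only 64 known-minimum bits,
//                                        so elements sit in the Z register
//                                        with intervening space)
//   any legal fixed length vector      -> true  (trivially packed)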
@@ -908,6 +920,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     // D68877 for more details.
     for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
       if (isTypeLegal(VT)) {
+        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
         setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
         setOperationAction(ISD::SELECT, VT, Custom);
         setOperationAction(ISD::SDIV, VT, Custom);
@@ -921,16 +934,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::SRA, VT, Custom);
         if (VT.getScalarType() == MVT::i1)
           setOperationAction(ISD::SETCC, VT, Custom);
-      } else {
-        for (auto VT : { MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32 })
-          setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
       }
     }
+
+    for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32})
+      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
+
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
 
     for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
       if (isTypeLegal(VT)) {
+        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
         setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
         setOperationAction(ISD::SELECT, VT, Custom);
       }
@@ -1037,9 +1052,7 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
     setOperationAction(Op, VT, Expand);
 
-  // EXTRACT_SUBVECTOR/INSERT_SUBVECTOR are used to "cast" between scalable
-  // and fixed length vector types, although with the current level of support
-  // only the former is exercised.
+  // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
 
   // Lower fixed length vector operations to scalable equivalents.
@@ -3469,6 +3482,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerSPLAT_VECTOR(Op, DAG);
   case ISD::EXTRACT_SUBVECTOR:
    return LowerEXTRACT_SUBVECTOR(Op, DAG);
+  case ISD::INSERT_SUBVECTOR:
+    return LowerINSERT_SUBVECTOR(Op, DAG);
   case ISD::SDIV:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::SDIV_PRED);
   case ISD::UDIV:
@@ -8679,29 +8694,47 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
 
 SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
                                                       SelectionDAG &DAG) const {
-  assert(!Op.getValueType().isScalableVector() &&
-         "Unexpected scalable type for custom lowering EXTRACT_SUBVECTOR");
-
-  EVT VT = Op.getOperand(0).getValueType();
-  SDLoc dl(Op);
-  // Just in case...
-  if (!VT.isVector())
-    return SDValue();
-
-  ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-  if (!Cst)
-    return SDValue();
-  unsigned Val = Cst->getZExtValue();
+  assert(Op.getValueType().isFixedLengthVector() &&
+         "Only cases that extract a fixed length vector are supported!");
 
+  EVT InVT = Op.getOperand(0).getValueType();
+  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   unsigned Size = Op.getValueSizeInBits();
 
+  if (InVT.isScalableVector()) {
+    // This will be matched by custom code during ISelDAGToDAG.
+    if (Idx == 0 && isPackedVectorType(InVT, DAG))
+      return Op;
+
+    return SDValue();
+  }
+
   // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
-  if (Val == 0)
+  if (Idx == 0 && InVT.getSizeInBits() <= 128)
     return Op;
 
   // If this is extracting the upper 64-bits of a 128-bit vector, we match
   // that directly.
-  if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
+  if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64)
     return Op;
 
   return SDValue();
 }
 
+SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
+                                                     SelectionDAG &DAG) const {
+  assert(Op.getValueType().isScalableVector() &&
+         "Only expect to lower inserts into scalable vectors!");
+
+  EVT InVT = Op.getOperand(1).getValueType();
+  unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+
+  // We don't have any patterns for scalable vector yet.
+  if (InVT.isScalableVector() || !useSVEForFixedLengthVectorVT(InVT))
+    return SDValue();
+
+  // This will be matched by custom code during ISelDAGToDAG.
+  if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
+    return Op;
+
+  return SDValue();
+}
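The scalable side of these casts is always a packed type, because fixed length lowering wraps every fixed vector in the fully packed container with the same element type. A hypothetical helper (the name and the mapping's placement are assumptions, not part of this patch) showing that fixed-to-scalable container computation:

#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;

// Map a fixed length vector type onto the packed scalable container with the
// same element type, e.g. v8i32 -> nxv4i32, v32f64 -> nxv2f64. The container
// always has 128 (AArch64::SVEBitsPerBlock) known-minimum bits, which is
// exactly what isPackedVectorType() checks for.
static EVT getPackedContainer(LLVMContext &Ctx, EVT VT) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector!");
  unsigned PackedElts = 128 / VT.getScalarSizeInBits();
  return EVT::getVectorVT(Ctx, VT.getVectorElementType(), PackedElts,
                          /*IsScalable=*/true);
}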
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -850,6 +850,7 @@ private:
   SDValue LowerToPredicatedOp(SDValue Op, SelectionDAG &DAG,
                               unsigned NewOp) const;
   SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVectorSRA_SRL_SHL(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const;
new file (88 lines): test/CodeGen/AArch64/sve-fixed-length-subvector.ll
--- /dev/null
+++ b/test/CodeGen/AArch64/sve-fixed-length-subvector.ll
@@ -0,0 +1,88 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefix=NO_SVE
+; RUN: llc -aarch64-sve-vector-bits-min=256 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=384 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=2048 -aarch64-enable-atomic-cfg-tidy=false < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
+
+; Test we can code generate patterns of the form:
+;   fixed_length_vector = ISD::EXTRACT_SUBVECTOR scalable_vector, 0
+;   scalable_vector = ISD::INSERT_SUBVECTOR scalable_vector, fixed_length_vector, 0
+;
+; NOTE: Currently shufflevector does not support scalable vectors so it cannot
+; be used to model the above operations. Instead these tests rely on knowing
+; how fixed length operations are lowered to scalable ones, with multiple
+; blocks ensuring insert/extract sequences are not folded away.
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: ptrue
+
+define void @subvector_v8i32(<8 x i32> *%in, <8 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v8i32:
+; CHECK: ptrue [[PG:p[0-9]+]].s, vl8
+; CHECK: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; CHECK: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+  %a = load <8 x i32>, <8 x i32>* %in
+  br label %bb1
+
+bb1:
+  store <8 x i32> %a, <8 x i32>* %out
+  ret void
+}
+
+define void @subvector_v16i32(<16 x i32> *%in, <16 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v16i32:
+; VBITS_GE_512: ptrue [[PG:p[0-9]+]].s, vl16
+; VBITS_GE_512: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_512: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+  %a = load <16 x i32>, <16 x i32>* %in
+  br label %bb1
+
+bb1:
+  store <16 x i32> %a, <16 x i32>* %out
+  ret void
+}
+
+define void @subvector_v32i32(<32 x i32> *%in, <32 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v32i32:
+; VBITS_GE_1024: ptrue [[PG:p[0-9]+]].s, vl32
+; VBITS_GE_1024: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_1024: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+  %a = load <32 x i32>, <32 x i32>* %in
+  br label %bb1
+
+bb1:
+  store <32 x i32> %a, <32 x i32>* %out
+  ret void
+}
+
+define void @subvector_v64i32(<64 x i32> *%in, <64 x i32>* %out) #0 {
+; CHECK-LABEL: subvector_v64i32:
+; VBITS_GE_2048: ptrue [[PG:p[0-9]+]].s, vl64
+; VBITS_GE_2048: ld1w { [[DATA:z[0-9]+.s]] }, [[PG]]/z, [x0]
+; VBITS_GE_2048: st1w { [[DATA]] }, [[PG]], [x1]
+; CHECK: ret
+  %a = load <64 x i32>, <64 x i32>* %in
+  br label %bb1
+
+bb1:
+  store <64 x i32> %a, <64 x i32>* %out
+  ret void
+}
+
+attributes #0 = { "target-features"="+sve" }