mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 10:42:39 +01:00
[IR] Introduce llvm.experimental.vector.splice intrinsic
This patch introduces a new intrinsic @llvm.experimental.vector.splice that constructs a vector of the same type as the two input vectors, based on an immediate where the sign of the immediate distinguishes two variants. A positive immediate specifies an index into the first vector and a negative immediate specifies the number of trailing elements to extract from the first vector. For example: @llvm.experimental.vector.splice(<A,B,C,D>, <E,F,G,H>, 1) ==> <B, C, D, E> ; index @llvm.experimental.vector.splice(<A,B,C,D>, <E,F,G,H>, -3) ==> <B, C, D, E> ; trailing element count These intrinsics support both fixed and scalable vectors, where the former is lowered to a shufflevector to maintain existing behaviour, although while marked as experimental the recommended way to express this operation for fixed-width vectors is to use shufflevector. For scalable vectors where it is not possible to express a shufflevector mask for this operation, a new ISD node has been implemented. This is one of the named shufflevector intrinsics proposed on the mailing-list in the RFC at [1]. Patch by Paul Walker and Cullen Rhodes. [1] https://lists.llvm.org/pipermail/llvm-dev/2020-November/146864.html Reviewed By: sdesmalen Differential Revision: https://reviews.llvm.org/D94708
This commit is contained in:
parent
ee05374523
commit
6682076a17
@ -16510,6 +16510,52 @@ Arguments:
|
||||
|
||||
The argument to this intrinsic must be a vector.
|
||||
|
||||
'``llvm.experimental.vector.splice``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %vec1, <2 x double> %vec2, i32 %imm)
|
||||
declare <vscale x 4 x i32> @llvm.experimental.vector.splice.nxv4i32(<vscale x 4 x i32> %vec1, <vscale x 4 x i32> %vec2, i32 %imm)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.experimental.vector.splice.*``' intrinsics construct a vector by
|
||||
concatenating elements from the first input vector with elements of the second
|
||||
input vector, returning a vector of the same type as the input vectors. The
|
||||
signed immediate, modulo the number of elements in the vector, is the index
|
||||
into the first vector from which to extract the result value. This means
|
||||
conceptually that for a non-negative immediate, a vector is extracted from
|
||||
``concat(%vec1, %vec2)`` starting at index ``imm``, whereas for a negative
|
||||
immediate, it extracts ``-imm`` trailing elements from the first vector, and
|
||||
the remaining elements from ``%vec2``.
|
||||
|
||||
These intrinsics work for both fixed and scalable vectors. While this intrinsic
|
||||
is marked as experimental, the recommended way to express this operation for
|
||||
fixed-width vectors is still to use a shufflevector, as that may allow for more
|
||||
optimization opportunities.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: text
|
||||
|
||||
llvm.experimental.vector.splice(<A,B,C,D>, <E,F,G,H>, 1) ==> <B, C, D, E> ; index
|
||||
llvm.experimental.vector.splice(<A,B,C,D>, <E,F,G,H>, -3) ==> <B, C, D, E> ; trailing elements
|
||||
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The first two operands are vectors with the same type. The third argument
|
||||
``imm`` is the start index, modulo VL, where VL is the runtime vector length of
|
||||
the source/result vector. The ``imm`` is a signed integer constant in the range
|
||||
``-VL <= imm < VL``. For values outside of this range the result is poison.
|
||||
|
||||
Matrix Intrinsics
|
||||
-----------------
|
||||
|
||||
|
@ -556,6 +556,18 @@ enum NodeType {
|
||||
/// in terms of the element size of VEC1/VEC2, not in terms of bytes.
|
||||
VECTOR_SHUFFLE,
|
||||
|
||||
/// VECTOR_SPLICE(VEC1, VEC2, IMM) - Returns a subvector of the same type as
|
||||
/// VEC1/VEC2 from CONCAT_VECTORS(VEC1, VEC2), based on the IMM in two ways.
|
||||
/// Let the result type be T. If IMM is non-negative it represents the starting
|
||||
/// element number (an index) from which a subvector of type T is extracted
|
||||
/// from CONCAT_VECTORS(VEC1, VEC2). If IMM is negative it represents a count
|
||||
/// specifying the number of trailing elements to extract from VEC1, where the
|
||||
/// elements of T are selected using the following algorithm:
|
||||
/// RESULT[i] = CONCAT_VECTORS(VEC1,VEC2)[VEC1.ElementCount - ABS(IMM) + i]
|
||||
/// If IMM is not in the range [-VL, VL-1] the result vector is undefined. IMM
|
||||
/// is a constant integer.
|
||||
VECTOR_SPLICE,
|
||||
|
||||
/// SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a
|
||||
/// scalar value into element 0 of the resultant vector type. The top
|
||||
/// elements 1 to N-1 of the N-element vector are undefined. The type
|
||||
|
@ -4511,6 +4511,10 @@ public:
|
||||
/// Returns true if the expansion was successful.
|
||||
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
|
||||
|
||||
/// Method for building the DAG expansion of ISD::VECTOR_SPLICE. This
|
||||
/// method accepts vectors as its arguments.
|
||||
SDValue expandVectorSplice(SDNode *Node, SelectionDAG &DAG) const;
|
||||
|
||||
//===--------------------------------------------------------------------===//
|
||||
// Instruction Emitting Hooks
|
||||
//
|
||||
|
@ -1659,6 +1659,13 @@ def int_experimental_vector_extract : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
|
||||
[llvm_anyvector_ty, llvm_i64_ty],
|
||||
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
|
||||
|
||||
//===---------- Named shufflevector intrinsics ------===//
|
||||
def int_experimental_vector_splice : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>,
|
||||
LLVMMatchType<0>,
|
||||
llvm_i32_ty],
|
||||
[IntrNoMem, ImmArg<ArgIndex<2>>]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -241,6 +241,9 @@ def SDTMaskedLoad: SDTypeProfile<1, 4, [ // masked load
|
||||
def SDTVecShuffle : SDTypeProfile<1, 2, [
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>
|
||||
]>;
|
||||
def SDTVecSlice : SDTypeProfile<1, 3, [ // vector splice
|
||||
SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisInt<3>
|
||||
]>;
|
||||
def SDTVecExtract : SDTypeProfile<1, 2, [ // vector extract
|
||||
SDTCisEltOfVec<0, 1>, SDTCisPtrTy<2>
|
||||
]>;
|
||||
@ -655,6 +658,7 @@ def ist : SDNode<"ISD::STORE" , SDTIStore,
|
||||
|
||||
def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
|
||||
def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>;
|
||||
def vector_splice : SDNode<"ISD::VECTOR_SPLICE", SDTVecSlice, []>;
|
||||
def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
|
||||
def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
|
||||
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
|
||||
|
@ -3208,6 +3208,10 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
|
||||
Results.push_back(Tmp1);
|
||||
break;
|
||||
}
|
||||
case ISD::VECTOR_SPLICE: {
|
||||
Results.push_back(TLI.expandVectorSplice(Node, DAG));
|
||||
break;
|
||||
}
|
||||
case ISD::EXTRACT_ELEMENT: {
|
||||
EVT OpTy = Node->getOperand(0).getValueType();
|
||||
if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue()) {
|
||||
@ -4715,7 +4719,14 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
|
||||
Results.push_back(Tmp1);
|
||||
break;
|
||||
}
|
||||
|
||||
case ISD::VECTOR_SPLICE: {
|
||||
Tmp1 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(0));
|
||||
Tmp2 = DAG.getNode(ISD::ANY_EXTEND, dl, NVT, Node->getOperand(1));
|
||||
Tmp3 = DAG.getNode(ISD::VECTOR_SPLICE, dl, NVT, Tmp1, Tmp2,
|
||||
Node->getOperand(2));
|
||||
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp3));
|
||||
break;
|
||||
}
|
||||
case ISD::SELECT_CC: {
|
||||
SDValue Cond = Node->getOperand(4);
|
||||
ISD::CondCode CCCode = cast<CondCodeSDNode>(Cond)->get();
|
||||
@ -4753,7 +4764,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
|
||||
Results.push_back(Tmp1);
|
||||
break;
|
||||
}
|
||||
|
||||
case ISD::SETCC:
|
||||
case ISD::STRICT_FSETCC:
|
||||
case ISD::STRICT_FSETCCS: {
|
||||
|
@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
||||
Res = PromoteIntRes_VECTOR_REVERSE(N); break;
|
||||
case ISD::VECTOR_SHUFFLE:
|
||||
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
|
||||
case ISD::VECTOR_SPLICE:
|
||||
Res = PromoteIntRes_VECTOR_SPLICE(N); break;
|
||||
case ISD::INSERT_VECTOR_ELT:
|
||||
Res = PromoteIntRes_INSERT_VECTOR_ELT(N); break;
|
||||
case ISD::BUILD_VECTOR:
|
||||
@ -4616,6 +4618,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
|
||||
return Swap.getValue(1);
|
||||
}
|
||||
|
||||
// Promote the integer result of an ISD::VECTOR_SPLICE node: both vector
// operands are swapped for their promoted counterparts and the splice is
// rebuilt on the promoted type. The immediate operand is forwarded unchanged.
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SPLICE(SDNode *N) {
  SDLoc DL(N);

  SDValue PromotedLHS = GetPromotedInteger(N->getOperand(0));
  SDValue PromotedRHS = GetPromotedInteger(N->getOperand(1));

  // The rebuilt node takes the type of the promoted first operand.
  return DAG.getNode(ISD::VECTOR_SPLICE, DL, PromotedLHS.getValueType(),
                     PromotedLHS, PromotedRHS, N->getOperand(2));
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
|
||||
|
||||
|
@ -300,6 +300,7 @@ private:
|
||||
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
|
||||
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
|
||||
SDValue PromoteIntRes_VECTOR_SPLICE(SDNode *N);
|
||||
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
|
||||
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
|
||||
SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
|
||||
@ -838,6 +839,7 @@ private:
|
||||
void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
|
||||
SDValue &Hi);
|
||||
void SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
||||
|
@ -947,6 +947,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::VECTOR_SHUFFLE:
|
||||
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
|
||||
break;
|
||||
case ISD::VECTOR_SPLICE:
|
||||
SplitVecRes_VECTOR_SPLICE(N, Lo, Hi);
|
||||
break;
|
||||
case ISD::VAARG:
|
||||
SplitVecRes_VAARG(N, Lo, Hi);
|
||||
break;
|
||||
@ -1257,7 +1260,7 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
|
||||
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
|
||||
Hi = DAG.getNode(
|
||||
ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
|
||||
DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorNumElements(), dl));
|
||||
DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorMinNumElements(), dl));
|
||||
}
|
||||
|
||||
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
|
||||
@ -5519,3 +5522,19 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
|
||||
Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
|
||||
Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
|
||||
}
|
||||
|
||||
// Split the result of an ISD::VECTOR_SPLICE node into two halves. The splice
// is first expanded as a whole via TargetLowering::expandVectorSplice, then
// the low and high sub-vectors are extracted from the expanded value.
void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
                                                 SDValue &Hi) {
  SDLoc DL(N);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  SDValue Spliced = TLI.expandVectorSplice(N, DAG);

  // Low half starts at element 0 of the expanded vector.
  SDValue LoIdx = DAG.getVectorIdxConstant(0, DL);
  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Spliced, LoIdx);

  // High half starts right after the (minimum) element count of the low half.
  SDValue HiIdx =
      DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL);
  Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Spliced, HiIdx);
}
|
||||
|
@ -7105,6 +7105,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
||||
case Intrinsic::experimental_vector_reverse:
|
||||
visitVectorReverse(I);
|
||||
return;
|
||||
case Intrinsic::experimental_vector_splice:
|
||||
visitVectorSplice(I);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -10956,3 +10959,37 @@ void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
|
||||
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
|
||||
DAG.getVTList(ValueVTs), Values));
|
||||
}
|
||||
|
||||
void SelectionDAGBuilder::visitVectorSplice(const CallInst &I) {
|
||||
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
|
||||
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
|
||||
|
||||
SDLoc DL = getCurSDLoc();
|
||||
SDValue V1 = getValue(I.getOperand(0));
|
||||
SDValue V2 = getValue(I.getOperand(1));
|
||||
int64_t Imm = cast<ConstantInt>(I.getOperand(2))->getSExtValue();
|
||||
|
||||
// VECTOR_SHUFFLE doesn't support a scalable mask so use a dedicated node.
|
||||
if (VT.isScalableVector()) {
|
||||
MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
|
||||
setValue(&I, DAG.getNode(ISD::VECTOR_SPLICE, DL, VT, V1, V2,
|
||||
DAG.getConstant(Imm, DL, IdxVT)));
|
||||
return;
|
||||
}
|
||||
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
|
||||
if ((-Imm > NumElts) || (Imm >= NumElts)) {
|
||||
// Result is undefined if immediate is out-of-bounds.
|
||||
setValue(&I, DAG.getUNDEF(VT));
|
||||
return;
|
||||
}
|
||||
|
||||
uint64_t Idx = (NumElts + Imm) % NumElts;
|
||||
|
||||
// Use VECTOR_SHUFFLE to maintain original behaviour for fixed-length vectors.
|
||||
SmallVector<int, 8> Mask;
|
||||
for (unsigned i = 0; i < NumElts; ++i)
|
||||
Mask.push_back(Idx + i);
|
||||
setValue(&I, DAG.getVectorShuffle(VT, DL, V1, V2, Mask));
|
||||
}
|
||||
|
@ -778,6 +778,7 @@ private:
|
||||
|
||||
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
|
||||
void visitVectorReverse(const CallInst &I);
|
||||
void visitVectorSplice(const CallInst &I);
|
||||
|
||||
void visitUserOp1(const Instruction &I) {
|
||||
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
|
||||
|
@ -288,6 +288,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
|
||||
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
|
||||
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
|
||||
case ISD::VECTOR_SPLICE: return "vector_splice";
|
||||
case ISD::SPLAT_VECTOR: return "splat_vector";
|
||||
case ISD::VECTOR_REVERSE: return "vector_reverse";
|
||||
case ISD::CARRY_FALSE: return "carry_false";
|
||||
|
@ -8625,3 +8625,76 @@ SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
|
||||
SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
|
||||
return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
|
||||
}
|
||||
|
||||
// Expand ISD::VECTOR_SPLICE for scalable vectors through a stack temporary.
//
// Node is VECTOR_SPLICE(V1, V2, Imm) with a scalable result type and a
// constant Imm. V1 and V2 are stored back-to-back into a stack slot sized for
// a vector with twice the element count, and the result is loaded from an
// address chosen by the immediate:
//   * Imm >= 0: the load starts Imm elements into the stored V1:V2 pair.
//   * Imm <  0: the load starts -Imm elements before the start of V2.
// Returns the load that produces the spliced vector.
SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use VECTOR_SHUFFLE!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // The stack slot holds both inputs, i.e. twice the element count of VT.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);

  // Runtime size of one VT-typed vector in bytes (vscale * known-min store
  // size). It is both the offset of V2 within the slot and the upper bound
  // used when clamping the trailing byte count below.
  SDValue VecBytes = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));

  // Store the hi part of CONCAT_VECTORS(V1, V2)
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, VecBytes);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Compute the address of element Imm within the stored data.
    // getVectorElementPointer takes care of clamping the index if it's
    // out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  // Only when the count exceeds the known-minimum element count can it
  // exceed the runtime vector length, so the clamp is emitted just then.
  if (TrailingElts > VT.getVectorMinNumElements())
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VecBytes);

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
|
||||
|
@ -849,6 +849,9 @@ void TargetLoweringBase::initActions() {
|
||||
setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
|
||||
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Expand);
|
||||
setOperationAction(ISD::VECREDUCE_SEQ_FMUL, VT, Expand);
|
||||
|
||||
// Named vector shuffles default to expand.
|
||||
setOperationAction(ISD::VECTOR_SPLICE, VT, Expand);
|
||||
}
|
||||
|
||||
// Most targets ignore the @llvm.prefetch intrinsic.
|
||||
|
@ -1108,6 +1108,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::MUL, VT, Custom);
|
||||
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
setOperationAction(ISD::SDIV, VT, Custom);
|
||||
setOperationAction(ISD::UDIV, VT, Custom);
|
||||
setOperationAction(ISD::SMIN, VT, Custom);
|
||||
@ -1276,6 +1277,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
||||
for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
|
||||
setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
|
||||
}
|
||||
|
||||
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
|
||||
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
|
||||
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
|
||||
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
|
||||
}
|
||||
|
||||
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
|
||||
|
142
test/CodeGen/AArch64/named-vector-shuffles-neon.ll
Normal file
142
test/CodeGen/AArch64/named-vector-shuffles-neon.ll
Normal file
@ -0,0 +1,142 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs < %s | FileCheck %s
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
;
|
||||
; VECTOR_SPLICE (index)
|
||||
;
|
||||
|
||||
define <16 x i8> @splice_v16i8_idx(<16 x i8> %a, <16 x i8> %b) #0 {
|
||||
; CHECK-LABEL: splice_v16i8_idx:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #1
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8> %a, <16 x i8> %b, i32 1)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <2 x double> @splice_v2f64_idx(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: splice_v2f64_idx:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #8
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 1)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Verify promote type legalisation works as expected.
|
||||
define <2 x i8> @splice_v2i8_idx(<2 x i8> %a, <2 x i8> %b) #0 {
|
||||
; CHECK-LABEL: splice_v2i8_idx:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <2 x i8> @llvm.experimental.vector.splice.v2i8(<2 x i8> %a, <2 x i8> %b, i32 1)
|
||||
ret <2 x i8> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <8 x i32> @splice_v8i32_idx(<8 x i32> %a, <8 x i32> %b) #0 {
|
||||
; CHECK-LABEL: splice_v8i32_idx:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #4
|
||||
; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #4
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> %a, <8 x i32> %b, i32 5)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <16 x float> @splice_v16f32_idx(<16 x float> %a, <16 x float> %b) #0 {
|
||||
; CHECK-LABEL: splice_v16f32_idx:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12
|
||||
; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #12
|
||||
; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12
|
||||
; CHECK-NEXT: ext v3.16b, v4.16b, v5.16b, #12
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 7)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
; Verify out-of-bounds index results in undef vector.
|
||||
define <2 x double> @splice_v2f64_idx_out_of_bounds(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: splice_v2f64_idx_out_of_bounds:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 2)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
;
|
||||
; VECTOR_SPLICE (trailing elements)
|
||||
;
|
||||
|
||||
define <16 x i8> @splice_v16i8(<16 x i8> %a, <16 x i8> %b) #0 {
|
||||
; CHECK-LABEL: splice_v16i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #1
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8> %a, <16 x i8> %b, i32 -15)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <2 x double> @splice_v2f64(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: splice_v2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #8
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 -1)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Verify promote type legalisation works as expected.
|
||||
define <2 x i8> @splice_v2i8(<2 x i8> %a, <2 x i8> %b) #0 {
|
||||
; CHECK-LABEL: splice_v2i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.8b, v0.8b, v1.8b, #4
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <2 x i8> @llvm.experimental.vector.splice.v2i8(<2 x i8> %a, <2 x i8> %b, i32 -1)
|
||||
ret <2 x i8> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <8 x i32> @splice_v8i32(<8 x i32> %a, <8 x i32> %b) #0 {
|
||||
; CHECK-LABEL: splice_v8i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #4
|
||||
; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #4
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> %a, <8 x i32> %b, i32 -3)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <16 x float> @splice_v16f32(<16 x float> %a, <16 x float> %b) #0 {
|
||||
; CHECK-LABEL: splice_v16f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12
|
||||
; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #12
|
||||
; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12
|
||||
; CHECK-NEXT: ext v3.16b, v4.16b, v5.16b, #12
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 -9)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
; Verify out-of-bounds trailing element count results in undef vector.
|
||||
define <2 x double> @splice_v2f64_out_of_bounds(<2 x double> %a, <2 x double> %b) #0 {
|
||||
; CHECK-LABEL: splice_v2f64_out_of_bounds:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ret
|
||||
%res = call <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double> %a, <2 x double> %b, i32 -3)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
declare <2 x i8> @llvm.experimental.vector.splice.v2i8(<2 x i8>, <2 x i8>, i32)
|
||||
declare <16 x i8> @llvm.experimental.vector.splice.v16i8(<16 x i8>, <16 x i8>, i32)
|
||||
declare <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32>, <8 x i32>, i32)
|
||||
declare <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float>, <16 x float>, i32)
|
||||
declare <2 x double> @llvm.experimental.vector.splice.v2f64(<2 x double>, <2 x double>, i32)
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+neon" }
|
1310
test/CodeGen/AArch64/named-vector-shuffles-sve.ll
Normal file
1310
test/CodeGen/AArch64/named-vector-shuffles-sve.ll
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user