mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
[CodeGen][SelectionDAG]Add new intrinsic experimental.vector.reverse
This patch adds a new intrinsic experimental.vector.reverse that takes a single vector and returns a vector of matching type but with the original lane order reversed. For example: ``` vector.reverse(<A,B,C,D>) ==> <D,C,B,A> ``` The new intrinsic supports fixed and scalable vectors types. The fixed-width vector relies on shufflevector to maintain existing behaviour. Scalable vector uses the new ISD node - VECTOR_REVERSE. This new intrinsic is one of the named shufflevector intrinsics proposed on the mailing-list in the RFC at [1]. Patch by Paul Walker (@paulwalker-arm). [1] https://lists.llvm.org/pipermail/llvm-dev/2020-November/146864.html Differential Revision: https://reviews.llvm.org/D94883
This commit is contained in:
parent
b6252362a0
commit
9ea32f75fa
@ -16233,6 +16233,33 @@ runtime, then the result vector is undefined. The ``idx`` parameter must be a
|
||||
vector index constant type (for most targets this will be an integer pointer
|
||||
type).
|
||||
|
||||
'``llvm.experimental.vector.reverse``' Intrinsic
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Syntax:
|
||||
"""""""
|
||||
This is an overloaded intrinsic.
|
||||
|
||||
::
|
||||
|
||||
declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
|
||||
declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
|
||||
|
||||
Overview:
|
||||
"""""""""
|
||||
|
||||
The '``llvm.experimental.vector.reverse.*``' intrinsics reverse a vector.
|
||||
The intrinsic takes a single vector and returns a vector of matching type but
|
||||
with the original lane order reversed. These intrinsics work for both fixed
|
||||
and scalable vectors. While this intrinsic is marked as experimental the
|
||||
recommended way to express reverse operations for fixed-width vectors is still
|
||||
to use a shufflevector, as that may allow for more optimization opportunities.
|
||||
|
||||
Arguments:
|
||||
""""""""""
|
||||
|
||||
The argument to this intrinsic must be a vector.
|
||||
|
||||
Matrix Intrinsics
|
||||
-----------------
|
||||
|
||||
|
@ -540,6 +540,11 @@ enum NodeType {
|
||||
/// vector, but not the other way around.
|
||||
EXTRACT_SUBVECTOR,
|
||||
|
||||
/// VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR,
|
||||
/// whose elements are shuffled using the following algorithm:
|
||||
/// RESULT[i] = VECTOR[VECTOR.ElementCount - 1 - i]
|
||||
VECTOR_REVERSE,
|
||||
|
||||
/// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
|
||||
/// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
|
||||
/// values that indicate which value (or undef) each result element will
|
||||
|
@ -1635,6 +1635,12 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
|
||||
ImmArg<ArgIndex<1>>,
|
||||
ImmArg<ArgIndex<2>>]>;
|
||||
|
||||
//===------------ Intrinsics to perform common vector shuffles ------------===//
|
||||
|
||||
def int_experimental_vector_reverse : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
|
||||
[LLVMMatchType<0>],
|
||||
[IntrNoMem]>;
|
||||
|
||||
//===---------- Intrinsics to query properties of scalable vectors --------===//
|
||||
def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
|
||||
|
||||
|
@ -254,6 +254,9 @@ def SDTFPVecReduce : SDTypeProfile<1, 1, [ // FP vector reduction
|
||||
SDTCisFP<0>, SDTCisVec<1>
|
||||
]>;
|
||||
|
||||
// Type profile for ISD::VECTOR_REVERSE: one result and one operand, where
// the result is a vector of exactly the same type as the operand
// (reversing lanes cannot change the vector type).
def SDTVecReverse : SDTypeProfile<1, 1, [ // vector reverse
  SDTCisVec<0>, SDTCisSameAs<0,1>
]>;
|
||||
|
||||
def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract
|
||||
SDTCisSubVecOfVec<0,1>, SDTCisInt<2>
|
||||
@ -651,6 +654,7 @@ def ist : SDNode<"ISD::STORE" , SDTIStore,
|
||||
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
|
||||
|
||||
def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
|
||||
def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>;
|
||||
def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
|
||||
def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
|
||||
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,
|
||||
|
@ -5373,6 +5373,12 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
|
||||
return Op0;
|
||||
break;
|
||||
}
|
||||
case Intrinsic::experimental_vector_reverse:
|
||||
// experimental.vector.reverse(experimental.vector.reverse(x)) -> x
|
||||
if (match(Op0,
|
||||
m_Intrinsic<Intrinsic::experimental_vector_reverse>(m_Value(X))))
|
||||
return X;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -96,6 +96,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
|
||||
|
||||
case ISD::EXTRACT_SUBVECTOR:
|
||||
Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
|
||||
case ISD::VECTOR_REVERSE:
|
||||
Res = PromoteIntRes_VECTOR_REVERSE(N); break;
|
||||
case ISD::VECTOR_SHUFFLE:
|
||||
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
|
||||
case ISD::INSERT_VECTOR_ELT:
|
||||
@ -4662,6 +4664,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
|
||||
return DAG.getBuildVector(NOutVT, dl, Ops);
|
||||
}
|
||||
|
||||
/// Promote the integer result of an ISD::VECTOR_REVERSE node. Integer
/// promotion widens each element but does not change the lane count or
/// lane order, so it is sufficient to promote the input vector and emit a
/// VECTOR_REVERSE in the promoted type.
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
  SDLoc dl(N);

  // The promoted operand's type is also the promoted result type.
  SDValue V0 = GetPromotedInteger(N->getOperand(0));
  EVT OutVT = V0.getValueType();

  return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0);
}
|
||||
|
||||
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
|
||||
ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);
|
||||
|
@ -298,6 +298,7 @@ private:
|
||||
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
|
||||
SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
|
||||
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
|
||||
SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
|
||||
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
|
||||
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
|
||||
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
|
||||
@ -834,6 +835,7 @@ private:
|
||||
void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
|
||||
SDValue &Hi);
|
||||
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
|
||||
|
@ -930,6 +930,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
|
||||
case ISD::SETCC:
|
||||
SplitVecRes_SETCC(N, Lo, Hi);
|
||||
break;
|
||||
case ISD::VECTOR_REVERSE:
|
||||
SplitVecRes_VECTOR_REVERSE(N, Lo, Hi);
|
||||
break;
|
||||
case ISD::VECTOR_SHUFFLE:
|
||||
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
|
||||
break;
|
||||
@ -5492,3 +5495,13 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
|
||||
Ops[Idx] = FillVal;
|
||||
return DAG.getBuildVector(NVT, dl, Ops);
|
||||
}
|
||||
|
||||
/// Split the result of an ISD::VECTOR_REVERSE node into Lo/Hi halves.
/// Reversing a vector both swaps the two halves and reverses each half:
/// the low half of the result is the reversed HIGH half of the input, and
/// the high half of the result is the reversed LOW half of the input.
void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
                                                  SDValue &Hi) {
  SDValue InLo, InHi;
  GetSplitVector(N->getOperand(0), InLo, InHi);
  SDLoc DL(N);

  // Note the deliberate cross-over: Lo comes from InHi and Hi from InLo.
  Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
  Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
}
|
||||
|
@ -7025,6 +7025,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
|
||||
setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
|
||||
return;
|
||||
}
|
||||
case Intrinsic::experimental_vector_reverse:
|
||||
visitVectorReverse(I);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@ -10836,6 +10839,29 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Lower a call to llvm.experimental.vector.reverse. Scalable vectors are
/// lowered to the dedicated ISD::VECTOR_REVERSE node (a shuffle mask cannot
/// describe a reversal of an unknown-at-compile-time element count);
/// fixed-width vectors keep the pre-existing lowering via VECTOR_SHUFFLE
/// with an explicit reversed index mask.
void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());

  SDLoc DL = getCurSDLoc();
  SDValue V = getValue(I.getOperand(0));
  // The intrinsic is declared with LLVMMatchType<0>, so operand and result
  // types must already agree by the time we get here.
  assert(VT == V.getValueType() && "Malformed vector.reverse!");

  if (VT.isScalableVector()) {
    setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
    return;
  }

  // Use VECTOR_SHUFFLE for the fixed-length vector
  // to maintain existing behavior.
  // For fixed vectors getVectorMinNumElements() is the exact element count,
  // so the mask [N-1, N-2, ..., 0] reverses every lane.
  SmallVector<int, 8> Mask;
  unsigned NumElts = VT.getVectorMinNumElements();
  for (unsigned i = 0; i != NumElts; ++i)
    Mask.push_back(NumElts - 1 - i);

  setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}
|
||||
|
||||
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
|
||||
SmallVector<EVT, 4> ValueVTs;
|
||||
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
|
||||
|
@ -773,6 +773,7 @@ private:
|
||||
void visitGCResult(const GCResultInst &I);
|
||||
|
||||
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
|
||||
void visitVectorReverse(const CallInst &I);
|
||||
|
||||
void visitUserOp1(const Instruction &I) {
|
||||
llvm_unreachable("UserOp1 should not exist at instruction selection time!");
|
||||
|
@ -289,6 +289,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
|
||||
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
|
||||
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
|
||||
case ISD::SPLAT_VECTOR: return "splat_vector";
|
||||
case ISD::VECTOR_REVERSE: return "vector_reverse";
|
||||
case ISD::CARRY_FALSE: return "carry_false";
|
||||
case ISD::ADDC: return "addc";
|
||||
case ISD::ADDE: return "adde";
|
||||
|
@ -3894,7 +3894,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
|
||||
return false;
|
||||
|
||||
// Vectors (of > 1 lane) in big endian need tricky handling.
|
||||
if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
|
||||
if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
|
||||
!Subtarget->isLittleEndian())
|
||||
return false;
|
||||
|
||||
|
@ -1853,7 +1853,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
MAKE_CASE(AArch64ISD::CLASTB_N)
|
||||
MAKE_CASE(AArch64ISD::LASTA)
|
||||
MAKE_CASE(AArch64ISD::LASTB)
|
||||
MAKE_CASE(AArch64ISD::REV)
|
||||
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
|
||||
MAKE_CASE(AArch64ISD::TBL)
|
||||
MAKE_CASE(AArch64ISD::FADD_PRED)
|
||||
@ -3594,7 +3593,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
|
||||
Op.getOperand(1), Op.getOperand(2));
|
||||
case Intrinsic::aarch64_sve_rev:
|
||||
return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
|
||||
return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
|
||||
Op.getOperand(1));
|
||||
case Intrinsic::aarch64_sve_tbl:
|
||||
return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
|
||||
|
@ -292,7 +292,6 @@ enum NodeType : unsigned {
|
||||
CLASTB_N,
|
||||
LASTA,
|
||||
LASTB,
|
||||
REV,
|
||||
TBL,
|
||||
|
||||
// Floating-point reductions.
|
||||
|
@ -249,9 +249,6 @@ def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithIn
|
||||
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
|
||||
def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
|
||||
|
||||
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
|
||||
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
|
||||
|
||||
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
|
||||
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
|
||||
|
||||
@ -587,8 +584,8 @@ let Predicates = [HasSVE] in {
|
||||
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
|
||||
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;
|
||||
|
||||
defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;
|
||||
defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;
|
||||
defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>;
|
||||
defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>;
|
||||
|
||||
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
|
||||
defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;
|
||||
|
230
test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
Normal file
230
test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll
Normal file
@ -0,0 +1,230 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG %s
|
||||
; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
|
||||
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
;
|
||||
; VECTOR_REVERSE
|
||||
;
|
||||
|
||||
define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: .LCPI0_0:
|
||||
; CHECK: .byte 15 // 0xf
|
||||
; CHECK-NEXT: .byte 14 // 0xe
|
||||
; CHECK-NEXT: .byte 13 // 0xd
|
||||
; CHECK-NEXT: .byte 12 // 0xc
|
||||
; CHECK-NEXT: .byte 11 // 0xb
|
||||
; CHECK-NEXT: .byte 10 // 0xa
|
||||
; CHECK-NEXT: .byte 9 // 0x9
|
||||
; CHECK-NEXT: .byte 8 // 0x8
|
||||
; CHECK-NEXT: .byte 7 // 0x7
|
||||
; CHECK-NEXT: .byte 6 // 0x6
|
||||
; CHECK-NEXT: .byte 5 // 0x5
|
||||
; CHECK-NEXT: .byte 4 // 0x4
|
||||
; CHECK-NEXT: .byte 3 // 0x3
|
||||
; CHECK-NEXT: .byte 2 // 0x2
|
||||
; CHECK-NEXT: .byte 1 // 0x1
|
||||
; CHECK-NEXT: .byte 0 // 0x0
|
||||
; CHECK-LABEL: reverse_v16i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI0_0
|
||||
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
|
||||
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
|
||||
ret <16 x i8> %res
|
||||
}
|
||||
|
||||
define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: .LCPI1_0:
|
||||
; CHECK: .byte 14 // 0xe
|
||||
; CHECK-NEXT: .byte 15 // 0xf
|
||||
; CHECK-NEXT: .byte 12 // 0xc
|
||||
; CHECK-NEXT: .byte 13 // 0xd
|
||||
; CHECK-NEXT: .byte 10 // 0xa
|
||||
; CHECK-NEXT: .byte 11 // 0xb
|
||||
; CHECK-NEXT: .byte 8 // 0x8
|
||||
; CHECK-NEXT: .byte 9 // 0x9
|
||||
; CHECK-NEXT: .byte 6 // 0x6
|
||||
; CHECK-NEXT: .byte 7 // 0x7
|
||||
; CHECK-NEXT: .byte 4 // 0x4
|
||||
; CHECK-NEXT: .byte 5 // 0x5
|
||||
; CHECK-NEXT: .byte 2 // 0x2
|
||||
; CHECK-NEXT: .byte 3 // 0x3
|
||||
; CHECK-NEXT: .byte 0 // 0x0
|
||||
; CHECK-NEXT: .byte 1 // 0x1
|
||||
; CHECK-LABEL: reverse_v8i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI1_0
|
||||
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
|
||||
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
|
||||
ret <8 x i16> %res
|
||||
}
|
||||
|
||||
define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: reverse_v4i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev64 v0.4s, v0.4s
|
||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a)
|
||||
ret <4 x i32> %res
|
||||
}
|
||||
|
||||
define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: reverse_v2i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a)
|
||||
ret <2 x i64> %res
|
||||
}
|
||||
|
||||
define <8 x half> @reverse_v8f16(<8 x half> %a) #0 {
|
||||
; CHECK-LABEL: .LCPI4_0:
|
||||
; CHECK: .byte 14 // 0xe
|
||||
; CHECK-NEXT: .byte 15 // 0xf
|
||||
; CHECK-NEXT: .byte 12 // 0xc
|
||||
; CHECK-NEXT: .byte 13 // 0xd
|
||||
; CHECK-NEXT: .byte 10 // 0xa
|
||||
; CHECK-NEXT: .byte 11 // 0xb
|
||||
; CHECK-NEXT: .byte 8 // 0x8
|
||||
; CHECK-NEXT: .byte 9 // 0x9
|
||||
; CHECK-NEXT: .byte 6 // 0x6
|
||||
; CHECK-NEXT: .byte 7 // 0x7
|
||||
; CHECK-NEXT: .byte 4 // 0x4
|
||||
; CHECK-NEXT: .byte 5 // 0x5
|
||||
; CHECK-NEXT: .byte 2 // 0x2
|
||||
; CHECK-NEXT: .byte 3 // 0x3
|
||||
; CHECK-NEXT: .byte 0 // 0x0
|
||||
; CHECK-NEXT: .byte 1 // 0x1
|
||||
; CHECK-LABEL: reverse_v8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: adrp x8, .LCPI4_0
|
||||
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
|
||||
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> %a)
|
||||
ret <8 x half> %res
|
||||
}
|
||||
|
||||
define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
|
||||
; CHECK-LABEL: reverse_v4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev64 v0.4s, v0.4s
|
||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a)
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
|
||||
; CHECK-LABEL: reverse_v2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a)
|
||||
ret <2 x double> %res
|
||||
}
|
||||
|
||||
; Verify promote type legalisation works as expected.
|
||||
define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
|
||||
; CHECK-LABEL: reverse_v2i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev64 v0.2s, v0.2s
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
|
||||
ret <2 x i8> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
|
||||
; CHECK-LABEL: reverse_v8i32:
|
||||
; CHECK-SELDAG: // %bb.0:
|
||||
; CHECK-SELDAG-NEXT: rev64 v1.4s, v1.4s
|
||||
; CHECK-SELDAG-NEXT: rev64 v2.4s, v0.4s
|
||||
; CHECK-SELDAG-NEXT: ext v0.16b, v1.16b, v1.16b, #8
|
||||
; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8
|
||||
; CHECK-SELDAG-NEXT: ret
|
||||
; CHECK-FASTISEL: // %bb.0:
|
||||
; CHECK-FASTISEL-NEXT: sub sp, sp, #16
|
||||
; CHECK-FASTISEL-NEXT: str q1, [sp]
|
||||
; CHECK-FASTISEL-NEXT: mov v1.16b, v0.16b
|
||||
; CHECK-FASTISEL-NEXT: ldr q0, [sp]
|
||||
; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s
|
||||
; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s
|
||||
; CHECK-FASTISEL-NEXT: ext v1.16b, v1.16b, v1.16b, #8
|
||||
; CHECK-FASTISEL-NEXT: add sp, sp, #16
|
||||
; CHECK-FASTISEL-NEXT: ret
|
||||
|
||||
%res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a)
|
||||
ret <8 x i32> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
|
||||
; CHECK-LABEL: reverse_v16f32:
|
||||
; CHECK-SELDAG: // %bb.0:
|
||||
; CHECK-SELDAG-NEXT: rev64 v3.4s, v3.4s
|
||||
; CHECK-SELDAG-NEXT: rev64 v2.4s, v2.4s
|
||||
; CHECK-SELDAG-NEXT: rev64 v4.4s, v1.4s
|
||||
; CHECK-SELDAG-NEXT: rev64 v5.4s, v0.4s
|
||||
; CHECK-SELDAG-NEXT: ext v0.16b, v3.16b, v3.16b, #8
|
||||
; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8
|
||||
; CHECK-SELDAG-NEXT: ext v2.16b, v4.16b, v4.16b, #8
|
||||
; CHECK-SELDAG-NEXT: ext v3.16b, v5.16b, v5.16b, #8
|
||||
; CHECK-SELDAG-NEXT: ret
|
||||
; CHECK-FASTISEL: // %bb.0:
|
||||
; CHECK-FASTISEL-NEXT: sub sp, sp, #32
|
||||
; CHECK-FASTISEL-NEXT: str q3, [sp, #16]
|
||||
; CHECK-FASTISEL-NEXT: str q2, [sp]
|
||||
; CHECK-FASTISEL-NEXT: mov v2.16b, v1.16b
|
||||
; CHECK-FASTISEL-NEXT: ldr q1, [sp]
|
||||
; CHECK-FASTISEL-NEXT: mov v3.16b, v0.16b
|
||||
; CHECK-FASTISEL-NEXT: ldr q0, [sp, #16]
|
||||
; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s
|
||||
; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8
|
||||
; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s
|
||||
; CHECK-FASTISEL-NEXT: ext v1.16b, v1.16b, v1.16b, #8
|
||||
; CHECK-FASTISEL-NEXT: rev64 v2.4s, v2.4s
|
||||
; CHECK-FASTISEL-NEXT: ext v2.16b, v2.16b, v2.16b, #8
|
||||
; CHECK-FASTISEL-NEXT: rev64 v3.4s, v3.4s
|
||||
; CHECK-FASTISEL-NEXT: ext v3.16b, v3.16b, v3.16b, #8
|
||||
; CHECK-FASTISEL-NEXT: add sp, sp, #32
|
||||
; CHECK-FASTISEL-NEXT: ret
|
||||
|
||||
%res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a)
|
||||
ret <16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
|
||||
declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
|
||||
declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
|
||||
declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
|
||||
declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
|
||||
declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
|
||||
declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
|
||||
declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
|
||||
declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+neon" }
|
238
test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
Normal file
238
test/CodeGen/AArch64/named-vector-shuffle-reverse-sve.ll
Normal file
@ -0,0 +1,238 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG %s
|
||||
; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
|
||||
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
;
|
||||
; VECTOR_REVERSE - PPR
|
||||
;
|
||||
|
||||
define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv2i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev p0.d, p0.d
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %a)
|
||||
ret <vscale x 2 x i1> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv4i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev p0.s, p0.s
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %a)
|
||||
ret <vscale x 4 x i1> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv8i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev p0.h, p0.h
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %a)
|
||||
ret <vscale x 8 x i1> %res
|
||||
}
|
||||
|
||||
define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv16i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev p0.b, p0.b
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
|
||||
ret <vscale x 16 x i1> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv32i1:
|
||||
; CHECK-SELDAG: // %bb.0:
|
||||
; CHECK-SELDAG-NEXT: rev p2.b, p1.b
|
||||
; CHECK-SELDAG-NEXT: rev p1.b, p0.b
|
||||
; CHECK-SELDAG-NEXT: mov p0.b, p2.b
|
||||
; CHECK-SELDAG-NEXT: ret
|
||||
; CHECK-FASTISEL: // %bb.0:
|
||||
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
|
||||
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
|
||||
; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl]
|
||||
; CHECK-FASTISEL-NEXT: mov p1.b, p0.b
|
||||
; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl]
|
||||
; CHECK-FASTISEL-NEXT: rev p0.b, p0.b
|
||||
; CHECK-FASTISEL-NEXT: rev p1.b, p1.b
|
||||
; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
|
||||
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
|
||||
; CHECK-FASTISEL-NEXT: ret
|
||||
|
||||
%res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
|
||||
ret <vscale x 32 x i1> %res
|
||||
}
|
||||
|
||||
;
|
||||
; VECTOR_REVERSE - ZPR
|
||||
;
|
||||
|
||||
define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv16i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev z0.b, z0.b
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> %a)
|
||||
ret <vscale x 16 x i8> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv8i16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev z0.h, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a)
|
||||
ret <vscale x 8 x i16> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv4i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev z0.s, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
|
||||
ret <vscale x 4 x i32> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x i64> @reverse_nxv2i64(<vscale x 2 x i64> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv2i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev z0.d, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> %a)
|
||||
ret <vscale x 2 x i64> %res
|
||||
}
|
||||
|
||||
define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv8f16:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev z0.h, z0.h
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a)
|
||||
ret <vscale x 8 x half> %res
|
||||
}
|
||||
|
||||
define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv4f32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev z0.s, z0.s
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a) ret <vscale x 4 x float> %res
|
||||
}
|
||||
|
||||
define <vscale x 2 x double> @reverse_nxv2f64(<vscale x 2 x double> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv2f64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev z0.d, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %a)
|
||||
ret <vscale x 2 x double> %res
|
||||
}
|
||||
|
||||
; Verify promote type legalisation works as expected.
|
||||
define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv2i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: rev z0.d, z0.d
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
%res = call <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8> %a)
|
||||
ret <vscale x 2 x i8> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv8i32:
|
||||
; CHECK-SELDAG: // %bb.0:
|
||||
; CHECK-SELDAG-NEXT: rev z2.s, z1.s
|
||||
; CHECK-SELDAG-NEXT: rev z1.s, z0.s
|
||||
; CHECK-SELDAG-NEXT: mov z0.d, z2.d
|
||||
; CHECK-SELDAG-NEXT: ret
|
||||
; CHECK-FASTISEL: // %bb.0:
|
||||
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
|
||||
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
|
||||
; CHECK-FASTISEL-NEXT: str z1, [sp]
|
||||
; CHECK-FASTISEL-NEXT: mov z1.d, z0.d
|
||||
; CHECK-FASTISEL-NEXT: ldr z0, [sp]
|
||||
; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
|
||||
; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
|
||||
; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
|
||||
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
|
||||
; CHECK-FASTISEL-NEXT: ret
|
||||
|
||||
%res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
|
||||
ret <vscale x 8 x i32> %res
|
||||
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 {
|
||||
; CHECK-LABEL: reverse_nxv16f32:
|
||||
; CHECK-SELDAG: // %bb.0:
|
||||
; CHECK-SELDAG-NEXT: rev z5.s, z3.s
|
||||
; CHECK-SELDAG-NEXT: rev z4.s, z2.s
|
||||
; CHECK-SELDAG-NEXT: rev z2.s, z1.s
|
||||
; CHECK-SELDAG-NEXT: rev z3.s, z0.s
|
||||
; CHECK-SELDAG-NEXT: mov z0.d, z5.d
|
||||
; CHECK-SELDAG-NEXT: mov z1.d, z4.d
|
||||
; CHECK-SELDAG-NEXT: ret
|
||||
; CHECK-FASTISEL: // %bb.0:
|
||||
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
|
||||
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2
|
||||
; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl]
|
||||
; CHECK-FASTISEL-NEXT: str z2, [sp]
|
||||
; CHECK-FASTISEL-NEXT: mov z2.d, z1.d
|
||||
; CHECK-FASTISEL-NEXT: ldr z1, [sp]
|
||||
; CHECK-FASTISEL-NEXT: mov z3.d, z0.d
|
||||
; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl]
|
||||
; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
|
||||
; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
|
||||
; CHECK-FASTISEL-NEXT: rev z2.s, z2.s
|
||||
; CHECK-FASTISEL-NEXT: rev z3.s, z3.s
|
||||
; CHECK-FASTISEL-NEXT: addvl sp, sp, #2
|
||||
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
|
||||
; CHECK-FASTISEL-NEXT: ret
|
||||
|
||||
%res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
|
||||
ret <vscale x 16 x float> %res
|
||||
}
|
||||
|
||||
|
||||
declare <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1>)
|
||||
declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>)
|
||||
declare <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1>)
|
||||
declare <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1>)
|
||||
declare <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1>)
|
||||
declare <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8>)
|
||||
declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
|
||||
declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
|
||||
declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
|
||||
declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
|
||||
declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
|
||||
declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
|
||||
declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
|
||||
declare <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float>)
|
||||
declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
|
||||
|
||||
|
||||
attributes #0 = { nounwind "target-features"="+sve" }
|
139
test/CodeGen/X86/named-vector-shuffle-reverse.ll
Normal file
139
test/CodeGen/X86/named-vector-shuffle-reverse.ll
Normal file
@ -0,0 +1,139 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck %s
|
||||
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
|
||||
target triple = "x86_64-unknown-unknown"
|
||||
|
||||
;
|
||||
; VECTOR_REVERSE
|
||||
;
|
||||
|
||||
; i8 lanes have no direct shuffle reversing all 16 elements in SSE2, so the
; vector is unpacked into two i16 halves, each half is reversed with
; pshufd/pshuflw/pshufhw, and the halves are re-packed in swapped order.
define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
; CHECK-LABEL: reverse_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pxor %xmm1, %xmm1
; CHECK-NEXT:    movdqa %xmm0, %xmm2
; CHECK-NEXT:    punpcklbw %xmm1, %xmm2
; CHECK-NEXT:    pshufd $78, %xmm2, %xmm2
; CHECK-NEXT:    pshuflw $27, %xmm2, %xmm2
; CHECK-NEXT:    pshufhw $27, %xmm2, %xmm2
; CHECK-NEXT:    punpckhbw %xmm1, %xmm0
; CHECK-NEXT:    pshufd $78, %xmm0, %xmm0
; CHECK-NEXT:    pshuflw $27, %xmm0, %xmm0
; CHECK-NEXT:    pshufhw $27, %xmm0, %xmm0
; CHECK-NEXT:    packuswb %xmm2, %xmm0
; CHECK-NEXT:    retq

  %res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
  ret <16 x i8> %res
}
|
||||
|
||||
; i16 reverse: swap the two 64-bit halves (pshufd $78), then reverse the four
; i16 lanes within each half (pshuflw/pshufhw with imm 27 = 0b00011011).
define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
; CHECK-LABEL: reverse_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pshufd $78, %xmm0, %xmm0
; CHECK-NEXT:    pshuflw $27, %xmm0, %xmm0
; CHECK-NEXT:    pshufhw $27, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
  ret <8 x i16> %res
}
|
||||
|
||||
; i32 reverse lowers to a single pshufd with imm 27 (lane order 3,2,1,0).
define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
; CHECK-LABEL: reverse_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pshufd $27, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a)
  ret <4 x i32> %res
}
|
||||
|
||||
; i64 reverse is a swap of the two 64-bit halves (pshufd $78 = lane order 1,0).
define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
; CHECK-LABEL: reverse_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pshufd $78, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a)
  ret <2 x i64> %res
}
|
||||
|
||||
; f32 reverse lowers to a single shufps with imm 27 (lane order 3,2,1,0).
define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
; CHECK-LABEL: reverse_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shufps $27, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a)
  ret <4 x float> %res
}
|
||||
|
||||
; f64 reverse swaps the two 64-bit halves (shufps $78).
define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
; CHECK-LABEL: reverse_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    shufps $78, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a)
  ret <2 x double> %res
}
|
||||
|
||||
; Verify promote type legalisation works as expected.
|
||||
; Verify promote type legalisation works as expected: <2 x i8> is promoted and
; reversed by swapping the byte halves of an i16 lane via shift-and-or.
define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
; CHECK-LABEL: reverse_v2i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movdqa %xmm0, %xmm1
; CHECK-NEXT:    psrlw $8, %xmm1
; CHECK-NEXT:    psllw $8, %xmm0
; CHECK-NEXT:    por %xmm1, %xmm0
; CHECK-NEXT:    retq
  %res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
  ret <2 x i8> %res
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
; Verify splitvec type legalisation works as expected: the <8 x i32> is split
; into two xmm registers; each half is lane-reversed and the halves swapped.
define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
; CHECK-LABEL: reverse_v8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pshufd $27, %xmm1, %xmm2
; CHECK-NEXT:    pshufd $27, %xmm0, %xmm1
; CHECK-NEXT:    movdqa %xmm2, %xmm0
; CHECK-NEXT:    retq
  %res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a)
  ret <8 x i32> %res
}
|
||||
|
||||
; Verify splitvec type legalisation works as expected.
|
||||
; Verify splitvec type legalisation works as expected: <16 x float> splits
; across xmm0-xmm3; each quarter is lane-reversed in place (or via a scratch
; copy) and the quarters are returned in swapped order.
define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
; CHECK-LABEL: reverse_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, %xmm4
; CHECK-NEXT:    movaps %xmm0, %xmm5
; CHECK-NEXT:    shufps $27, %xmm3, %xmm3
; CHECK-NEXT:    shufps $27, %xmm2, %xmm2
; CHECK-NEXT:    shufps $27, %xmm1, %xmm4
; CHECK-NEXT:    shufps $27, %xmm0, %xmm5
; CHECK-NEXT:    movaps %xmm3, %xmm0
; CHECK-NEXT:    movaps %xmm2, %xmm1
; CHECK-NEXT:    movaps %xmm4, %xmm2
; CHECK-NEXT:    movaps %xmm5, %xmm3
; CHECK-NEXT:    retq
  %res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a)
  ret <16 x float> %res
}
|
||||
|
||||
|
||||
declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
|
||||
declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
|
||||
declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
|
||||
declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
|
||||
declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
|
||||
declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
|
||||
declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
|
||||
declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
|
||||
declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
|
||||
declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
|
||||
|
||||
attributes #0 = { nounwind }
|
17
test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll
Normal file
17
test/Transforms/InstSimplify/named-vector-shuffle-reverse.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: opt -instsimplify -S < %s 2>%t | FileCheck %s
|
||||
|
||||
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
|
||||
|
||||
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
|
||||
; WARN-NOT: warning
|
||||
|
||||
; Test back to back reverse shuffles are eliminated.
|
||||
; Two consecutive reverses cancel: instsimplify must fold rev(rev(a)) to a,
; returning %a directly with no remaining intrinsic calls.
define <vscale x 4 x i32> @shuffle_b2b_reverse(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @shuffle_b2b_reverse(
; CHECK: ret <vscale x 4 x i32> %a
  %rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
  %rev.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %rev)
  ret <vscale x 4 x i32> %rev.rev
}
|
||||
|
||||
declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
|
Loading…
Reference in New Issue
Block a user