1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 19:52:54 +01:00

[CodeGen][SelectionDAG]Add new intrinsic experimental.vector.reverse

This patch adds a new intrinsic experimental.vector.reverse that takes a single
vector and returns a vector of matching type but with the original lane order
reversed. For example:

```
vector.reverse(<A,B,C,D>) ==> <D,C,B,A>
```

The new intrinsic supports fixed and scalable vector types.
The fixed-width vector relies on shufflevector to maintain existing behaviour.
Scalable vector uses the new ISD node - VECTOR_REVERSE.

This new intrinsic is one of the named shufflevector intrinsics proposed on the
mailing-list in the RFC at [1].

Patch by Paul Walker (@paulwalker-arm).

[1] https://lists.llvm.org/pipermail/llvm-dev/2020-November/146864.html

Differential Revision: https://reviews.llvm.org/D94883
This commit is contained in:
Caroline Concatto 2021-01-15 16:46:42 +00:00
parent b6252362a0
commit 9ea32f75fa
19 changed files with 729 additions and 9 deletions

View File

@ -16233,6 +16233,33 @@ runtime, then the result vector is undefined. The ``idx`` parameter must be a
vector index constant type (for most targets this will be an integer pointer
type).
'``llvm.experimental.vector.reverse``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Syntax:
"""""""
This is an overloaded intrinsic.
::
declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
Overview:
"""""""""
The '``llvm.experimental.vector.reverse.*``' intrinsics reverse a vector.
The intrinsic takes a single vector and returns a vector of matching type but
with the original lane order reversed. These intrinsics work for both fixed
and scalable vectors. While this intrinsic is marked as experimental the
recommended way to express reverse operations for fixed-width vectors is still
to use a shufflevector, as that may allow for more optimization opportunities.
Arguments:
""""""""""
The argument to this intrinsic must be a vector.
Matrix Intrinsics
-----------------

View File

@ -540,6 +540,11 @@ enum NodeType {
/// vector, but not the other way around.
EXTRACT_SUBVECTOR,
/// VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR,
/// whose elements are shuffled using the following algorithm:
/// RESULT[i] = VECTOR[VECTOR.ElementCount - 1 - i]
VECTOR_REVERSE,
/// VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as
/// VEC1/VEC2. A VECTOR_SHUFFLE node also contains an array of constant int
/// values that indicate which value (or undef) each result element will

View File

@ -1635,6 +1635,12 @@ def int_preserve_struct_access_index : DefaultAttrsIntrinsic<[llvm_anyptr_ty],
ImmArg<ArgIndex<1>>,
ImmArg<ArgIndex<2>>]>;
//===------------ Intrinsics to perform common vector shuffles ------------===//
def int_experimental_vector_reverse : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>],
[IntrNoMem]>;
//===---------- Intrinsics to query properties of scalable vectors --------===//
def int_vscale : DefaultAttrsIntrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;

View File

@ -254,6 +254,9 @@ def SDTFPVecReduce : SDTypeProfile<1, 1, [ // FP vector reduction
SDTCisFP<0>, SDTCisVec<1>
]>;
def SDTVecReverse : SDTypeProfile<1, 1, [ // vector reverse
SDTCisVec<0>, SDTCisSameAs<0,1>
]>;
def SDTSubVecExtract : SDTypeProfile<1, 2, [// subvector extract
SDTCisSubVecOfVec<0,1>, SDTCisInt<2>
@ -651,6 +654,7 @@ def ist : SDNode<"ISD::STORE" , SDTIStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def vector_shuffle : SDNode<"ISD::VECTOR_SHUFFLE", SDTVecShuffle, []>;
def vector_reverse : SDNode<"ISD::VECTOR_REVERSE", SDTVecReverse>;
def build_vector : SDNode<"ISD::BUILD_VECTOR", SDTypeProfile<1, -1, []>, []>;
def splat_vector : SDNode<"ISD::SPLAT_VECTOR", SDTypeProfile<1, 1, []>, []>;
def scalar_to_vector : SDNode<"ISD::SCALAR_TO_VECTOR", SDTypeProfile<1, 1, []>,

View File

@ -5373,6 +5373,12 @@ static Value *simplifyUnaryIntrinsic(Function *F, Value *Op0,
return Op0;
break;
}
case Intrinsic::experimental_vector_reverse:
// experimental.vector.reverse(experimental.vector.reverse(x)) -> x
if (match(Op0,
m_Intrinsic<Intrinsic::experimental_vector_reverse>(m_Value(X))))
return X;
break;
default:
break;
}

View File

@ -96,6 +96,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR:
Res = PromoteIntRes_EXTRACT_SUBVECTOR(N); break;
case ISD::VECTOR_REVERSE:
Res = PromoteIntRes_VECTOR_REVERSE(N); break;
case ISD::VECTOR_SHUFFLE:
Res = PromoteIntRes_VECTOR_SHUFFLE(N); break;
case ISD::INSERT_VECTOR_ELT:
@ -4662,6 +4664,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
return DAG.getBuildVector(NOutVT, dl, Ops);
}
// Promote the illegal integer result of a VECTOR_REVERSE node. Reversing
// only permutes lane order, so it commutes with integer promotion: promote
// the operand and reverse it in the promoted type directly.
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_REVERSE(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
// Result type is the promoted operand's type, not the node's original VT.
EVT OutVT = V0.getValueType();
return DAG.getNode(ISD::VECTOR_REVERSE, dl, OutVT, V0);
}
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N);

View File

@ -298,6 +298,7 @@ private:
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo);
SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntRes_VECTOR_REVERSE(SDNode *N);
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
@ -834,6 +835,7 @@ private:
void SplitVecRes_MGATHER(MaskedGatherSDNode *MGT, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);

View File

@ -930,6 +930,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SETCC:
SplitVecRes_SETCC(N, Lo, Hi);
break;
case ISD::VECTOR_REVERSE:
SplitVecRes_VECTOR_REVERSE(N, Lo, Hi);
break;
case ISD::VECTOR_SHUFFLE:
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
@ -5492,3 +5495,13 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
Ops[Idx] = FillVal;
return DAG.getBuildVector(NVT, dl, Ops);
}
// Split the result of a VECTOR_REVERSE into Lo/Hi halves. Reversing a
// concatenation swaps the halves: the low half of the result is the reversed
// *high* half of the input, and the high half is the reversed *low* half.
void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue InLo, InHi;
GetSplitVector(N->getOperand(0), InLo, InHi);
SDLoc DL(N);
// Note the deliberate cross-over of InHi -> Lo and InLo -> Hi.
Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
}

View File

@ -7025,6 +7025,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, Index));
return;
}
case Intrinsic::experimental_vector_reverse:
visitVectorReverse(I);
return;
}
}
@ -10836,6 +10839,29 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
}
}
// Lower a call to llvm.experimental.vector.reverse. Scalable vectors use the
// dedicated ISD::VECTOR_REVERSE node; fixed-width vectors are lowered to a
// VECTOR_SHUFFLE with a reversed identity mask so existing shuffle-based
// optimisations keep applying.
void SelectionDAGBuilder::visitVectorReverse(const CallInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
SDLoc DL = getCurSDLoc();
SDValue V = getValue(I.getOperand(0));
// The intrinsic is overloaded on one type: result and operand types match.
assert(VT == V.getValueType() && "Malformed vector.reverse!");
if (VT.isScalableVector()) {
setValue(&I, DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V));
return;
}
// Use VECTOR_SHUFFLE for the fixed-length vector
// to maintain existing behavior.
SmallVector<int, 8> Mask;
// For fixed-width vectors the minimum element count is the exact count.
unsigned NumElts = VT.getVectorMinNumElements();
// Mask[i] = NumElts - 1 - i, i.e. lanes in reverse order.
for (unsigned i = 0; i != NumElts; ++i)
Mask.push_back(NumElts - 1 - i);
setValue(&I, DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), Mask));
}
void SelectionDAGBuilder::visitFreeze(const FreezeInst &I) {
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),

View File

@ -773,6 +773,7 @@ private:
void visitGCResult(const GCResultInst &I);
void visitVectorReduce(const CallInst &I, unsigned Intrinsic);
void visitVectorReverse(const CallInst &I);
void visitUserOp1(const Instruction &I) {
llvm_unreachable("UserOp1 should not exist at instruction selection time!");

View File

@ -289,6 +289,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
case ISD::SPLAT_VECTOR: return "splat_vector";
case ISD::VECTOR_REVERSE: return "vector_reverse";
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";

View File

@ -3894,7 +3894,7 @@ bool AArch64FastISel::selectRet(const Instruction *I) {
return false;
// Vectors (of > 1 lane) in big endian need tricky handling.
if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
!Subtarget->isLittleEndian())
return false;

View File

@ -1853,7 +1853,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CLASTB_N)
MAKE_CASE(AArch64ISD::LASTA)
MAKE_CASE(AArch64ISD::LASTB)
MAKE_CASE(AArch64ISD::REV)
MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
MAKE_CASE(AArch64ISD::TBL)
MAKE_CASE(AArch64ISD::FADD_PRED)
@ -3594,7 +3593,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::aarch64_sve_rev:
return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
Op.getOperand(1));
case Intrinsic::aarch64_sve_tbl:
return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),

View File

@ -292,7 +292,6 @@ enum NodeType : unsigned {
CLASTB_N,
LASTA,
LASTB,
REV,
TBL,
// Floating-point reductions.

View File

@ -249,9 +249,6 @@ def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithIn
def AArch64clastb_n : SDNode<"AArch64ISD::CLASTB_N", SDT_AArch64ReduceWithInit>;
def AArch64fadda_p : SDNode<"AArch64ISD::FADDA_PRED", SDT_AArch64ReduceWithInit>;
def SDT_AArch64Rev : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64rev : SDNode<"AArch64ISD::REV", SDT_AArch64Rev>;
def SDT_AArch64PTest : SDTypeProfile<0, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def AArch64ptest : SDNode<"AArch64ISD::PTEST", SDT_AArch64PTest>;
@ -587,8 +584,8 @@ let Predicates = [HasSVE] in {
defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
defm REVW_ZPmZ : sve_int_perm_rev_revw<"revw", int_aarch64_sve_revw>;
defm REV_PP : sve_int_perm_reverse_p<"rev", AArch64rev>;
defm REV_ZZ : sve_int_perm_reverse_z<"rev", AArch64rev>;
defm REV_PP : sve_int_perm_reverse_p<"rev", vector_reverse>;
defm REV_ZZ : sve_int_perm_reverse_z<"rev", vector_reverse>;
defm SUNPKLO_ZZ : sve_int_perm_unpk<0b00, "sunpklo", AArch64sunpklo>;
defm SUNPKHI_ZZ : sve_int_perm_unpk<0b01, "sunpkhi", AArch64sunpkhi>;

View File

@ -0,0 +1,230 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG %s
; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
target triple = "aarch64-unknown-linux-gnu"
;
; VECTOR_REVERSE
;
define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
; CHECK-LABEL: .LCPI0_0:
; CHECK: .byte 15 // 0xf
; CHECK-NEXT: .byte 14 // 0xe
; CHECK-NEXT: .byte 13 // 0xd
; CHECK-NEXT: .byte 12 // 0xc
; CHECK-NEXT: .byte 11 // 0xb
; CHECK-NEXT: .byte 10 // 0xa
; CHECK-NEXT: .byte 9 // 0x9
; CHECK-NEXT: .byte 8 // 0x8
; CHECK-NEXT: .byte 7 // 0x7
; CHECK-NEXT: .byte 6 // 0x6
; CHECK-NEXT: .byte 5 // 0x5
; CHECK-NEXT: .byte 4 // 0x4
; CHECK-NEXT: .byte 3 // 0x3
; CHECK-NEXT: .byte 2 // 0x2
; CHECK-NEXT: .byte 1 // 0x1
; CHECK-NEXT: .byte 0 // 0x0
; CHECK-LABEL: reverse_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-NEXT: ret
%res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
ret <16 x i8> %res
}
define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
; CHECK-LABEL: .LCPI1_0:
; CHECK: .byte 14 // 0xe
; CHECK-NEXT: .byte 15 // 0xf
; CHECK-NEXT: .byte 12 // 0xc
; CHECK-NEXT: .byte 13 // 0xd
; CHECK-NEXT: .byte 10 // 0xa
; CHECK-NEXT: .byte 11 // 0xb
; CHECK-NEXT: .byte 8 // 0x8
; CHECK-NEXT: .byte 9 // 0x9
; CHECK-NEXT: .byte 6 // 0x6
; CHECK-NEXT: .byte 7 // 0x7
; CHECK-NEXT: .byte 4 // 0x4
; CHECK-NEXT: .byte 5 // 0x5
; CHECK-NEXT: .byte 2 // 0x2
; CHECK-NEXT: .byte 3 // 0x3
; CHECK-NEXT: .byte 0 // 0x0
; CHECK-NEXT: .byte 1 // 0x1
; CHECK-LABEL: reverse_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-NEXT: ret
%res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
ret <8 x i16> %res
}
define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
; CHECK-LABEL: reverse_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev64 v0.4s, v0.4s
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
%res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a)
ret <4 x i32> %res
}
define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
; CHECK-LABEL: reverse_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
%res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a)
ret <2 x i64> %res
}
define <8 x half> @reverse_v8f16(<8 x half> %a) #0 {
; CHECK-LABEL: .LCPI4_0:
; CHECK: .byte 14 // 0xe
; CHECK-NEXT: .byte 15 // 0xf
; CHECK-NEXT: .byte 12 // 0xc
; CHECK-NEXT: .byte 13 // 0xd
; CHECK-NEXT: .byte 10 // 0xa
; CHECK-NEXT: .byte 11 // 0xb
; CHECK-NEXT: .byte 8 // 0x8
; CHECK-NEXT: .byte 9 // 0x9
; CHECK-NEXT: .byte 6 // 0x6
; CHECK-NEXT: .byte 7 // 0x7
; CHECK-NEXT: .byte 4 // 0x4
; CHECK-NEXT: .byte 5 // 0x5
; CHECK-NEXT: .byte 2 // 0x2
; CHECK-NEXT: .byte 3 // 0x3
; CHECK-NEXT: .byte 0 // 0x0
; CHECK-NEXT: .byte 1 // 0x1
; CHECK-LABEL: reverse_v8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
; CHECK-NEXT: ret
%res = call <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half> %a)
ret <8 x half> %res
}
define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
; CHECK-LABEL: reverse_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev64 v0.4s, v0.4s
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
%res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a)
ret <4 x float> %res
}
define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
; CHECK-LABEL: reverse_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ret
%res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a)
ret <2 x double> %res
}
; Verify promote type legalisation works as expected.
define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
; CHECK-LABEL: reverse_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rev64 v0.2s, v0.2s
; CHECK-NEXT: ret
%res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
ret <2 x i8> %res
}
; Verify splitvec type legalisation works as expected.
define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
; CHECK-LABEL: reverse_v8i32:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev64 v1.4s, v1.4s
; CHECK-SELDAG-NEXT: rev64 v2.4s, v0.4s
; CHECK-SELDAG-NEXT: ext v0.16b, v1.16b, v1.16b, #8
; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8
; CHECK-SELDAG-NEXT: ret
; CHECK-FASTISEL: // %bb.0:
; CHECK-FASTISEL-NEXT: sub sp, sp, #16
; CHECK-FASTISEL-NEXT: str q1, [sp]
; CHECK-FASTISEL-NEXT: mov v1.16b, v0.16b
; CHECK-FASTISEL-NEXT: ldr q0, [sp]
; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s
; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s
; CHECK-FASTISEL-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-FASTISEL-NEXT: add sp, sp, #16
; CHECK-FASTISEL-NEXT: ret
%res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a)
ret <8 x i32> %res
}
; Verify splitvec type legalisation works as expected.
define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
; CHECK-LABEL: reverse_v16f32:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev64 v3.4s, v3.4s
; CHECK-SELDAG-NEXT: rev64 v2.4s, v2.4s
; CHECK-SELDAG-NEXT: rev64 v4.4s, v1.4s
; CHECK-SELDAG-NEXT: rev64 v5.4s, v0.4s
; CHECK-SELDAG-NEXT: ext v0.16b, v3.16b, v3.16b, #8
; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8
; CHECK-SELDAG-NEXT: ext v2.16b, v4.16b, v4.16b, #8
; CHECK-SELDAG-NEXT: ext v3.16b, v5.16b, v5.16b, #8
; CHECK-SELDAG-NEXT: ret
; CHECK-FASTISEL: // %bb.0:
; CHECK-FASTISEL-NEXT: sub sp, sp, #32
; CHECK-FASTISEL-NEXT: str q3, [sp, #16]
; CHECK-FASTISEL-NEXT: str q2, [sp]
; CHECK-FASTISEL-NEXT: mov v2.16b, v1.16b
; CHECK-FASTISEL-NEXT: ldr q1, [sp]
; CHECK-FASTISEL-NEXT: mov v3.16b, v0.16b
; CHECK-FASTISEL-NEXT: ldr q0, [sp, #16]
; CHECK-FASTISEL-NEXT: rev64 v0.4s, v0.4s
; CHECK-FASTISEL-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-FASTISEL-NEXT: rev64 v1.4s, v1.4s
; CHECK-FASTISEL-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-FASTISEL-NEXT: rev64 v2.4s, v2.4s
; CHECK-FASTISEL-NEXT: ext v2.16b, v2.16b, v2.16b, #8
; CHECK-FASTISEL-NEXT: rev64 v3.4s, v3.4s
; CHECK-FASTISEL-NEXT: ext v3.16b, v3.16b, v3.16b, #8
; CHECK-FASTISEL-NEXT: add sp, sp, #32
; CHECK-FASTISEL-NEXT: ret
%res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a)
ret <16 x float> %res
}
declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
attributes #0 = { nounwind "target-features"="+neon" }

View File

@ -0,0 +1,238 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-SELDAG %s
; RUN: llc -verify-machineinstrs -O0 < %s 2>%t | FileCheck --check-prefix=CHECK --check-prefix=CHECK-FASTISEL %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
target triple = "aarch64-unknown-linux-gnu"
;
; VECTOR_REVERSE - PPR
;
define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) #0 {
; CHECK-LABEL: reverse_nxv2i1:
; CHECK: // %bb.0:
; CHECK-NEXT: rev p0.d, p0.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1> %a)
ret <vscale x 2 x i1> %res
}
define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) #0 {
; CHECK-LABEL: reverse_nxv4i1:
; CHECK: // %bb.0:
; CHECK-NEXT: rev p0.s, p0.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1> %a)
ret <vscale x 4 x i1> %res
}
define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) #0 {
; CHECK-LABEL: reverse_nxv8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: rev p0.h, p0.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1> %a)
ret <vscale x 8 x i1> %res
}
define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) #0 {
; CHECK-LABEL: reverse_nxv16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: rev p0.b, p0.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1> %a)
ret <vscale x 16 x i1> %res
}
; Verify splitvec type legalisation works as expected.
define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) #0 {
; CHECK-LABEL: reverse_nxv32i1:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev p2.b, p1.b
; CHECK-SELDAG-NEXT: rev p1.b, p0.b
; CHECK-SELDAG-NEXT: mov p0.b, p2.b
; CHECK-SELDAG-NEXT: ret
; CHECK-FASTISEL: // %bb.0:
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
; CHECK-FASTISEL-NEXT: str p1, [sp, #7, mul vl]
; CHECK-FASTISEL-NEXT: mov p1.b, p0.b
; CHECK-FASTISEL-NEXT: ldr p0, [sp, #7, mul vl]
; CHECK-FASTISEL-NEXT: rev p0.b, p0.b
; CHECK-FASTISEL-NEXT: rev p1.b, p1.b
; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1> %a)
ret <vscale x 32 x i1> %res
}
;
; VECTOR_REVERSE - ZPR
;
define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: reverse_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.b, z0.b
; CHECK-NEXT: ret
%res = call <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8> %a)
ret <vscale x 16 x i8> %res
}
define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: reverse_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.h, z0.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a)
ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: reverse_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.s, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
ret <vscale x 4 x i32> %res
}
define <vscale x 2 x i64> @reverse_nxv2i64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: reverse_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.d, z0.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64> %a)
ret <vscale x 2 x i64> %res
}
define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) #0 {
; CHECK-LABEL: reverse_nxv8f16:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.h, z0.h
; CHECK-NEXT: ret
%res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a)
ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) #0 {
; CHECK-LABEL: reverse_nxv4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.s, z0.s
; CHECK-NEXT: ret
%res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @reverse_nxv2f64(<vscale x 2 x double> %a) #0 {
; CHECK-LABEL: reverse_nxv2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.d, z0.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double> %a)
ret <vscale x 2 x double> %res
}
; Verify promote type legalisation works as expected.
define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) #0 {
; CHECK-LABEL: reverse_nxv2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: rev z0.d, z0.d
; CHECK-NEXT: ret
%res = call <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8> %a)
ret <vscale x 2 x i8> %res
}
; Verify splitvec type legalisation works as expected.
define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) #0 {
; CHECK-LABEL: reverse_nxv8i32:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev z2.s, z1.s
; CHECK-SELDAG-NEXT: rev z1.s, z0.s
; CHECK-SELDAG-NEXT: mov z0.d, z2.d
; CHECK-SELDAG-NEXT: ret
; CHECK-FASTISEL: // %bb.0:
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-1
; CHECK-FASTISEL-NEXT: str z1, [sp]
; CHECK-FASTISEL-NEXT: mov z1.d, z0.d
; CHECK-FASTISEL-NEXT: ldr z0, [sp]
; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
; CHECK-FASTISEL-NEXT: addvl sp, sp, #1
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
ret <vscale x 8 x i32> %res
}
; Verify splitvec type legalisation works as expected.
define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) #0 {
; CHECK-LABEL: reverse_nxv16f32:
; CHECK-SELDAG: // %bb.0:
; CHECK-SELDAG-NEXT: rev z5.s, z3.s
; CHECK-SELDAG-NEXT: rev z4.s, z2.s
; CHECK-SELDAG-NEXT: rev z2.s, z1.s
; CHECK-SELDAG-NEXT: rev z3.s, z0.s
; CHECK-SELDAG-NEXT: mov z0.d, z5.d
; CHECK-SELDAG-NEXT: mov z1.d, z4.d
; CHECK-SELDAG-NEXT: ret
; CHECK-FASTISEL: // %bb.0:
; CHECK-FASTISEL-NEXT: str x29, [sp, #-16]
; CHECK-FASTISEL-NEXT: addvl sp, sp, #-2
; CHECK-FASTISEL-NEXT: str z3, [sp, #1, mul vl]
; CHECK-FASTISEL-NEXT: str z2, [sp]
; CHECK-FASTISEL-NEXT: mov z2.d, z1.d
; CHECK-FASTISEL-NEXT: ldr z1, [sp]
; CHECK-FASTISEL-NEXT: mov z3.d, z0.d
; CHECK-FASTISEL-NEXT: ldr z0, [sp, #1, mul vl]
; CHECK-FASTISEL-NEXT: rev z0.s, z0.s
; CHECK-FASTISEL-NEXT: rev z1.s, z1.s
; CHECK-FASTISEL-NEXT: rev z2.s, z2.s
; CHECK-FASTISEL-NEXT: rev z3.s, z3.s
; CHECK-FASTISEL-NEXT: addvl sp, sp, #2
; CHECK-FASTISEL-NEXT: ldr x29, [sp], #16
; CHECK-FASTISEL-NEXT: ret
%res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
ret <vscale x 16 x float> %res
}
declare <vscale x 2 x i1> @llvm.experimental.vector.reverse.nxv2i1(<vscale x 2 x i1>)
declare <vscale x 4 x i1> @llvm.experimental.vector.reverse.nxv4i1(<vscale x 4 x i1>)
declare <vscale x 8 x i1> @llvm.experimental.vector.reverse.nxv8i1(<vscale x 8 x i1>)
declare <vscale x 16 x i1> @llvm.experimental.vector.reverse.nxv16i1(<vscale x 16 x i1>)
declare <vscale x 32 x i1> @llvm.experimental.vector.reverse.nxv32i1(<vscale x 32 x i1>)
declare <vscale x 2 x i8> @llvm.experimental.vector.reverse.nxv2i8(<vscale x 2 x i8>)
declare <vscale x 16 x i8> @llvm.experimental.vector.reverse.nxv16i8(<vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)
declare <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32>)
declare <vscale x 2 x i64> @llvm.experimental.vector.reverse.nxv2i64(<vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float>)
declare <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float>)
declare <vscale x 2 x double> @llvm.experimental.vector.reverse.nxv2f64(<vscale x 2 x double>)
attributes #0 = { nounwind "target-features"="+sve" }

View File

@ -0,0 +1,139 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
target triple = "x86_64-unknown-unknown"
;
; VECTOR_REVERSE
;
define <16 x i8> @reverse_v16i8(<16 x i8> %a) #0 {
; CHECK-LABEL: reverse_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: movdqa %xmm0, %xmm2
; CHECK-NEXT: punpcklbw %xmm1, %xmm2
; CHECK-NEXT: pshufd $78, %xmm2, %xmm2
; CHECK-NEXT: pshuflw $27, %xmm2, %xmm2
; CHECK-NEXT: pshufhw $27, %xmm2, %xmm2
; CHECK-NEXT: punpckhbw %xmm1, %xmm0
; CHECK-NEXT: pshufd $78, %xmm0, %xmm0
; CHECK-NEXT: pshuflw $27, %xmm0, %xmm0
; CHECK-NEXT: pshufhw $27, %xmm0, %xmm0
; CHECK-NEXT: packuswb %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8> %a)
ret <16 x i8> %res
}
define <8 x i16> @reverse_v8i16(<8 x i16> %a) #0 {
; CHECK-LABEL: reverse_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: pshufd $78, %xmm0, %xmm0
; CHECK-NEXT: pshuflw $27, %xmm0, %xmm0
; CHECK-NEXT: pshufhw $27, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16> %a)
ret <8 x i16> %res
}
define <4 x i32> @reverse_v4i32(<4 x i32> %a) #0 {
; CHECK-LABEL: reverse_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pshufd $27, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32> %a)
ret <4 x i32> %res
}
define <2 x i64> @reverse_v2i64(<2 x i64> %a) #0 {
; CHECK-LABEL: reverse_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: pshufd $78, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64> %a)
ret <2 x i64> %res
}
define <4 x float> @reverse_v4f32(<4 x float> %a) #0 {
; CHECK-LABEL: reverse_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: shufps $27, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float> %a)
ret <4 x float> %res
}
define <2 x double> @reverse_v2f64(<2 x double> %a) #0 {
; CHECK-LABEL: reverse_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: shufps $78, %xmm0, %xmm0
; CHECK-NEXT: retq
%res = call <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double> %a)
ret <2 x double> %res
}
; Verify promote type legalisation works as expected.
define <2 x i8> @reverse_v2i8(<2 x i8> %a) #0 {
; CHECK-LABEL: reverse_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm1
; CHECK-NEXT: psrlw $8, %xmm1
; CHECK-NEXT: psllw $8, %xmm0
; CHECK-NEXT: por %xmm1, %xmm0
; CHECK-NEXT: retq
%res = call <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8> %a)
ret <2 x i8> %res
}
; Verify splitvec type legalisation works as expected.
define <8 x i32> @reverse_v8i32(<8 x i32> %a) #0 {
; CHECK-LABEL: reverse_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pshufd $27, %xmm1, %xmm2
; CHECK-NEXT: pshufd $27, %xmm0, %xmm1
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: retq
%res = call <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32> %a)
ret <8 x i32> %res
}
; Verify splitvec type legalisation works as expected.
define <16 x float> @reverse_v16f32(<16 x float> %a) #0 {
; CHECK-LABEL: reverse_v16f32:
; CHECK: # %bb.0:
; CHECK-NEXT: movaps %xmm1, %xmm4
; CHECK-NEXT: movaps %xmm0, %xmm5
; CHECK-NEXT: shufps $27, %xmm3, %xmm3
; CHECK-NEXT: shufps $27, %xmm2, %xmm2
; CHECK-NEXT: shufps $27, %xmm1, %xmm4
; CHECK-NEXT: shufps $27, %xmm0, %xmm5
; CHECK-NEXT: movaps %xmm3, %xmm0
; CHECK-NEXT: movaps %xmm2, %xmm1
; CHECK-NEXT: movaps %xmm4, %xmm2
; CHECK-NEXT: movaps %xmm5, %xmm3
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float> %a)
ret <16 x float> %res
}
declare <2 x i8> @llvm.experimental.vector.reverse.v2i8(<2 x i8>)
declare <16 x i8> @llvm.experimental.vector.reverse.v16i8(<16 x i8>)
declare <8 x i16> @llvm.experimental.vector.reverse.v8i16(<8 x i16>)
declare <4 x i32> @llvm.experimental.vector.reverse.v4i32(<4 x i32>)
declare <8 x i32> @llvm.experimental.vector.reverse.v8i32(<8 x i32>)
declare <2 x i64> @llvm.experimental.vector.reverse.v2i64(<2 x i64>)
declare <8 x half> @llvm.experimental.vector.reverse.v8f16(<8 x half>)
declare <4 x float> @llvm.experimental.vector.reverse.v4f32(<4 x float>)
declare <16 x float> @llvm.experimental.vector.reverse.v16f32(<16 x float>)
declare <2 x double> @llvm.experimental.vector.reverse.v2f64(<2 x double>)
attributes #0 = { nounwind }

View File

@ -0,0 +1,17 @@
; RUN: opt -instsimplify -S < %s 2>%t | FileCheck %s
; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
; If this check fails please read test/CodeGen/AArch64/README for instructions on how to resolve it.
; WARN-NOT: warning
; Test back to back reverse shuffles are eliminated.
define <vscale x 4 x i32> @shuffle_b2b_reverse(<vscale x 4 x i32> %a) {
; CHECK-LABEL: @shuffle_b2b_reverse(
; CHECK: ret <vscale x 4 x i32> %a
%rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
%rev.rev = tail call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %rev)
ret <vscale x 4 x i32> %rev.rev
}
declare <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32>)