1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[AArch64 NEON] Try to generate CONCAT_VECTOR when lowering BUILD_VECTOR or SHUFFLE_VECTOR.

Replace r199791.

llvm-svn: 200180
This commit is contained in:
Kevin Qin 2014-01-27 02:53:54 +00:00
parent d83dee8270
commit 436aae7633
3 changed files with 365 additions and 29 deletions

View File

@ -4153,22 +4153,76 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false;
}
// Check whether a shuffle_vector could be presented as concat_vector.
bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG,
SDValue V0, SDValue V1,
const int *Mask,
SDValue &Res) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
if (VT.getSizeInBits() != 128)
return false;
if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
VT.getVectorElementType() != V1.getValueType().getVectorElementType())
return false;
// Check whether a Build Vector could be presented as Shuffle Vector. If yes,
// try to call LowerVECTOR_SHUFFLE to lower it.
unsigned NumElts = VT.getVectorNumElements();
bool isContactVector = true;
bool splitV0 = false;
if (V0.getValueType().getSizeInBits() == 128)
splitV0 = true;
for (int I = 0, E = NumElts / 2; I != E; I++) {
if (Mask[I] != I) {
isContactVector = false;
break;
}
}
if (isContactVector) {
int offset = NumElts / 2;
for (int I = NumElts / 2, E = NumElts; I != E; I++) {
if (Mask[I] != I + splitV0 * offset) {
isContactVector = false;
break;
}
}
}
if (isContactVector) {
EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
NumElts / 2);
if (splitV0) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
DAG.getConstant(0, MVT::i64));
}
if (V1.getValueType().getSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
DAG.getConstant(0, MVT::i64));
}
Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
return true;
}
return false;
}
// Check whether a Build Vector could be presented as Shuffle Vector.
// This Shuffle Vector maybe not legalized, so the length of its operand and
// the length of result may not equal.
bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
SDValue &Res) const {
SDValue &V0, SDValue &V1,
int *Mask) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned V0NumElts = 0;
int Mask[16];
SDValue V0, V1;
// Check if all elements are extracted from less than 3 vectors.
for (unsigned i = 0; i < NumElts; ++i) {
SDValue Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Elt.getOperand(0).getValueType().getVectorElementType() !=
VT.getVectorElementType())
return false;
if (V0.getNode() == 0) {
@ -4189,25 +4243,7 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
return false;
}
}
if (!V1.getNode() && V0NumElts == NumElts * 2) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(NumElts, MVT::i64));
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(0, MVT::i64));
V0NumElts = V0.getValueType().getVectorNumElements();
}
if (V1.getNode() && NumElts == V0NumElts &&
V0NumElts == V1.getValueType().getVectorNumElements()) {
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
Res = Shuffle;
else
Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
return true;
} else
return false;
return true;
}
// If this is a case we can't handle, return null and let the default
@ -4413,9 +4449,31 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Try to lower this in lowering ShuffleVector way.
SDValue Shuf;
if (isKnownShuffleVector(Op, DAG, Shuf))
return Shuf;
SDValue V0, V1;
int Mask[16];
if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) {
unsigned V0NumElts = V0.getValueType().getVectorNumElements();
if (!V1.getNode() && V0NumElts == NumElts * 2) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(NumElts, MVT::i64));
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(0, MVT::i64));
V0NumElts = V0.getValueType().getVectorNumElements();
}
if (V1.getNode() && NumElts == V0NumElts &&
V0NumElts == V1.getValueType().getVectorNumElements()) {
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
return Shuffle;
else
return LowerVECTOR_SHUFFLE(Shuffle, DAG);
} else {
SDValue Res;
if (isConcatVector(Op, DAG, V0, V1, Mask, Res))
return Res;
}
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even
@ -4601,6 +4659,10 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISDNo, dl, VT, V1, V2);
}
SDValue Res;
if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
return Res;
// If the element of shuffle mask are all the same constant, we can
// transform it into either NEON_VDUP or NEON_VDUPLANE
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {

View File

@ -232,7 +232,11 @@ public:
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;
bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1,
const int *Mask, SDValue &Res) const;
bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0,
SDValue &V1, int *Mask) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const;

View File

@ -1021,6 +1021,276 @@ entry:
ret <2 x i32> %h
}
define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %vecinit30
}
define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <8 x i8> %x, i32 0
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
%vecext1 = extractelement <8 x i8> %x, i32 1
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
%vecext3 = extractelement <8 x i8> %x, i32 2
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
%vecext5 = extractelement <8 x i8> %x, i32 3
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
%vecext7 = extractelement <8 x i8> %x, i32 4
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
%vecext9 = extractelement <8 x i8> %x, i32 5
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
%vecext11 = extractelement <8 x i8> %x, i32 6
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
%vecext13 = extractelement <8 x i8> %x, i32 7
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
%vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %vecinit30
}
define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <16 x i8> %x, i32 0
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
%vecext1 = extractelement <16 x i8> %x, i32 1
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
%vecext3 = extractelement <16 x i8> %x, i32 2
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
%vecext5 = extractelement <16 x i8> %x, i32 3
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
%vecext7 = extractelement <16 x i8> %x, i32 4
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
%vecext9 = extractelement <16 x i8> %x, i32 5
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
%vecext11 = extractelement <16 x i8> %x, i32 6
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
%vecext13 = extractelement <16 x i8> %x, i32 7
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
%vecext15 = extractelement <8 x i8> %y, i32 0
%vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
%vecext17 = extractelement <8 x i8> %y, i32 1
%vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
%vecext19 = extractelement <8 x i8> %y, i32 2
%vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
%vecext21 = extractelement <8 x i8> %y, i32 3
%vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
%vecext23 = extractelement <8 x i8> %y, i32 4
%vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
%vecext25 = extractelement <8 x i8> %y, i32 5
%vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
%vecext27 = extractelement <8 x i8> %y, i32 6
%vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
%vecext29 = extractelement <8 x i8> %y, i32 7
%vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
ret <16 x i8> %vecinit30
}
define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <8 x i8> %x, i32 0
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
%vecext1 = extractelement <8 x i8> %x, i32 1
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
%vecext3 = extractelement <8 x i8> %x, i32 2
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
%vecext5 = extractelement <8 x i8> %x, i32 3
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
%vecext7 = extractelement <8 x i8> %x, i32 4
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
%vecext9 = extractelement <8 x i8> %x, i32 5
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
%vecext11 = extractelement <8 x i8> %x, i32 6
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
%vecext13 = extractelement <8 x i8> %x, i32 7
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
%vecext15 = extractelement <8 x i8> %y, i32 0
%vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
%vecext17 = extractelement <8 x i8> %y, i32 1
%vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
%vecext19 = extractelement <8 x i8> %y, i32 2
%vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
%vecext21 = extractelement <8 x i8> %y, i32 3
%vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
%vecext23 = extractelement <8 x i8> %y, i32 4
%vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
%vecext25 = extractelement <8 x i8> %y, i32 5
%vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
%vecext27 = extractelement <8 x i8> %y, i32 6
%vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
%vecext29 = extractelement <8 x i8> %y, i32 7
%vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
ret <16 x i8> %vecinit30
}
define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %vecinit14
}
define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <4 x i16> %x, i32 0
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
%vecext1 = extractelement <4 x i16> %x, i32 1
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
%vecext3 = extractelement <4 x i16> %x, i32 2
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
%vecext5 = extractelement <4 x i16> %x, i32 3
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
%vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %vecinit14
}
define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <8 x i16> %x, i32 0
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
%vecext1 = extractelement <8 x i16> %x, i32 1
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
%vecext3 = extractelement <8 x i16> %x, i32 2
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
%vecext5 = extractelement <8 x i16> %x, i32 3
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
%vecext7 = extractelement <4 x i16> %y, i32 0
%vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
%vecext9 = extractelement <4 x i16> %y, i32 1
%vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
%vecext11 = extractelement <4 x i16> %y, i32 2
%vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
%vecext13 = extractelement <4 x i16> %y, i32 3
%vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
ret <8 x i16> %vecinit14
}
define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <4 x i16> %x, i32 0
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
%vecext1 = extractelement <4 x i16> %x, i32 1
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
%vecext3 = extractelement <4 x i16> %x, i32 2
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
%vecext5 = extractelement <4 x i16> %x, i32 3
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
%vecext7 = extractelement <4 x i16> %y, i32 0
%vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
%vecext9 = extractelement <4 x i16> %y, i32 1
%vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
%vecext11 = extractelement <4 x i16> %y, i32 2
%vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
%vecext13 = extractelement <4 x i16> %y, i32 3
%vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
ret <8 x i16> %vecinit14
}
define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i32> %vecinit6
}
define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <2 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
%vecext1 = extractelement <2 x i32> %x, i32 1
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
%vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i32> %vecinit6
}
define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
%vecext1 = extractelement <4 x i32> %x, i32 1
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
%vecext3 = extractelement <2 x i32> %y, i32 0
%vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
%vecext5 = extractelement <2 x i32> %y, i32 1
%vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
ret <4 x i32> %vecinit6
}
define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <2 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
%vecext1 = extractelement <2 x i32> %x, i32 1
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
%vecext3 = extractelement <2 x i32> %y, i32 0
%vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
%vecext5 = extractelement <2 x i32> %y, i32 1
%vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
ret <4 x i32> %vecinit6
}
define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %vecinit2
}
define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <1 x i64> %x, i32 0
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
%vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %vecinit2
}
define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <2 x i64> %x, i32 0
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
%vecext1 = extractelement <1 x i64> %y, i32 0
%vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
ret <2 x i64> %vecinit2
}
define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <1 x i64> %x, i32 0
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
%vecext1 = extractelement <1 x i64> %y, i32 0
%vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
ret <2 x i64> %vecinit2
}
declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) {