1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[AArch64 NEON] Try to generate CONCAT_VECTOR when lowering BUILD_VECTOR or SHUFFLE_VECTOR.

Replace r199791.

llvm-svn: 200180
This commit is contained in:
Kevin Qin 2014-01-27 02:53:54 +00:00
parent d83dee8270
commit 436aae7633
3 changed files with 365 additions and 29 deletions

View File

@ -4153,22 +4153,76 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return false; return false;
} }
// Check whether a shuffle_vector can be represented as a concat_vector.
//
// The 128-bit result must consist of the low half taken lane-for-lane from
// the start of V0 and the high half taken lane-for-lane from the start of V1.
//
//   Op   - node being lowered; supplies the debug location and result type.
//   V0   - source of the low half of the result.
//   V1   - source of the high half of the result.
//   Mask - shuffle mask; the first VT.getVectorNumElements() entries are read.
//   Res  - receives the CONCAT_VECTORS node when the function returns true;
//          left untouched otherwise.
//
// Returns true if the mask matches the concat pattern and Res was produced.
bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG,
                                           SDValue V0, SDValue V1,
                                           const int *Mask,
                                           SDValue &Res) const {
  // The BUILD_VECTOR caller can reach this function with an empty V1 (every
  // element extracted from a single vector). Querying the value type of an
  // empty SDValue is invalid, so bail out before touching either operand.
  if (!V0.getNode() || !V1.getNode())
    return false;

  EVT VT = Op.getValueType();
  if (VT.getSizeInBits() != 128)
    return false;

  EVT EltVT = VT.getVectorElementType();
  if (EltVT != V0.getValueType().getVectorElementType() ||
      EltVT != V1.getValueType().getVectorElementType())
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  const int Half = NumElts / 2;

  // A 128-bit V0 has to be narrowed to its low half before concatenation.
  const bool splitV0 = V0.getValueType().getSizeInBits() == 128;

  // Low half of the result: lanes [0, Half) of V0, in order.
  for (int I = 0; I != Half; ++I)
    if (Mask[I] != I)
      return false;

  // High half of the result: lanes [0, Half) of V1, in order. Mask indices
  // for V1 start right after V0's lanes. A 128-bit V0 has NumElts lanes, so
  // V1's lane 0 is index Half + Half; otherwise the code assumes V0 supplies
  // exactly Half lanes and V1's lane 0 is index Half.
  const int HighBias = splitV0 ? Half : 0;
  for (int I = Half, E = NumElts; I != E; ++I)
    if (Mask[I] != I + HighBias)
      return false;

  SDLoc DL(Op);
  EVT CastVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts / 2);
  if (splitV0)
    V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
                     DAG.getConstant(0, MVT::i64));
  if (V1.getValueType().getSizeInBits() == 128)
    V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
                     DAG.getConstant(0, MVT::i64));

  Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
  return true;
}
// Check whether a Build Vector can be represented as a Shuffle Vector.
// This Shuffle Vector may not be legalized, so the length of its operands and
// the length of its result may not be equal.
bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
SDValue &Res) const { SDValue &V0, SDValue &V1,
int *Mask) const {
SDLoc DL(Op); SDLoc DL(Op);
EVT VT = Op.getValueType(); EVT VT = Op.getValueType();
unsigned NumElts = VT.getVectorNumElements(); unsigned NumElts = VT.getVectorNumElements();
unsigned V0NumElts = 0; unsigned V0NumElts = 0;
int Mask[16];
SDValue V0, V1;
// Check if all elements are extracted from less than 3 vectors. // Check if all elements are extracted from less than 3 vectors.
for (unsigned i = 0; i < NumElts; ++i) { for (unsigned i = 0; i < NumElts; ++i) {
SDValue Elt = Op.getOperand(i); SDValue Elt = Op.getOperand(i);
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT) if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Elt.getOperand(0).getValueType().getVectorElementType() !=
VT.getVectorElementType())
return false; return false;
if (V0.getNode() == 0) { if (V0.getNode() == 0) {
@ -4189,25 +4243,7 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
return false; return false;
} }
} }
return true;
if (!V1.getNode() && V0NumElts == NumElts * 2) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(NumElts, MVT::i64));
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(0, MVT::i64));
V0NumElts = V0.getValueType().getVectorNumElements();
}
if (V1.getNode() && NumElts == V0NumElts &&
V0NumElts == V1.getValueType().getVectorNumElements()) {
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
Res = Shuffle;
else
Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
return true;
} else
return false;
} }
// If this is a case we can't handle, return null and let the default // If this is a case we can't handle, return null and let the default
@ -4413,9 +4449,31 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return SDValue(); return SDValue();
// Try to lower this in lowering ShuffleVector way. // Try to lower this in lowering ShuffleVector way.
SDValue Shuf; SDValue V0, V1;
if (isKnownShuffleVector(Op, DAG, Shuf)) int Mask[16];
return Shuf; if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) {
unsigned V0NumElts = V0.getValueType().getVectorNumElements();
if (!V1.getNode() && V0NumElts == NumElts * 2) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(NumElts, MVT::i64));
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
DAG.getConstant(0, MVT::i64));
V0NumElts = V0.getValueType().getVectorNumElements();
}
if (V1.getNode() && NumElts == V0NumElts &&
V0NumElts == V1.getValueType().getVectorNumElements()) {
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
return Shuffle;
else
return LowerVECTOR_SHUFFLE(Shuffle, DAG);
} else {
SDValue Res;
if (isConcatVector(Op, DAG, V0, V1, Mask, Res))
return Res;
}
}
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
// know the default expansion would otherwise fall back on something even // know the default expansion would otherwise fall back on something even
@ -4601,6 +4659,10 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISDNo, dl, VT, V1, V2); return DAG.getNode(ISDNo, dl, VT, V1, V2);
} }
SDValue Res;
if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
return Res;
// If the element of shuffle mask are all the same constant, we can // If the element of shuffle mask are all the same constant, we can
// transform it into either NEON_VDUP or NEON_VDUPLANE // transform it into either NEON_VDUP or NEON_VDUPLANE
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {

View File

@ -232,7 +232,11 @@ public:
SDLoc dl, SelectionDAG &DAG, SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const; SmallVectorImpl<SDValue> &InVals) const;
bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const; bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1,
const int *Mask, SDValue &Res) const;
bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0,
SDValue &V1, int *Mask) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const AArch64Subtarget *ST) const; const AArch64Subtarget *ST) const;

View File

@ -1021,6 +1021,276 @@ entry:
ret <2 x i32> %h ret <2 x i32> %h
} }
; Shuffle of two <16 x i8> inputs: result lanes 0-7 are lanes 0-7 of %x and
; result lanes 8-15 are mask indices 16-23, i.e. lanes 0-7 of %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %vecinit30
}
; Low half comes from a <8 x i8> %x: its 8 elements are extracted one by one and
; inserted into lanes 0-7 of an undef <16 x i8>. The final shufflevector keeps
; those lanes and fills lanes 8-15 with mask indices 16-23 (lanes 0-7 of %y).
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <8 x i8> %x, i32 0
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
%vecext1 = extractelement <8 x i8> %x, i32 1
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
%vecext3 = extractelement <8 x i8> %x, i32 2
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
%vecext5 = extractelement <8 x i8> %x, i32 3
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
%vecext7 = extractelement <8 x i8> %x, i32 4
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
%vecext9 = extractelement <8 x i8> %x, i32 5
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
%vecext11 = extractelement <8 x i8> %x, i32 6
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
%vecext13 = extractelement <8 x i8> %x, i32 7
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
%vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %vecinit30
}
; Element-by-element build of a <16 x i8>: lanes 0-7 are elements 0-7 of the
; <16 x i8> %x, lanes 8-15 are elements 0-7 of the <8 x i8> %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <16 x i8> %x, i32 0
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
%vecext1 = extractelement <16 x i8> %x, i32 1
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
%vecext3 = extractelement <16 x i8> %x, i32 2
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
%vecext5 = extractelement <16 x i8> %x, i32 3
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
%vecext7 = extractelement <16 x i8> %x, i32 4
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
%vecext9 = extractelement <16 x i8> %x, i32 5
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
%vecext11 = extractelement <16 x i8> %x, i32 6
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
%vecext13 = extractelement <16 x i8> %x, i32 7
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
%vecext15 = extractelement <8 x i8> %y, i32 0
%vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
%vecext17 = extractelement <8 x i8> %y, i32 1
%vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
%vecext19 = extractelement <8 x i8> %y, i32 2
%vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
%vecext21 = extractelement <8 x i8> %y, i32 3
%vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
%vecext23 = extractelement <8 x i8> %y, i32 4
%vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
%vecext25 = extractelement <8 x i8> %y, i32 5
%vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
%vecext27 = extractelement <8 x i8> %y, i32 6
%vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
%vecext29 = extractelement <8 x i8> %y, i32 7
%vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
ret <16 x i8> %vecinit30
}
; Element-by-element build of a <16 x i8> from two <8 x i8> inputs: lanes 0-7
; are elements 0-7 of %x, lanes 8-15 are elements 0-7 of %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <8 x i8> %x, i32 0
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
%vecext1 = extractelement <8 x i8> %x, i32 1
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
%vecext3 = extractelement <8 x i8> %x, i32 2
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
%vecext5 = extractelement <8 x i8> %x, i32 3
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
%vecext7 = extractelement <8 x i8> %x, i32 4
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
%vecext9 = extractelement <8 x i8> %x, i32 5
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
%vecext11 = extractelement <8 x i8> %x, i32 6
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
%vecext13 = extractelement <8 x i8> %x, i32 7
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
%vecext15 = extractelement <8 x i8> %y, i32 0
%vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
%vecext17 = extractelement <8 x i8> %y, i32 1
%vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
%vecext19 = extractelement <8 x i8> %y, i32 2
%vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
%vecext21 = extractelement <8 x i8> %y, i32 3
%vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
%vecext23 = extractelement <8 x i8> %y, i32 4
%vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
%vecext25 = extractelement <8 x i8> %y, i32 5
%vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
%vecext27 = extractelement <8 x i8> %y, i32 6
%vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
%vecext29 = extractelement <8 x i8> %y, i32 7
%vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
ret <16 x i8> %vecinit30
}
; Shuffle of two <8 x i16> inputs: result lanes 0-3 are lanes 0-3 of %x and
; result lanes 4-7 are mask indices 8-11, i.e. lanes 0-3 of %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %vecinit14
}
; Low half comes from a <4 x i16> %x: its 4 elements are extracted and inserted
; into lanes 0-3 of an undef <8 x i16>. The final shufflevector keeps those
; lanes and fills lanes 4-7 with mask indices 8-11 (lanes 0-3 of %y).
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <4 x i16> %x, i32 0
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
%vecext1 = extractelement <4 x i16> %x, i32 1
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
%vecext3 = extractelement <4 x i16> %x, i32 2
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
%vecext5 = extractelement <4 x i16> %x, i32 3
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
%vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %vecinit14
}
; Element-by-element build of a <8 x i16>: lanes 0-3 are elements 0-3 of the
; <8 x i16> %x, lanes 4-7 are elements 0-3 of the <4 x i16> %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <8 x i16> %x, i32 0
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
%vecext1 = extractelement <8 x i16> %x, i32 1
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
%vecext3 = extractelement <8 x i16> %x, i32 2
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
%vecext5 = extractelement <8 x i16> %x, i32 3
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
%vecext7 = extractelement <4 x i16> %y, i32 0
%vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
%vecext9 = extractelement <4 x i16> %y, i32 1
%vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
%vecext11 = extractelement <4 x i16> %y, i32 2
%vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
%vecext13 = extractelement <4 x i16> %y, i32 3
%vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
ret <8 x i16> %vecinit14
}
; Element-by-element build of a <8 x i16> from two <4 x i16> inputs: lanes 0-3
; are elements 0-3 of %x, lanes 4-7 are elements 0-3 of %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <4 x i16> %x, i32 0
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
%vecext1 = extractelement <4 x i16> %x, i32 1
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
%vecext3 = extractelement <4 x i16> %x, i32 2
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
%vecext5 = extractelement <4 x i16> %x, i32 3
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
%vecext7 = extractelement <4 x i16> %y, i32 0
%vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
%vecext9 = extractelement <4 x i16> %y, i32 1
%vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
%vecext11 = extractelement <4 x i16> %y, i32 2
%vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
%vecext13 = extractelement <4 x i16> %y, i32 3
%vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
ret <8 x i16> %vecinit14
}
; Shuffle of two <4 x i32> inputs: result lanes 0-1 are lanes 0-1 of %x and
; result lanes 2-3 are mask indices 4-5, i.e. lanes 0-1 of %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i32> %vecinit6
}
; Low half comes from a <2 x i32> %x: both elements are extracted and inserted
; into lanes 0-1 of an undef <4 x i32>. The final shufflevector keeps those
; lanes and fills lanes 2-3 with mask indices 4-5 (lanes 0-1 of %y).
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <2 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
%vecext1 = extractelement <2 x i32> %x, i32 1
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
%vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i32> %vecinit6
}
; Element-by-element build of a <4 x i32>: lanes 0-1 are elements 0-1 of the
; <4 x i32> %x, lanes 2-3 are elements 0-1 of the <2 x i32> %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <4 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
%vecext1 = extractelement <4 x i32> %x, i32 1
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
%vecext3 = extractelement <2 x i32> %y, i32 0
%vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
%vecext5 = extractelement <2 x i32> %y, i32 1
%vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
ret <4 x i32> %vecinit6
}
; Element-by-element build of a <4 x i32> from two <2 x i32> inputs: lanes 0-1
; are elements 0-1 of %x, lanes 2-3 are elements 0-1 of %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <2 x i32> %x, i32 0
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
%vecext1 = extractelement <2 x i32> %x, i32 1
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
%vecext3 = extractelement <2 x i32> %y, i32 0
%vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
%vecext5 = extractelement <2 x i32> %y, i32 1
%vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
ret <4 x i32> %vecinit6
}
; Shuffle of two <2 x i64> inputs: result lane 0 is lane 0 of %x and result
; lane 1 is mask index 2, i.e. lane 0 of %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %vecinit2
}
; Lane 0 comes from the single element of the <1 x i64> %x, inserted into an
; undef <2 x i64>. The final shufflevector keeps that lane and fills lane 1
; with mask index 2 (lane 0 of %y).
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <1 x i64> %x, i32 0
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
%vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %vecinit2
}
; Element-by-element build of a <2 x i64>: lane 0 is element 0 of the
; <2 x i64> %x, lane 1 is the single element of the <1 x i64> %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <2 x i64> %x, i32 0
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
%vecext1 = extractelement <1 x i64> %y, i32 0
%vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
ret <2 x i64> %vecinit2
}
; Element-by-element build of a <2 x i64> from two <1 x i64> inputs: lane 0 is
; the element of %x, lane 1 is the element of %y.
; The CHECK line expects an `ins` from d[0] of one register into d[1] of another.
define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
entry:
%vecext = extractelement <1 x i64> %x, i32 0
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
%vecext1 = extractelement <1 x i64> %y, i32 0
%vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
ret <2 x i64> %vecinit2
}
declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>) declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) { define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) {