mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[AArch64 NEON] Try to generate CONCAT_VECTOR when lowering BUILD_VECTOR or SHUFFLE_VECTOR.
Replace r199791. llvm-svn: 200180
This commit is contained in:
parent
d83dee8270
commit
436aae7633
@ -4153,22 +4153,76 @@ AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
|
||||
|
||||
return false;
|
||||
}
|
||||
// Check whether a shuffle_vector could be presented as concat_vector.
|
||||
bool AArch64TargetLowering::isConcatVector(SDValue Op, SelectionDAG &DAG,
|
||||
SDValue V0, SDValue V1,
|
||||
const int *Mask,
|
||||
SDValue &Res) const {
|
||||
SDLoc DL(Op);
|
||||
EVT VT = Op.getValueType();
|
||||
if (VT.getSizeInBits() != 128)
|
||||
return false;
|
||||
if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
|
||||
VT.getVectorElementType() != V1.getValueType().getVectorElementType())
|
||||
return false;
|
||||
|
||||
// Check whether a Build Vector could be presented as Shuffle Vector. If yes,
|
||||
// try to call LowerVECTOR_SHUFFLE to lower it.
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
bool isContactVector = true;
|
||||
bool splitV0 = false;
|
||||
if (V0.getValueType().getSizeInBits() == 128)
|
||||
splitV0 = true;
|
||||
|
||||
for (int I = 0, E = NumElts / 2; I != E; I++) {
|
||||
if (Mask[I] != I) {
|
||||
isContactVector = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (isContactVector) {
|
||||
int offset = NumElts / 2;
|
||||
for (int I = NumElts / 2, E = NumElts; I != E; I++) {
|
||||
if (Mask[I] != I + splitV0 * offset) {
|
||||
isContactVector = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (isContactVector) {
|
||||
EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
|
||||
NumElts / 2);
|
||||
if (splitV0) {
|
||||
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
|
||||
DAG.getConstant(0, MVT::i64));
|
||||
}
|
||||
if (V1.getValueType().getSizeInBits() == 128) {
|
||||
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
|
||||
DAG.getConstant(0, MVT::i64));
|
||||
}
|
||||
Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check whether a Build Vector could be represented as a Shuffle Vector.
|
||||
// This Shuffle Vector may not be legalized, so the lengths of its operands
|
||||
// and the length of its result may not be equal.
|
||||
bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
|
||||
SDValue &Res) const {
|
||||
SDValue &V0, SDValue &V1,
|
||||
int *Mask) const {
|
||||
SDLoc DL(Op);
|
||||
EVT VT = Op.getValueType();
|
||||
unsigned NumElts = VT.getVectorNumElements();
|
||||
unsigned V0NumElts = 0;
|
||||
int Mask[16];
|
||||
SDValue V0, V1;
|
||||
|
||||
// Check if all elements are extracted from fewer than 3 vectors.
|
||||
for (unsigned i = 0; i < NumElts; ++i) {
|
||||
SDValue Elt = Op.getOperand(i);
|
||||
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
|
||||
if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
|
||||
Elt.getOperand(0).getValueType().getVectorElementType() !=
|
||||
VT.getVectorElementType())
|
||||
return false;
|
||||
|
||||
if (V0.getNode() == 0) {
|
||||
@ -4189,25 +4243,7 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (!V1.getNode() && V0NumElts == NumElts * 2) {
|
||||
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
|
||||
DAG.getConstant(NumElts, MVT::i64));
|
||||
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
|
||||
DAG.getConstant(0, MVT::i64));
|
||||
V0NumElts = V0.getValueType().getVectorNumElements();
|
||||
}
|
||||
|
||||
if (V1.getNode() && NumElts == V0NumElts &&
|
||||
V0NumElts == V1.getValueType().getVectorNumElements()) {
|
||||
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
|
||||
if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
|
||||
Res = Shuffle;
|
||||
else
|
||||
Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// If this is a case we can't handle, return null and let the default
|
||||
@ -4413,9 +4449,31 @@ AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
|
||||
// Try to lower this the same way a shuffle vector is lowered.
|
||||
SDValue Shuf;
|
||||
if (isKnownShuffleVector(Op, DAG, Shuf))
|
||||
return Shuf;
|
||||
SDValue V0, V1;
|
||||
int Mask[16];
|
||||
if (isKnownShuffleVector(Op, DAG, V0, V1, Mask)) {
|
||||
unsigned V0NumElts = V0.getValueType().getVectorNumElements();
|
||||
if (!V1.getNode() && V0NumElts == NumElts * 2) {
|
||||
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
|
||||
DAG.getConstant(NumElts, MVT::i64));
|
||||
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0,
|
||||
DAG.getConstant(0, MVT::i64));
|
||||
V0NumElts = V0.getValueType().getVectorNumElements();
|
||||
}
|
||||
|
||||
if (V1.getNode() && NumElts == V0NumElts &&
|
||||
V0NumElts == V1.getValueType().getVectorNumElements()) {
|
||||
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
|
||||
if (Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
|
||||
return Shuffle;
|
||||
else
|
||||
return LowerVECTOR_SHUFFLE(Shuffle, DAG);
|
||||
} else {
|
||||
SDValue Res;
|
||||
if (isConcatVector(Op, DAG, V0, V1, Mask, Res))
|
||||
return Res;
|
||||
}
|
||||
}
|
||||
|
||||
// If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
|
||||
// know the default expansion would otherwise fall back on something even
|
||||
@ -4601,6 +4659,10 @@ AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
|
||||
return DAG.getNode(ISDNo, dl, VT, V1, V2);
|
||||
}
|
||||
|
||||
SDValue Res;
|
||||
if (isConcatVector(Op, DAG, V1, V2, &ShuffleMask[0], Res))
|
||||
return Res;
|
||||
|
||||
// If the elements of the shuffle mask are all the same constant, we can
|
||||
// transform it into either NEON_VDUP or NEON_VDUPLANE
|
||||
if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
|
||||
|
@ -232,7 +232,11 @@ public:
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const;
|
||||
bool isConcatVector(SDValue Op, SelectionDAG &DAG, SDValue V0, SDValue V1,
|
||||
const int *Mask, SDValue &Res) const;
|
||||
|
||||
bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &V0,
|
||||
SDValue &V1, int *Mask) const;
|
||||
|
||||
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
|
||||
const AArch64Subtarget *ST) const;
|
||||
|
@ -1021,6 +1021,276 @@ entry:
|
||||
ret <2 x i32> %h
|
||||
}
|
||||
|
||||
define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
|
||||
ret <16 x i8> %vecinit30
|
||||
}
|
||||
|
||||
define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <8 x i8> %x, i32 0
|
||||
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
|
||||
%vecext1 = extractelement <8 x i8> %x, i32 1
|
||||
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
|
||||
%vecext3 = extractelement <8 x i8> %x, i32 2
|
||||
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
|
||||
%vecext5 = extractelement <8 x i8> %x, i32 3
|
||||
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
|
||||
%vecext7 = extractelement <8 x i8> %x, i32 4
|
||||
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
|
||||
%vecext9 = extractelement <8 x i8> %x, i32 5
|
||||
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
|
||||
%vecext11 = extractelement <8 x i8> %x, i32 6
|
||||
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
|
||||
%vecext13 = extractelement <8 x i8> %x, i32 7
|
||||
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
|
||||
%vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
|
||||
ret <16 x i8> %vecinit30
|
||||
}
|
||||
|
||||
define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <16 x i8> %x, i32 0
|
||||
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
|
||||
%vecext1 = extractelement <16 x i8> %x, i32 1
|
||||
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
|
||||
%vecext3 = extractelement <16 x i8> %x, i32 2
|
||||
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
|
||||
%vecext5 = extractelement <16 x i8> %x, i32 3
|
||||
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
|
||||
%vecext7 = extractelement <16 x i8> %x, i32 4
|
||||
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
|
||||
%vecext9 = extractelement <16 x i8> %x, i32 5
|
||||
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
|
||||
%vecext11 = extractelement <16 x i8> %x, i32 6
|
||||
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
|
||||
%vecext13 = extractelement <16 x i8> %x, i32 7
|
||||
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
|
||||
%vecext15 = extractelement <8 x i8> %y, i32 0
|
||||
%vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
|
||||
%vecext17 = extractelement <8 x i8> %y, i32 1
|
||||
%vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
|
||||
%vecext19 = extractelement <8 x i8> %y, i32 2
|
||||
%vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
|
||||
%vecext21 = extractelement <8 x i8> %y, i32 3
|
||||
%vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
|
||||
%vecext23 = extractelement <8 x i8> %y, i32 4
|
||||
%vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
|
||||
%vecext25 = extractelement <8 x i8> %y, i32 5
|
||||
%vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
|
||||
%vecext27 = extractelement <8 x i8> %y, i32 6
|
||||
%vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
|
||||
%vecext29 = extractelement <8 x i8> %y, i32 7
|
||||
%vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
|
||||
ret <16 x i8> %vecinit30
|
||||
}
|
||||
|
||||
define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <8 x i8> %x, i32 0
|
||||
%vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0
|
||||
%vecext1 = extractelement <8 x i8> %x, i32 1
|
||||
%vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1
|
||||
%vecext3 = extractelement <8 x i8> %x, i32 2
|
||||
%vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2
|
||||
%vecext5 = extractelement <8 x i8> %x, i32 3
|
||||
%vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3
|
||||
%vecext7 = extractelement <8 x i8> %x, i32 4
|
||||
%vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4
|
||||
%vecext9 = extractelement <8 x i8> %x, i32 5
|
||||
%vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5
|
||||
%vecext11 = extractelement <8 x i8> %x, i32 6
|
||||
%vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6
|
||||
%vecext13 = extractelement <8 x i8> %x, i32 7
|
||||
%vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7
|
||||
%vecext15 = extractelement <8 x i8> %y, i32 0
|
||||
%vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8
|
||||
%vecext17 = extractelement <8 x i8> %y, i32 1
|
||||
%vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9
|
||||
%vecext19 = extractelement <8 x i8> %y, i32 2
|
||||
%vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10
|
||||
%vecext21 = extractelement <8 x i8> %y, i32 3
|
||||
%vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11
|
||||
%vecext23 = extractelement <8 x i8> %y, i32 4
|
||||
%vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12
|
||||
%vecext25 = extractelement <8 x i8> %y, i32 5
|
||||
%vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13
|
||||
%vecext27 = extractelement <8 x i8> %y, i32 6
|
||||
%vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14
|
||||
%vecext29 = extractelement <8 x i8> %y, i32 7
|
||||
%vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15
|
||||
ret <16 x i8> %vecinit30
|
||||
}
|
||||
|
||||
define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
|
||||
ret <8 x i16> %vecinit14
|
||||
}
|
||||
|
||||
define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <4 x i16> %x, i32 0
|
||||
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
|
||||
%vecext1 = extractelement <4 x i16> %x, i32 1
|
||||
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
|
||||
%vecext3 = extractelement <4 x i16> %x, i32 2
|
||||
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
|
||||
%vecext5 = extractelement <4 x i16> %x, i32 3
|
||||
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
|
||||
%vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
|
||||
ret <8 x i16> %vecinit14
|
||||
}
|
||||
|
||||
define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <8 x i16> %x, i32 0
|
||||
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
|
||||
%vecext1 = extractelement <8 x i16> %x, i32 1
|
||||
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
|
||||
%vecext3 = extractelement <8 x i16> %x, i32 2
|
||||
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
|
||||
%vecext5 = extractelement <8 x i16> %x, i32 3
|
||||
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
|
||||
%vecext7 = extractelement <4 x i16> %y, i32 0
|
||||
%vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
|
||||
%vecext9 = extractelement <4 x i16> %y, i32 1
|
||||
%vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
|
||||
%vecext11 = extractelement <4 x i16> %y, i32 2
|
||||
%vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
|
||||
%vecext13 = extractelement <4 x i16> %y, i32 3
|
||||
%vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
|
||||
ret <8 x i16> %vecinit14
|
||||
}
|
||||
|
||||
define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <4 x i16> %x, i32 0
|
||||
%vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0
|
||||
%vecext1 = extractelement <4 x i16> %x, i32 1
|
||||
%vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1
|
||||
%vecext3 = extractelement <4 x i16> %x, i32 2
|
||||
%vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2
|
||||
%vecext5 = extractelement <4 x i16> %x, i32 3
|
||||
%vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3
|
||||
%vecext7 = extractelement <4 x i16> %y, i32 0
|
||||
%vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4
|
||||
%vecext9 = extractelement <4 x i16> %y, i32 1
|
||||
%vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5
|
||||
%vecext11 = extractelement <4 x i16> %y, i32 2
|
||||
%vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6
|
||||
%vecext13 = extractelement <4 x i16> %y, i32 3
|
||||
%vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7
|
||||
ret <8 x i16> %vecinit14
|
||||
}
|
||||
|
||||
define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
ret <4 x i32> %vecinit6
|
||||
}
|
||||
|
||||
define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <2 x i32> %x, i32 0
|
||||
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
|
||||
%vecext1 = extractelement <2 x i32> %x, i32 1
|
||||
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
|
||||
%vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
|
||||
ret <4 x i32> %vecinit6
|
||||
}
|
||||
|
||||
define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <4 x i32> %x, i32 0
|
||||
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
|
||||
%vecext1 = extractelement <4 x i32> %x, i32 1
|
||||
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
|
||||
%vecext3 = extractelement <2 x i32> %y, i32 0
|
||||
%vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
|
||||
%vecext5 = extractelement <2 x i32> %y, i32 1
|
||||
%vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
|
||||
ret <4 x i32> %vecinit6
|
||||
}
|
||||
|
||||
define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <2 x i32> %x, i32 0
|
||||
%vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0
|
||||
%vecext1 = extractelement <2 x i32> %x, i32 1
|
||||
%vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1
|
||||
%vecext3 = extractelement <2 x i32> %y, i32 0
|
||||
%vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2
|
||||
%vecext5 = extractelement <2 x i32> %y, i32 1
|
||||
%vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3
|
||||
ret <4 x i32> %vecinit6
|
||||
}
|
||||
|
||||
define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x i64> %vecinit2
|
||||
}
|
||||
|
||||
define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <1 x i64> %x, i32 0
|
||||
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
|
||||
%vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2>
|
||||
ret <2 x i64> %vecinit2
|
||||
}
|
||||
|
||||
define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <2 x i64> %x, i32 0
|
||||
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
|
||||
%vecext1 = extractelement <1 x i64> %y, i32 0
|
||||
%vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
|
||||
ret <2 x i64> %vecinit2
|
||||
}
|
||||
|
||||
define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 {
|
||||
; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64:
|
||||
; CHECK: ins {{v[0-9]+}}.d[1], {{v[0-9]+}}.d[0]
|
||||
entry:
|
||||
%vecext = extractelement <1 x i64> %x, i32 0
|
||||
%vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0
|
||||
%vecext1 = extractelement <1 x i64> %y, i32 0
|
||||
%vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1
|
||||
ret <2 x i64> %vecinit2
|
||||
}
|
||||
|
||||
declare <1 x i16> @llvm.aarch64.neon.vsqadd.v1i16(<1 x i16>, <1 x i16>)
|
||||
|
||||
define <1 x i16> @test_copy_FPR16_FPR16(<1 x i16> %a, <1 x i16> %b) {
|
||||
|
Loading…
Reference in New Issue
Block a user