mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
[ARM] Look through concat when lowering in-place shuffles (VZIP, ..)
Currently, we canonicalize shuffles that produce a result larger than their operands with:

    shuffle(concat(v1, undef), concat(v2, undef))
    ->
    shuffle(concat(v1, v2), undef)

because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).

This is useful in the general case, but there are special cases where native shuffles produce larger results: the two-result ops.

We can look through the concat when lowering them:

    shuffle(concat(v1, v2), undef)
    ->
    concat(VZIP(v1, v2):0, :1)

This lets us generate the native shuffles instead of scalarizing to dozens of VMOVs.

Differential Revision: http://reviews.llvm.org/D10424
llvm-svn: 240118
This commit is contained in:
parent
d492dfd93e
commit
57ebe7191c
@ -5715,6 +5715,44 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
|
||||
.getValue(WhichResult);
|
||||
}
|
||||
|
||||
// Also check for these shuffles through CONCAT_VECTORS: we canonicalize
|
||||
// shuffles that produce a result larger than their operands with:
|
||||
// shuffle(concat(v1, undef), concat(v2, undef))
|
||||
// ->
|
||||
// shuffle(concat(v1, v2), undef)
|
||||
// because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
|
||||
//
|
||||
// This is useful in the general case, but there are special cases where
|
||||
// native shuffles produce larger results: the two-result ops.
|
||||
//
|
||||
// Look through the concat when lowering them:
|
||||
// shuffle(concat(v1, v2), undef)
|
||||
// ->
|
||||
// concat(VZIP(v1, v2):0, :1)
|
||||
//
|
||||
if (V1->getOpcode() == ISD::CONCAT_VECTORS &&
|
||||
V2->getOpcode() == ISD::UNDEF) {
|
||||
SDValue SubV1 = V1->getOperand(0);
|
||||
SDValue SubV2 = V1->getOperand(1);
|
||||
EVT SubVT = SubV1.getValueType();
|
||||
|
||||
// We expect mask indices into the UNDEF operand to have been canonicalized to -1.
|
||||
assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) {
|
||||
return i < (int)VT.getVectorNumElements();
|
||||
}) && "Unexpected shuffle index into UNDEF operand!");
|
||||
|
||||
if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
|
||||
ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
|
||||
if (isV_UNDEF)
|
||||
SubV2 = SubV1;
|
||||
assert((WhichResult == 0) &&
|
||||
"In-place shuffle of concat can only have one result!");
|
||||
SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
|
||||
SubV1, SubV2);
|
||||
return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
|
||||
Res.getValue(1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the shuffle is not directly supported and it has 4 elements, use
|
||||
|
@ -20,40 +20,9 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
define <16 x i8> @vtrni8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
; CHECK-LABEL: vtrni8_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d19, [r0]
|
||||
; CHECK-NEXT: vldr d18, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[0]
|
||||
; CHECK-NEXT: vmov.8 d16[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[0]
|
||||
; CHECK-NEXT: vmov.8 d16[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[2]
|
||||
; CHECK-NEXT: vmov.8 d16[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[2]
|
||||
; CHECK-NEXT: vmov.8 d16[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[4]
|
||||
; CHECK-NEXT: vmov.8 d16[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[4]
|
||||
; CHECK-NEXT: vmov.8 d16[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[6]
|
||||
; CHECK-NEXT: vmov.8 d16[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[6]
|
||||
; CHECK-NEXT: vmov.8 d16[7], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[1]
|
||||
; CHECK-NEXT: vmov.8 d17[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[1]
|
||||
; CHECK-NEXT: vmov.8 d17[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[3]
|
||||
; CHECK-NEXT: vmov.8 d17[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[3]
|
||||
; CHECK-NEXT: vmov.8 d17[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[5]
|
||||
; CHECK-NEXT: vmov.8 d17[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[5]
|
||||
; CHECK-NEXT: vmov.8 d17[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[7]
|
||||
; CHECK-NEXT: vmov.8 d17[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[7]
|
||||
; CHECK-NEXT: vmov.8 d17[7], r0
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vtrn.8 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
@ -83,26 +52,11 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||
define <8 x i16> @vtrni16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||
; CHECK-LABEL: vtrni16_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[0]
|
||||
; CHECK-NEXT: vmov.16 d18[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[0]
|
||||
; CHECK-NEXT: vmov.16 d18[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[2]
|
||||
; CHECK-NEXT: vmov.16 d18[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[2]
|
||||
; CHECK-NEXT: vmov.16 d18[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[1]
|
||||
; CHECK-NEXT: vmov.16 d19[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[1]
|
||||
; CHECK-NEXT: vmov.16 d19[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[3]
|
||||
; CHECK-NEXT: vmov.16 d19[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[3]
|
||||
; CHECK-NEXT: vmov.16 d19[3], r0
|
||||
; CHECK-NEXT: vmov r0, r1, d18
|
||||
; CHECK-NEXT: vmov r2, r3, d19
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vtrn.16 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <4 x i16>, <4 x i16>* %A
|
||||
%tmp2 = load <4 x i16>, <4 x i16>* %B
|
||||
@ -132,8 +86,7 @@ define <4 x i32> @vtrni32_Qres(<2 x i32>* %A, <2 x i32>* %B) nounwind {
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vrev64.32 q9, q8
|
||||
; CHECK-NEXT: vuzp.32 q8, q9
|
||||
; CHECK-NEXT: vtrn.32 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
@ -165,8 +118,7 @@ define <4 x float> @vtrnf_Qres(<2 x float>* %A, <2 x float>* %B) nounwind {
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vrev64.32 q9, q8
|
||||
; CHECK-NEXT: vuzp.32 q8, q9
|
||||
; CHECK-NEXT: vtrn.32 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
@ -329,32 +281,9 @@ define <8 x i8> @vtrni8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
define <16 x i8> @vtrni8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
; CHECK-LABEL: vtrni8_undef_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d18, [r0]
|
||||
; CHECK-NEXT: vldr d19, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[0]
|
||||
; CHECK-NEXT: vmov.8 d16[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[2]
|
||||
; CHECK-NEXT: vmov.8 d16[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[2]
|
||||
; CHECK-NEXT: vmov.8 d16[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[4]
|
||||
; CHECK-NEXT: vmov.8 d16[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[6]
|
||||
; CHECK-NEXT: vmov.8 d16[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[6]
|
||||
; CHECK-NEXT: vmov.8 d16[7], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[1]
|
||||
; CHECK-NEXT: vmov.8 d17[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[1]
|
||||
; CHECK-NEXT: vmov.8 d17[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[3]
|
||||
; CHECK-NEXT: vmov.8 d17[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[3]
|
||||
; CHECK-NEXT: vmov.8 d17[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[5]
|
||||
; CHECK-NEXT: vmov.8 d17[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[7]
|
||||
; CHECK-NEXT: vmov.8 d17[7], r0
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vtrn.8 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
|
@ -20,40 +20,9 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
define <16 x i8> @vuzpi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
; CHECK-LABEL: vuzpi8_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d19, [r0]
|
||||
; CHECK-NEXT: vldr d18, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[0]
|
||||
; CHECK-NEXT: vmov.8 d16[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[2]
|
||||
; CHECK-NEXT: vmov.8 d16[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[4]
|
||||
; CHECK-NEXT: vmov.8 d16[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[6]
|
||||
; CHECK-NEXT: vmov.8 d16[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[0]
|
||||
; CHECK-NEXT: vmov.8 d16[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[2]
|
||||
; CHECK-NEXT: vmov.8 d16[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[4]
|
||||
; CHECK-NEXT: vmov.8 d16[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[6]
|
||||
; CHECK-NEXT: vmov.8 d16[7], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[1]
|
||||
; CHECK-NEXT: vmov.8 d17[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[3]
|
||||
; CHECK-NEXT: vmov.8 d17[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[5]
|
||||
; CHECK-NEXT: vmov.8 d17[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[7]
|
||||
; CHECK-NEXT: vmov.8 d17[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[1]
|
||||
; CHECK-NEXT: vmov.8 d17[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[3]
|
||||
; CHECK-NEXT: vmov.8 d17[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[5]
|
||||
; CHECK-NEXT: vmov.8 d17[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[7]
|
||||
; CHECK-NEXT: vmov.8 d17[7], r0
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vuzp.8 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
@ -83,26 +52,11 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||
define <8 x i16> @vuzpi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||
; CHECK-LABEL: vuzpi16_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[0]
|
||||
; CHECK-NEXT: vmov.16 d18[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[2]
|
||||
; CHECK-NEXT: vmov.16 d18[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[0]
|
||||
; CHECK-NEXT: vmov.16 d18[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[2]
|
||||
; CHECK-NEXT: vmov.16 d18[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[1]
|
||||
; CHECK-NEXT: vmov.16 d19[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[3]
|
||||
; CHECK-NEXT: vmov.16 d19[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[1]
|
||||
; CHECK-NEXT: vmov.16 d19[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[3]
|
||||
; CHECK-NEXT: vmov.16 d19[3], r0
|
||||
; CHECK-NEXT: vmov r0, r1, d18
|
||||
; CHECK-NEXT: vmov r2, r3, d19
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vuzp.16 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <4 x i16>, <4 x i16>* %A
|
||||
%tmp2 = load <4 x i16>, <4 x i16>* %B
|
||||
@ -266,32 +220,9 @@ define <8 x i8> @vuzpi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
define <16 x i8> @vuzpi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
; CHECK-LABEL: vuzpi8_undef_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d18, [r0]
|
||||
; CHECK-NEXT: vldr d19, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[0]
|
||||
; CHECK-NEXT: vmov.8 d16[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[2]
|
||||
; CHECK-NEXT: vmov.8 d16[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[0]
|
||||
; CHECK-NEXT: vmov.8 d16[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[2]
|
||||
; CHECK-NEXT: vmov.8 d16[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[4]
|
||||
; CHECK-NEXT: vmov.8 d16[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[6]
|
||||
; CHECK-NEXT: vmov.8 d16[7], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[1]
|
||||
; CHECK-NEXT: vmov.8 d17[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[3]
|
||||
; CHECK-NEXT: vmov.8 d17[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[5]
|
||||
; CHECK-NEXT: vmov.8 d17[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[7]
|
||||
; CHECK-NEXT: vmov.8 d17[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[5]
|
||||
; CHECK-NEXT: vmov.8 d17[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[7]
|
||||
; CHECK-NEXT: vmov.8 d17[7], r0
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vuzp.8 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
|
@ -20,40 +20,9 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
; CHECK-LABEL: vzipi8_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d19, [r0]
|
||||
; CHECK-NEXT: vldr d18, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[0]
|
||||
; CHECK-NEXT: vmov.8 d16[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[0]
|
||||
; CHECK-NEXT: vmov.8 d16[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[1]
|
||||
; CHECK-NEXT: vmov.8 d16[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[1]
|
||||
; CHECK-NEXT: vmov.8 d16[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[2]
|
||||
; CHECK-NEXT: vmov.8 d16[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[2]
|
||||
; CHECK-NEXT: vmov.8 d16[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[3]
|
||||
; CHECK-NEXT: vmov.8 d16[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[3]
|
||||
; CHECK-NEXT: vmov.8 d16[7], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[4]
|
||||
; CHECK-NEXT: vmov.8 d17[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[4]
|
||||
; CHECK-NEXT: vmov.8 d17[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[5]
|
||||
; CHECK-NEXT: vmov.8 d17[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[5]
|
||||
; CHECK-NEXT: vmov.8 d17[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[6]
|
||||
; CHECK-NEXT: vmov.8 d17[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[6]
|
||||
; CHECK-NEXT: vmov.8 d17[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[7]
|
||||
; CHECK-NEXT: vmov.8 d17[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[7]
|
||||
; CHECK-NEXT: vmov.8 d17[7], r0
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vzip.8 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
@ -83,26 +52,11 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||
define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||
; CHECK-LABEL: vzipi16_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[0]
|
||||
; CHECK-NEXT: vmov.16 d18[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[0]
|
||||
; CHECK-NEXT: vmov.16 d18[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[1]
|
||||
; CHECK-NEXT: vmov.16 d18[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[1]
|
||||
; CHECK-NEXT: vmov.16 d18[3], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[2]
|
||||
; CHECK-NEXT: vmov.16 d19[0], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[2]
|
||||
; CHECK-NEXT: vmov.16 d19[1], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d16[3]
|
||||
; CHECK-NEXT: vmov.16 d19[2], r0
|
||||
; CHECK-NEXT: vmov.u16 r0, d17[3]
|
||||
; CHECK-NEXT: vmov.16 d19[3], r0
|
||||
; CHECK-NEXT: vmov r0, r1, d18
|
||||
; CHECK-NEXT: vmov r2, r3, d19
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vzip.16 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
%tmp1 = load <4 x i16>, <4 x i16>* %A
|
||||
%tmp2 = load <4 x i16>, <4 x i16>* %B
|
||||
@ -266,32 +220,9 @@ define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||
; CHECK-LABEL: vzipi8_undef_Qres:
|
||||
; CHECK: @ BB#0:
|
||||
; CHECK-NEXT: vldr d18, [r0]
|
||||
; CHECK-NEXT: vldr d19, [r1]
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[0]
|
||||
; CHECK-NEXT: vmov.8 d16[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[1]
|
||||
; CHECK-NEXT: vmov.8 d16[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[1]
|
||||
; CHECK-NEXT: vmov.8 d16[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[2]
|
||||
; CHECK-NEXT: vmov.8 d16[5], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[3]
|
||||
; CHECK-NEXT: vmov.8 d16[6], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[3]
|
||||
; CHECK-NEXT: vmov.8 d16[7], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[4]
|
||||
; CHECK-NEXT: vmov.8 d17[0], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[4]
|
||||
; CHECK-NEXT: vmov.8 d17[1], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[5]
|
||||
; CHECK-NEXT: vmov.8 d17[2], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[5]
|
||||
; CHECK-NEXT: vmov.8 d17[3], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d18[6]
|
||||
; CHECK-NEXT: vmov.8 d17[4], r0
|
||||
; CHECK-NEXT: vmov.u8 r0, d19[7]
|
||||
; CHECK-NEXT: vmov.8 d17[7], r0
|
||||
; CHECK-NEXT: vldr d17, [r1]
|
||||
; CHECK-NEXT: vldr d16, [r0]
|
||||
; CHECK-NEXT: vzip.8 d16, d17
|
||||
; CHECK-NEXT: vmov r0, r1, d16
|
||||
; CHECK-NEXT: vmov r2, r3, d17
|
||||
; CHECK-NEXT: mov pc, lr
|
||||
|
Loading…
x
Reference in New Issue
Block a user