1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[PowerPC] Fix a performance bug for PPC::XXSLDWI.

There are some VectorShuffle Nodes in SDAG which can be selected to XXSLDWI
instruction, this patch recognizes them and does the selection to improve the
PPC performance.

llvm-svn: 303822
This commit is contained in:
Tony Jiang 2017-05-24 23:48:29 +00:00
parent bbfcd24e3e
commit 82d9e3d26c
8 changed files with 440 additions and 41 deletions

View File

@ -1596,9 +1596,8 @@ bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
return true;
}
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
// Check that the mask is shuffling words
static bool isWordShuffleMask(ShuffleVectorSDNode *N) {
for (unsigned i = 0; i < 4; ++i) {
unsigned B0 = N->getMaskElt(i*4);
unsigned B1 = N->getMaskElt(i*4+1);
@ -1610,6 +1609,14 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
return false;
}
return true;
}
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
unsigned &InsertAtByte, bool &Swap, bool IsLE) {
if (!isWordShuffleMask(N))
return false;
// Now we look at mask elements 0,4,8,12
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
@ -1680,6 +1687,69 @@ bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
return false;
}
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE) {
assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
// Ensure each byte index of the word is consecutive.
if (!isWordShuffleMask(N))
return false;
// Now we look at mask elements 0,4,8,12, which are the beginning of words.
unsigned M0 = N->getMaskElt(0) / 4;
unsigned M1 = N->getMaskElt(4) / 4;
unsigned M2 = N->getMaskElt(8) / 4;
unsigned M3 = N->getMaskElt(12) / 4;
// If both vector operands for the shuffle are the same vector, the mask will
// contain only elements from the first one and the second one will be undef.
if (N->getOperand(1).isUndef()) {
assert(M0 < 4 && "Indexing into an undef vector?");
if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
return false;
ShiftElts = IsLE ? (4 - M0) % 4 : M0;
Swap = false;
return true;
}
// Ensure each word index of the ShuffleVector Mask is consecutive.
if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
return false;
if (IsLE) {
if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
// Input vectors don't need to be swapped if the leading element
// of the result is one of the 3 left elements of the second vector
// (or if there is no shift to be done at all).
Swap = false;
ShiftElts = (8 - M0) % 8;
} else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
// Input vectors need to be swapped if the leading element
// of the result is one of the 3 left elements of the first vector
// (or if we're shifting by 4 - thereby simply swapping the vectors).
Swap = true;
ShiftElts = (4 - M0) % 4;
}
return true;
} else { // BE
if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
// Input vectors don't need to be swapped if the leading element
// of the result is one of the 4 elements of the first vector.
Swap = false;
ShiftElts = M0;
} else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
// Input vectors need to be swapped if the leading element
// of the result is one of the 4 elements of the right vector.
Swap = true;
ShiftElts = M0 - 4;
}
return true;
}
}
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
@ -7679,6 +7749,20 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
if (Subtarget.hasVSX() &&
PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
if (Swap)
std::swap(V1, V2);
SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Conv2 =
DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
DAG.getConstant(ShiftElts, dl, MVT::i32));
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
}
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

View File

@ -450,7 +450,11 @@ namespace llvm {
/// a VMRGEW or VMRGOW instruction
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
unsigned ShuffleKind, SelectionDAG &DAG);
/// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
/// for a XXSLDWI instruction.
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
bool &Swap, bool IsLE);
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
/// shift amount, otherwise return -1.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,

View File

@ -46,7 +46,7 @@ def SDT_PPCVecSplat : SDTypeProfile<1, 2, [ SDTCisVec<0>,
]>;
def SDT_PPCVecShift : SDTypeProfile<1, 3, [ SDTCisVec<0>,
SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>
SDTCisVec<1>, SDTCisVec<2>, SDTCisPtrTy<3>
]>;
def SDT_PPCVecInsert : SDTypeProfile<1, 3, [ SDTCisVec<0>,

View File

@ -1066,6 +1066,10 @@ def : Pat<(v4f32 (PPCxxswapd v4f32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v4i32 (PPCxxswapd v4i32:$src)), (XXPERMDI $src, $src, 2)>;
def : Pat<(v2f64 (PPCswapNoChain v2f64:$src)), (XXPERMDI $src, $src, 2)>;
// PPCvecshl XT, XA, XA, 2 can be selected to both XXSLDWI XT,XA,XA,2 and
// XXSWAPD XT,XA (i.e. XXPERMDI XT,XA,XA,2), the later one is more profitable.
def : Pat<(v4i32 (PPCvecshl v4i32:$src, v4i32:$src, 2)), (XXPERMDI $src, $src, 2)>;
// Selects.
def : Pat<(v2f64 (selectcc i1:$lhs, i1:$rhs, v2f64:$tval, v2f64:$fval, SETLT)),
(SELECT_VSRC (CRANDC $lhs, $rhs), $tval, $fval)>;

View File

@ -21,7 +21,7 @@ entry:
ret <16 x i8> %strided.vec
; CHECK-LABEL: @test2
; CHECK: vsldoi 2, 2, 2, 12
; CHECK: xxsldwi 34, 34, 34, 3
; CHECK: blr
}

View File

@ -6,7 +6,7 @@
define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -45,7 +45,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
ret <4 x float> %vecins
@ -54,7 +54,7 @@ entry:
define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -93,7 +93,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
ret <4 x float> %vecins
@ -102,7 +102,7 @@ entry:
define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -141,7 +141,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
ret <4 x float> %vecins
@ -150,7 +150,7 @@ entry:
define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -189,7 +189,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x float> %vecins
@ -198,7 +198,7 @@ entry:
define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -237,7 +237,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
ret <4 x i32> %vecins
@ -246,7 +246,7 @@ entry:
define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -285,7 +285,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
ret <4 x i32> %vecins
@ -294,7 +294,7 @@ entry:
define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -333,7 +333,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
ret <4 x i32> %vecins
@ -342,7 +342,7 @@ entry:
define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -381,7 +381,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
ret <4 x i32> %vecins
@ -546,7 +546,7 @@ entry:
define <4 x float> @_Z7testInsILj0ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -585,7 +585,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
ret <4 x float> %vecins
@ -594,7 +594,7 @@ entry:
define <4 x float> @_Z7testInsILj1ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -633,7 +633,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
ret <4 x float> %vecins
@ -642,7 +642,7 @@ entry:
define <4 x float> @_Z7testInsILj2ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -681,7 +681,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
ret <4 x float> %vecins
@ -690,7 +690,7 @@ entry:
define <4 x float> @_Z7testInsILj3ELj0EDv4_fET1_S1_S1_r(<4 x float> %a, <4 x float> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -729,7 +729,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_fET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = shufflevector <4 x float> %b, <4 x float> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
ret <4 x float> %vecins
@ -738,7 +738,7 @@ entry:
define <4 x i32> @_Z7testInsILj0ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -777,7 +777,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 12
; CHECK-BE-LABEL: _Z7testInsILj0ELj3EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 0
%vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 3, i32 5, i32 6, i32 7>
ret <4 x i32> %vecins
@ -786,7 +786,7 @@ entry:
define <4 x i32> @_Z7testInsILj1ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -825,7 +825,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 8
; CHECK-BE-LABEL: _Z7testInsILj1ELj3EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 4
%vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 3, i32 6, i32 7>
ret <4 x i32> %vecins
@ -834,7 +834,7 @@ entry:
define <4 x i32> @_Z7testInsILj2ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -873,7 +873,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 4
; CHECK-BE-LABEL: _Z7testInsILj2ELj3EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 8
%vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 3, i32 7>
ret <4 x i32> %vecins
@ -882,7 +882,7 @@ entry:
define <4 x i32> @_Z7testInsILj3ELj0EDv4_jET1_S1_S1_r(<4 x i32> %a, <4 x i32> %b) {
entry:
; CHECK-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
; CHECK: xxsldwi 0, 35, 35, 2
; CHECK: xxswapd 0, 35
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj0EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 3
@ -921,7 +921,7 @@ entry:
; CHECK: xxsldwi 0, 35, 35, 3
; CHECK: xxinsertw 34, 0, 0
; CHECK-BE-LABEL: _Z7testInsILj3ELj3EDv4_jET1_S1_S1_
; CHECK-BE: xxsldwi 0, 35, 35, 2
; CHECK-BE: xxswapd 0, 35
; CHECK-BE: xxinsertw 34, 0, 12
%vecins = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
ret <4 x i32> %vecins

View File

@ -9,11 +9,11 @@ define <4 x float> @bar(float* %p, float* %q) {
%6 = shufflevector <12 x float> %5, <12 x float> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
ret <4 x float> %6
; CHECK: vsldoi
; CHECK: xxsldwi
; CHECK-NEXT: vmrghw
; CHECK-NEXT: vmrglw
; CHECK-NEXT: vsldoi
; CHECK-NEXT: vsldoi
; CHECK-NEXT: vsldoi
; CHECK-NEXT: xxsldwi
; CHECK-NEXT: xxsldwi
; CHECK-NEXT: xxsldwi
; CHECK-NEXT: blr
}

View File

@ -0,0 +1,307 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-LE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | \
; RUN: FileCheck %s -check-prefix=CHECK-BE
; Possible LE ShuffleVector masks (Case 1):
; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3)
; ShuffleVector((vector int)a, vector(int)b, 7, 0, 1, 2)
; ShuffleVector((vector int)a, vector(int)b, 6, 7, 0, 1)
; ShuffleVector((vector int)a, vector(int)b, 5, 6, 7, 0)
; which targets at:
; xxsldwi a, b, 0
; xxsldwi a, b, 1
; xxsldwi a, b, 2
; xxsldwi a, b, 3
; Possible LE Swap ShuffleVector masks (Case 2):
; ShuffleVector((vector int)a, vector(int)b, 4, 5, 6, 7)
; ShuffleVector((vector int)a, vector(int)b, 3, 4, 5, 6)
; ShuffleVector((vector int)a, vector(int)b, 2, 3, 4, 5)
; ShuffleVector((vector int)a, vector(int)b, 1, 2, 3, 4)
; which targets at:
; xxsldwi b, a, 0
; xxsldwi b, a, 1
; xxsldwi b, a, 2
; xxsldwi b, a, 3
; Possible LE ShuffleVector masks when a == b, b is undef (Case 3):
; ShuffleVector((vector int)a, vector(int)a, 0, 1, 2, 3)
; ShuffleVector((vector int)a, vector(int)a, 3, 0, 1, 2)
; ShuffleVector((vector int)a, vector(int)a, 2, 3, 0, 1)
; ShuffleVector((vector int)a, vector(int)a, 1, 2, 3, 0)
; which targets at:
; xxsldwi a, a, 0
; xxsldwi a, a, 1
; xxsldwi a, a, 2
; xxsldwi a, a, 3
; Possible BE ShuffleVector masks (Case 4):
; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3)
; ShuffleVector((vector int)a, vector(int)b, 1, 2, 3, 4)
; ShuffleVector((vector int)a, vector(int)b, 2, 3, 4, 5)
; ShuffleVector((vector int)a, vector(int)b, 3, 4, 5, 6)
; which targets at:
; xxsldwi b, a, 0
; xxsldwi b, a, 1
; xxsldwi a, a, 2
; xxsldwi a, a, 3
; Possible BE Swap ShuffleVector masks (Case 5):
; ShuffleVector((vector int)a, vector(int)b, 4, 5, 6, 7)
; ShuffleVector((vector int)a, vector(int)b, 5, 6, 7, 0)
; ShuffleVector((vector int)a, vector(int)b, 6, 7, 0, 1)
; ShuffleVector((vector int)a, vector(int)b, 7, 0, 1, 2)
; which targets at:
; xxsldwi b, a, 0
; xxsldwi b, a, 1
; xxsldwi b, a, 2
; xxsldwi b, a, 3
; Possible BE ShuffleVector masks when a == b, b is undef (Case 6):
; ShuffleVector((vector int)a, vector(int)b, 0, 1, 2, 3)
; ShuffleVector((vector int)a, vector(int)a, 1, 2, 3, 0)
; ShuffleVector((vector int)a, vector(int)a, 2, 3, 0, 1)
; ShuffleVector((vector int)a, vector(int)a, 3, 0, 1, 2)
; which targets at:
; xxsldwi a, a, 0
; xxsldwi a, a, 1
; xxsldwi a, a, 2
; xxsldwi a, a, 3
define <4 x i32> @check_le_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_0
; CHECK-LE: blr
}
define <4 x i32> @check_le_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_1
; CHECK-LE: xxsldwi 34, 34, 35, 1
; CHECK-LE: blr
}
define <4 x i32> @check_le_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_2
; CHECK-LE: xxsldwi 34, 34, 35, 2
; CHECK-LE: blr
}
define <4 x i32> @check_le_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_vec_sldwi_va_vb_3
; CHECK-LE: xxsldwi 34, 34, 35, 3
; CHECK-LE: blr
}
define <4 x i32> @check_le_swap_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_0
; CHECK-LE; vmr 2, 3
; CHECK-LE: blr
}
define <4 x i32> @check_le_swap_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_1
; CHECK-LE: xxsldwi 34, 35, 34, 1
; CHECK-LE: blr
}
define <4 x i32> @check_le_swap_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_2
; CHECK-LE: xxsldwi 34, 35, 34, 2
; CHECK-LE: blr
}
define <4 x i32> @check_le_swap_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_swap_vec_sldwi_va_vb_3
; CHECK-LE: xxsldwi 34, 35, 34, 3
; CHECK-LE: blr
}
define <4 x i32> @check_le_vec_sldwi_va_undef_0(<4 x i32> %VA) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_0
; CHECK-LE: blr
}
define <4 x i32> @check_le_vec_sldwi_va_undef_1(<4 x i32> %VA) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_le_vec_sldwi_va_undef_1
; CHECK-LE: xxsldwi 34, 34, 34, 1
; CHECK-LE: blr
}
define <4 x i32> @check_le_vec_sldwi_va_undef_2(<4 x i32> %VA) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_2
; CHECK-LE: xxswapd 34, 34
; CHECK-LE: blr
}
define <4 x i32> @check_le_vec_sldwi_va_undef_3(<4 x i32> %VA) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_le_vec_sldwi_va_undef_3
; CHECK-LE: xxsldwi 34, 34, 34, 3
; CHECK-LE: blr
}
define <4 x i32> @check_be_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_0
; CHECK-BE: blr
}
define <4 x i32> @check_be_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_1
; CHECK-BE: xxsldwi 34, 34, 35, 1
; CHECK-BE: blr
}
define <4 x i32> @check_be_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_2
; CHECK-BE: xxsldwi 34, 34, 35, 2
; CHECK-BE: blr
}
define <4 x i32> @check_be_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_vec_sldwi_va_vb_3
; CHECK-BE: xxsldwi 34, 34, 35, 3
; CHECK-BE: blr
}
define <4 x i32> @check_be_swap_vec_sldwi_va_vb_0(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_0
; CHECK-LE; vmr 2, 3
; CHECK-BE: blr
}
define <4 x i32> @check_be_swap_vec_sldwi_va_vb_1(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_1
; CHECK-BE: xxsldwi 34, 35, 34, 1
; CHECK-BE: blr
}
define <4 x i32> @check_be_swap_vec_sldwi_va_vb_2(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 6, i32 7, i32 0, i32 1>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_2
; CHECK-BE: xxsldwi 34, 35, 34, 2
; CHECK-BE: blr
}
define <4 x i32> @check_be_swap_vec_sldwi_va_vb_3(<4 x i32> %VA, <4 x i32> %VB) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> %VB, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_swap_vec_sldwi_va_vb_3
; CHECK-BE: xxsldwi 34, 35, 34, 3
; CHECK-BE: blr
}
define <4 x i32> @check_be_vec_sldwi_va_undef_0(<4 x i32> %VA) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
ret <4 x i32> %0
; CHECK-LE-LABEL: @check_be_vec_sldwi_va_undef_0
; CHECK-BE: blr
}
define <4 x i32> @check_be_vec_sldwi_va_undef_1(<4 x i32> %VA) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_1
; CHECK-BE: xxsldwi 34, 34, 34, 1
; CHECK-BE: blr
}
define <4 x i32> @check_be_vec_sldwi_va_undef_2(<4 x i32> %VA) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_2
; CHECK-BE: xxswapd 34, 34
; CHECK-BE: blr
}
define <4 x i32> @check_be_vec_sldwi_va_undef_3(<4 x i32> %VA) {
entry:
%0 = shufflevector <4 x i32> %VA, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
ret <4 x i32> %0
; CHECK-BE-LABEL: @check_be_vec_sldwi_va_undef_3
; CHECK-BE: xxsldwi 34, 34, 34, 3
; CHECK-BE: blr
}
; More test cases to test different types of vector inputs
define <16 x i8> @test_le_vec_sldwi_v16i8_v16i8(<16 x i8> %VA, <16 x i8> %VB) {
entry:
%0 = shufflevector <16 x i8> %VA, <16 x i8> %VB,<16 x i32> <i32 28, i32 29, i32 30, i32 31,i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
ret <16 x i8> %0
; CHECK-LE-LABEL: @test_le_vec_sldwi_v16i8_v16i8
; CHECK-LE: xxsldwi 34, 34, 35, 1
; CHECK-LE: blr
}
define <8 x i16> @test_le_vec_sldwi_v8i16_v8i16(<8 x i16> %VA, <8 x i16> %VB) {
entry:
%0 = shufflevector <8 x i16> %VA, <8 x i16> %VB,<8 x i32> <i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
ret <8 x i16> %0
; CHECK-LE-LABEL: @test_le_vec_sldwi_v8i16_v8i16
; CHECK-LE: xxsldwi 34, 34, 35, 1
; CHECK-LE: blr
}
; Note here xxpermdi 34, 34, 35, 2 <=> xxsldwi 34, 34, 35, 2
define <2 x i64> @test_be_vec_sldwi_v2i64_v2i64(<2 x i64> %VA, <2 x i64> %VB) {
entry:
%0 = shufflevector <2 x i64> %VA, <2 x i64> %VB,<2 x i32> <i32 3, i32 0>
ret <2 x i64> %0
; CHECK-LE-LABEL: @test_be_vec_sldwi_v2i64_v2i64
; CHECK-LE: xxpermdi 34, 34, 35, 2
; CHECK-LE: blr
}