1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

Adds code to PPC ISEL lowering to recognize half-word inserts from vector_shuffles, and use P9 shift and vector insert instructions instead of vperm.

Differential Revision: https://reviews.llvm.org/D34160

llvm-svn: 317111
This commit is contained in:
Graham Yiu 2017-11-01 18:06:56 +00:00
parent dcf81a0c5e
commit 3a093847f1
4 changed files with 439 additions and 5 deletions

View File

@ -114,6 +114,8 @@ cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;
@ -7886,6 +7888,118 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
///
/// Viewed as eight half-words, the shuffle qualifies when all result elements
/// but one sit in their original position within a single source vector. The
/// remaining element either comes from the other operand (any element; it is
/// rotated into place with VECSHL first) or, when the second operand is undef,
/// from the fixed half-word of the same vector that needs no rotation
/// (element 3 on big-endian, element 4 on little-endian).
///
/// \param N   The shuffle node to inspect; its operands are used as v16i8.
/// \param DAG The DAG in which replacement nodes are created.
/// \returns A v16i8 value built from BITCAST / VECSHL / VECINSERT nodes, or an
///          empty SDValue if the mask does not describe a half-word insert.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;

  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  const bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);

  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  const bool IsUnary = V2.isUndef();
  // Mask value of the half-word that can be inserted without a prior shift.
  const unsigned VINSERTHSrcElem = IsLE ? 4 : 3;

  // Shifts required to get the half-word we want at element 3.
  static const unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  static const unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
  const unsigned *Shifts = IsLE ? LittleEndianShifts : BigEndianShifts;

  // Identity orderings of the first ([0,7]) and second ([8,15]) vector, one
  // nibble per half-word, element 0 in the most significant nibble.
  const uint32_t OriginalOrderLow = 0x01234567;
  const uint32_t OriginalOrderHigh = 0x89ABCDEF;

  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  uint32_t Mask = 0;
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    const unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= static_cast<uint32_t>(N->getMaskElt(i * 2) / 2) << MaskShift;
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [8,15].
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;
  bool FoundCandidate = false;

  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    const unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    const uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    // Unsigned literal: for MaskShift == 28 the nibble reaches bit 31, which
    // must not be produced by shifting a signed int.
    const uint32_t MaskOtherElts = ~(0xFu << MaskShift);
    const bool FromFirstVector = MaskOneElt < NumHalfWords;

    uint32_t TargetOrder;
    if (IsUnary) {
      // Only the half-word VINSERTH reads without a shift can be used here.
      if (MaskOneElt != VINSERTHSrcElem)
        continue;
      TargetOrder = OriginalOrderLow;
    } else {
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder = FromFirstVector ? OriginalOrderHigh : OriginalOrderLow;
    }

    // Skip if mask of other elements don't equal our expected order.
    if ((Mask & MaskOtherElts) != (TargetOrder & MaskOtherElts))
      continue;

    if (!IsUnary) {
      // We only need the last 3 bits for the number of shifts.
      ShiftElts = Shifts[MaskOneElt & 0x7];
      // The element comes from V1, so V2 is the vector being inserted into.
      Swap = FromFirstVector;
    }
    InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
    FoundCandidate = true;
    break;
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;

  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);

  SDValue InsertSrc = V2;
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    InsertSrc = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                            DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
  }

  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, InsertSrc);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
/// is a shuffle we can handle in a single instruction, return it. Otherwise,
/// return the code it can be lowered into. Worst case, it can always be
@ -7920,6 +8034,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
if (Subtarget.hasP9Altivec()) {
SDValue NewISDNode = lowerToVINSERTH(SVOp, DAG);
if (NewISDNode)
return NewISDNode;
}
if (Subtarget.hasVSX() &&
PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {

View File

@ -1072,7 +1072,14 @@ namespace llvm {
SDValue
combineElementTruncationToVectorTruncation(SDNode *N,
DAGCombinerInfo &DCI) const;
};
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
/// handled by the VINSERTH instruction introduced in ISA 3.0. This is
/// essentially any shuffle of v8i16 vectors that just inserts one element
/// from one vector into the other.
SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
}; // end class PPCTargetLowering
namespace PPC {

View File

@ -477,10 +477,10 @@ def VPERM : VA1a_Int_Ty3<43, "vperm", int_ppc_altivec_vperm,
def VSEL : VA1a_Int_Ty<42, "vsel", int_ppc_altivec_vsel, v4i32>;
// Shuffles.
def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u5imm:$SH),
def VSLDOI : VAForm_2<44, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, u4imm:$SH),
"vsldoi $vD, $vA, $vB, $SH", IIC_VecFP,
[(set v16i8:$vD,
(vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB))]>;
[(set v16i8:$vD,
(PPCvecshl v16i8:$vA, v16i8:$vB, imm32SExt16:$SH))]>;
// VX-Form instructions. AltiVec arithmetic ops.
let isCommutable = 1 in {
@ -908,6 +908,9 @@ def:Pat<(vpkuwum_unary_shuffle v16i8:$vA, undef),
(VPKUWUM $vA, $vA)>;
def:Pat<(vpkuhum_unary_shuffle v16i8:$vA, undef),
(VPKUHUM $vA, $vA)>;
def:Pat<(vsldoi_shuffle:$SH v16i8:$vA, v16i8:$vB),
(VSLDOI v16i8:$vA, v16i8:$vB, (VSLDOI_get_imm $SH))>;
// Match vsldoi(y,x), vpkuwum(y,x), vpkuhum(y,x), i.e., swapped operands.
// These fragments are matched for little-endian, where the inputs must
@ -1310,7 +1313,12 @@ def VEXTUWRX : VX1_RT5_RA5_VB5<1933, "vextuwrx", []>;
// Vector Insert Element Instructions
def VINSERTB : VX1_VT5_UIM5_VB5<781, "vinsertb", []>;
def VINSERTH : VX1_VT5_UIM5_VB5<845, "vinserth", []>;
def VINSERTH : VXForm_1<845, (outs vrrc:$vD),
(ins vrrc:$vDi, u4imm:$UIM, vrrc:$vB),
"vinserth $vD, $vB, $UIM", IIC_VecGeneral,
[(set v8i16:$vD, (PPCvecinsert v8i16:$vDi, v8i16:$vB,
imm32SExt16:$UIM))]>,
RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
def VINSERTW : VX1_VT5_UIM5_VB5<909, "vinsertw", []>;
def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>;

View File

@ -0,0 +1,300 @@
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
; RUN: llc -O0 -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-BE
; The following testcases take one halfword element from the second vector and
; insert it at various locations in the first vector.
; Result element 0 is element 0 of %b (mask index 8); elements 1-7 keep %a's
; values.
define <8 x i16> @shuffle_vector_halfword_0_8(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_0_8
; CHECK: vsldoi 3, 3, 3, 8
; CHECK: vinserth 2, 3, 14
; CHECK-BE-LABEL: shuffle_vector_halfword_0_8
; CHECK-BE: vsldoi 3, 3, 3, 10
; CHECK-BE: vinserth 2, 3, 0
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Result element 1 is element 7 of %b (mask index 15); all other elements keep
; %a's values.
define <8 x i16> @shuffle_vector_halfword_1_15(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_1_15
; CHECK: vsldoi 3, 3, 3, 10
; CHECK: vinserth 2, 3, 12
; CHECK-BE-LABEL: shuffle_vector_halfword_1_15
; CHECK-BE: vsldoi 3, 3, 3, 8
; CHECK-BE: vinserth 2, 3, 2
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 15, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Result element 2 is element 1 of %b (mask index 9); all other elements keep
; %a's values.
define <8 x i16> @shuffle_vector_halfword_2_9(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_2_9
; CHECK: vsldoi 3, 3, 3, 6
; CHECK: vinserth 2, 3, 10
; CHECK-BE-LABEL: shuffle_vector_halfword_2_9
; CHECK-BE: vsldoi 3, 3, 3, 12
; CHECK-BE: vinserth 2, 3, 4
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Result element 3 is element 5 of %b (mask index 13); all other elements keep
; %a's values.
define <8 x i16> @shuffle_vector_halfword_3_13(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_3_13
; CHECK: vsldoi 3, 3, 3, 14
; CHECK: vinserth 2, 3, 8
; CHECK-BE-LABEL: shuffle_vector_halfword_3_13
; CHECK-BE: vsldoi 3, 3, 3, 4
; CHECK-BE: vinserth 2, 3, 6
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Result element 4 is element 2 of %b (mask index 10); all other elements keep
; %a's values.
define <8 x i16> @shuffle_vector_halfword_4_10(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_4_10
; CHECK: vsldoi 3, 3, 3, 4
; CHECK: vinserth 2, 3, 6
; CHECK-BE-LABEL: shuffle_vector_halfword_4_10
; CHECK-BE: vsldoi 3, 3, 3, 14
; CHECK-BE: vinserth 2, 3, 8
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 10, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Result element 5 is element 6 of %b (mask index 14); all other elements keep
; %a's values.
define <8 x i16> @shuffle_vector_halfword_5_14(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_5_14
; CHECK: vsldoi 3, 3, 3, 12
; CHECK: vinserth 2, 3, 4
; CHECK-BE-LABEL: shuffle_vector_halfword_5_14
; CHECK-BE: vsldoi 3, 3, 3, 6
; CHECK-BE: vinserth 2, 3, 10
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 14, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Result element 6 is element 3 of %b (mask index 11); all other elements keep
; %a's values. The big-endian expectations list only the vinserth.
define <8 x i16> @shuffle_vector_halfword_6_11(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_6_11
; CHECK: vsldoi 3, 3, 3, 2
; CHECK: vinserth 2, 3, 2
; CHECK-BE-LABEL: shuffle_vector_halfword_6_11
; CHECK-BE: vinserth 2, 3, 12
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 11, i32 7>
ret <8 x i16> %vecins
}
; Result element 7 is element 4 of %b (mask index 12); all other elements keep
; %a's values. The little-endian expectations list only the vinserth.
define <8 x i16> @shuffle_vector_halfword_7_12(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_7_12
; CHECK: vinserth 2, 3, 0
; CHECK-BE-LABEL: shuffle_vector_halfword_7_12
; CHECK-BE: vsldoi 3, 3, 3, 2
; CHECK-BE: vinserth 2, 3, 14
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 12>
ret <8 x i16> %vecins
}
; Result element 0 is element 1 of %a (mask index 1); elements 1-7 are %b's
; elements 1-7 (mask indices 9-15), i.e. %a is inserted into %b here.
define <8 x i16> @shuffle_vector_halfword_8_1(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_8_1
; CHECK: vsldoi 2, 2, 2, 6
; CHECK: vinserth 3, 2, 14
; CHECK: vmr 2, 3
; CHECK-BE-LABEL: shuffle_vector_halfword_8_1
; CHECK-BE: vsldoi 2, 2, 2, 12
; CHECK-BE: vinserth 3, 2, 0
; CHECK-BE: vmr 2, 3
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
; The following testcases (like shuffle_vector_halfword_8_1 above) take one
; halfword element from the first vector and insert it at various locations in
; the second vector.
; Result element 1 is element 7 of %a (mask index 7); all other elements are
; %b's elements in their original positions (mask indices 8, 10-15).
define <8 x i16> @shuffle_vector_halfword_9_7(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_9_7
; CHECK: vsldoi 2, 2, 2, 10
; CHECK: vinserth 3, 2, 12
; CHECK: vmr 2, 3
; CHECK-BE-LABEL: shuffle_vector_halfword_9_7
; CHECK-BE: vsldoi 2, 2, 2, 8
; CHECK-BE: vinserth 3, 2, 2
; CHECK-BE: vmr 2, 3
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 7, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
; Result element 2 is element 4 of %a (mask index 4); all other elements are
; %b's elements in their original positions. The little-endian expectations
; list only the vinserth and the register move.
define <8 x i16> @shuffle_vector_halfword_10_4(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_10_4
; CHECK: vinserth 3, 2, 10
; CHECK: vmr 2, 3
; CHECK-BE-LABEL: shuffle_vector_halfword_10_4
; CHECK-BE: vsldoi 2, 2, 2, 2
; CHECK-BE: vinserth 3, 2, 4
; CHECK-BE: vmr 2, 3
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 4, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
; Result element 3 is element 2 of %a (mask index 2); all other elements are
; %b's elements in their original positions.
define <8 x i16> @shuffle_vector_halfword_11_2(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_11_2
; CHECK: vsldoi 2, 2, 2, 4
; CHECK: vinserth 3, 2, 8
; CHECK: vmr 2, 3
; CHECK-BE-LABEL: shuffle_vector_halfword_11_2
; CHECK-BE: vsldoi 2, 2, 2, 14
; CHECK-BE: vinserth 3, 2, 6
; CHECK-BE: vmr 2, 3
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 2, i32 12, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
; Result element 4 is element 6 of %a (mask index 6); all other elements are
; %b's elements in their original positions.
define <8 x i16> @shuffle_vector_halfword_12_6(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_12_6
; CHECK: vsldoi 2, 2, 2, 12
; CHECK: vinserth 3, 2, 6
; CHECK: vmr 2, 3
; CHECK-BE-LABEL: shuffle_vector_halfword_12_6
; CHECK-BE: vsldoi 2, 2, 2, 6
; CHECK-BE: vinserth 3, 2, 8
; CHECK-BE: vmr 2, 3
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 6, i32 13, i32 14, i32 15>
ret <8 x i16> %vecins
}
; Result element 5 is element 3 of %a (mask index 3); all other elements are
; %b's elements in their original positions. The big-endian expectations list
; only the vinserth and the register move.
define <8 x i16> @shuffle_vector_halfword_13_3(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_13_3
; CHECK: vsldoi 2, 2, 2, 2
; CHECK: vinserth 3, 2, 4
; CHECK: vmr 2, 3
; CHECK-BE-LABEL: shuffle_vector_halfword_13_3
; CHECK-BE: vinserth 3, 2, 10
; CHECK-BE: vmr 2, 3
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 3, i32 14, i32 15>
ret <8 x i16> %vecins
}
; Result element 6 is element 5 of %a (mask index 5); all other elements are
; %b's elements in their original positions.
define <8 x i16> @shuffle_vector_halfword_14_5(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_14_5
; CHECK: vsldoi 2, 2, 2, 14
; CHECK: vinserth 3, 2, 2
; CHECK: vmr 2, 3
; CHECK-BE-LABEL: shuffle_vector_halfword_14_5
; CHECK-BE: vsldoi 2, 2, 2, 4
; CHECK-BE: vinserth 3, 2, 12
; CHECK-BE: vmr 2, 3
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 5, i32 15>
ret <8 x i16> %vecins
}
; Result element 7 is element 0 of %a (mask index 0); all other elements are
; %b's elements in their original positions.
define <8 x i16> @shuffle_vector_halfword_15_0(<8 x i16> %a, <8 x i16> %b) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_15_0
; CHECK: vsldoi 2, 2, 2, 8
; CHECK: vinserth 3, 2, 0
; CHECK: vmr 2, 3
; CHECK-BE-LABEL: shuffle_vector_halfword_15_0
; CHECK-BE: vsldoi 2, 2, 2, 10
; CHECK-BE: vinserth 3, 2, 14
; CHECK-BE: vmr 2, 3
%vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0>
ret <8 x i16> %vecins
}
; The following testcases use the same vector in both arguments of the
; shufflevector. If halfword element 3 in BE mode (or 4 in LE mode) is the one
; we're attempting to insert, then we can use the vector insert instruction.
; Same vector for both operands: result element 0 is a copy of element 4.
; vinserth is expected on little-endian only.
define <8 x i16> @shuffle_vector_halfword_0_4(<8 x i16> %a) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_0_4
; CHECK: vinserth 2, 2, 14
; CHECK-BE-LABEL: shuffle_vector_halfword_0_4
; CHECK-BE-NOT: vinserth
%vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 4, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Same vector for both operands: result element 1 is a copy of element 3.
; vinserth is expected on big-endian only.
define <8 x i16> @shuffle_vector_halfword_1_3(<8 x i16> %a) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_1_3
; CHECK-NOT: vinserth
; CHECK-BE-LABEL: shuffle_vector_halfword_1_3
; CHECK-BE: vinserth 2, 2, 2
%vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 3, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Same vector for both operands: result element 2 is a copy of element 3.
; vinserth is expected on big-endian only.
define <8 x i16> @shuffle_vector_halfword_2_3(<8 x i16> %a) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_2_3
; CHECK-NOT: vinserth
; CHECK-BE-LABEL: shuffle_vector_halfword_2_3
; CHECK-BE: vinserth 2, 2, 4
%vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Same vector for both operands: result element 3 is a copy of element 4.
; vinserth is expected on little-endian only.
define <8 x i16> @shuffle_vector_halfword_3_4(<8 x i16> %a) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_3_4
; CHECK: vinserth 2, 2, 8
; CHECK-BE-LABEL: shuffle_vector_halfword_3_4
; CHECK-BE-NOT: vinserth
%vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 4, i32 4, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Same vector for both operands: result element 4 is a copy of element 3.
; vinserth is expected on big-endian only.
define <8 x i16> @shuffle_vector_halfword_4_3(<8 x i16> %a) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_4_3
; CHECK-NOT: vinserth
; CHECK-BE-LABEL: shuffle_vector_halfword_4_3
; CHECK-BE: vinserth 2, 2, 8
%vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 5, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Same vector for both operands: result element 5 is a copy of element 3.
; vinserth is expected on big-endian only.
define <8 x i16> @shuffle_vector_halfword_5_3(<8 x i16> %a) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_5_3
; CHECK-NOT: vinserth
; CHECK-BE-LABEL: shuffle_vector_halfword_5_3
; CHECK-BE: vinserth 2, 2, 10
%vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 3, i32 6, i32 7>
ret <8 x i16> %vecins
}
; Same vector for both operands: result element 6 is a copy of element 4.
; vinserth is expected on little-endian only.
define <8 x i16> @shuffle_vector_halfword_6_4(<8 x i16> %a) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_6_4
; CHECK: vinserth 2, 2, 2
; CHECK-BE-LABEL: shuffle_vector_halfword_6_4
; CHECK-BE-NOT: vinserth
%vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 4, i32 7>
ret <8 x i16> %vecins
}
; Same vector for both operands: result element 7 is a copy of element 4.
; vinserth is expected on little-endian only.
define <8 x i16> @shuffle_vector_halfword_7_4(<8 x i16> %a) {
entry:
; CHECK-LABEL: shuffle_vector_halfword_7_4
; CHECK: vinserth 2, 2, 0
; CHECK-BE-LABEL: shuffle_vector_halfword_7_4
; CHECK-BE-NOT: vinserth
%vecins = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4>
ret <8 x i16> %vecins
}