1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-19 11:02:59 +02:00

[CostModel] Replace ShuffleKind::SK_Alternate with ShuffleKind::SK_Select (PR33744)

As discussed on PR33744, this patch relaxes ShuffleKind::SK_Alternate which requires shuffle masks to only match an alternating pattern from its 2 sources:

e.g. v4f32: <0,5,2,7> or <4,1,6,3>

This seems far too restrictive, as most SIMD hardware will implement it using a general blend/bit-select instruction, so this patch replaces it with SK_Select, permitting elements from either source as long as they are inline (i.e. each element stays in its corresponding lane):

e.g. v4f32: <0,5,2,7>, <4,1,6,3>, <0,1,6,7>, <4,1,2,3> etc.

This initial patch just updates the name and the cost model's shuffle mask analysis; later patch reviews will update SLP to better utilise this - for now SLP still limits itself to SK_Alternate style patterns.

Differential Revision: https://reviews.llvm.org/D47985

llvm-svn: 334513
This commit is contained in:
Simon Pilgrim 2018-06-12 16:12:29 +00:00
parent 263130d589
commit f6cb95e1e4
7 changed files with 147 additions and 182 deletions

View File

@ -641,7 +641,9 @@ public:
enum ShuffleKind {
SK_Broadcast, ///< Broadcast element 0 to all other elements.
SK_Reverse, ///< Reverse the order of the vector.
SK_Alternate, ///< Choose alternate elements from vector.
SK_Select, ///< Selects elements from the corresponding lane of
///< either source operand. This is equivalent to a
///< vector select with a constant condition operand.
SK_Transpose, ///< Transpose two vectors.
SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset.

View File

@ -554,7 +554,7 @@ public:
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
switch (Kind) {
case TTI::SK_Alternate:
case TTI::SK_Select:
case TTI::SK_Transpose:
case TTI::SK_PermuteSingleSrc:
case TTI::SK_PermuteTwoSrc:

View File

@ -674,29 +674,25 @@ static bool isIdentityVectorMask(ArrayRef<int> Mask) {
return IdentityLHS || IdentityRHS;
}
static bool isAlternateVectorMask(ArrayRef<int> Mask) {
bool isAlternate = true;
static bool isSelectVectorMask(ArrayRef<int> Mask) {
bool IsSelect = true;
bool FoundLHS = false;
bool FoundRHS = false;
unsigned MaskSize = Mask.size();
// Example: shufflevector A, B, <0,5,2,7>
for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
if (Mask[i] < 0)
continue;
isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
}
if (isAlternate)
return true;
isAlternate = true;
// Example: shufflevector A, B, <0,1,6,3>
// Example: shufflevector A, B, <4,1,6,3>
for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
for (unsigned i = 0; i < MaskSize && IsSelect; ++i) {
if (Mask[i] < 0)
continue;
isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
bool IsLHS = (Mask[i] == (int)i);
bool IsRHS = (Mask[i] == (int)(i + MaskSize));
FoundLHS |= IsLHS;
FoundRHS |= IsRHS;
IsSelect = IsLHS || IsRHS;
}
return isAlternate;
// If we don't use both vectors this is really an Identity mask.
return IsSelect && FoundLHS && FoundRHS;
}
static bool isTransposeVectorMask(ArrayRef<int> Mask) {
@ -1236,8 +1232,8 @@ int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Reverse,
VecTypOp0, 0, nullptr);
if (isAlternateVectorMask(Mask))
return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Alternate,
if (isSelectVectorMask(Mask))
return TTIImpl->getShuffleCost(TargetTransformInfo::SK_Select,
VecTypOp0, 0, nullptr);
if (isTransposeVectorMask(Mask))

View File

@ -400,8 +400,8 @@ int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
// We only handle costs of reverse and alternate shuffles for now.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
// We only handle costs of reverse and select shuffles for now.
if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select)
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
@ -426,9 +426,9 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
if (Kind == TTI::SK_Alternate) {
static const CostTblEntry NEONAltShuffleTbl[] = {
// Alt shuffle cost table for ARM. Cost is the number of instructions
if (Kind == TTI::SK_Select) {
static const CostTblEntry NEONSelShuffleTbl[] = {
// Select shuffle cost table for ARM. Cost is the number of instructions
// required to create the shuffled vector.
{ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
@ -445,7 +445,7 @@ int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);

View File

@ -912,8 +912,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v16i16, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Reverse, MVT::v32i8, 2 }, // vperm2i128 + pshufb
{ TTI::SK_Alternate, MVT::v16i16, 1 }, // vpblendw
{ TTI::SK_Alternate, MVT::v32i8, 1 }, // vpblendvb
{ TTI::SK_Select, MVT::v16i16, 1 }, // vpblendvb
{ TTI::SK_Select, MVT::v32i8, 1 }, // vpblendvb
{ TTI::SK_PermuteSingleSrc, MVT::v4f64, 1 }, // vpermpd
{ TTI::SK_PermuteSingleSrc, MVT::v8f32, 1 }, // vpermps
@ -977,12 +977,12 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb
// + vinsertf128
{ TTI::SK_Alternate, MVT::v4i64, 1 }, // vblendpd
{ TTI::SK_Alternate, MVT::v4f64, 1 }, // vblendpd
{ TTI::SK_Alternate, MVT::v8i32, 1 }, // vblendps
{ TTI::SK_Alternate, MVT::v8f32, 1 }, // vblendps
{ TTI::SK_Alternate, MVT::v16i16, 3 }, // vpand + vpandn + vpor
{ TTI::SK_Alternate, MVT::v32i8, 3 }, // vpand + vpandn + vpor
{ TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd
{ TTI::SK_Select, MVT::v4f64, 1 }, // vblendpd
{ TTI::SK_Select, MVT::v8i32, 1 }, // vblendps
{ TTI::SK_Select, MVT::v8f32, 1 }, // vblendps
{ TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor
{ TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor
{ TTI::SK_PermuteSingleSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd
{ TTI::SK_PermuteSingleSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd
@ -1008,12 +1008,12 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
return LT.first * Entry->Cost;
static const CostTblEntry SSE41ShuffleTbl[] = {
{ TTI::SK_Alternate, MVT::v2i64, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v4i32, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v4f32, 1 }, // blendps
{ TTI::SK_Alternate, MVT::v8i16, 1 }, // pblendw
{ TTI::SK_Alternate, MVT::v16i8, 1 } // pblendvb
{ TTI::SK_Select, MVT::v2i64, 1 }, // pblendw
{ TTI::SK_Select, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Select, MVT::v4i32, 1 }, // pblendw
{ TTI::SK_Select, MVT::v4f32, 1 }, // blendps
{ TTI::SK_Select, MVT::v8i16, 1 }, // pblendw
{ TTI::SK_Select, MVT::v16i8, 1 } // pblendvb
};
if (ST->hasSSE41())
@ -1027,8 +1027,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v8i16, 1 }, // pshufb
{ TTI::SK_Reverse, MVT::v16i8, 1 }, // pshufb
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // 2*pshufb + por
{ TTI::SK_Alternate, MVT::v16i8, 3 }, // 2*pshufb + por
{ TTI::SK_Select, MVT::v8i16, 3 }, // 2*pshufb + por
{ TTI::SK_Select, MVT::v16i8, 3 }, // 2*pshufb + por
{ TTI::SK_PermuteSingleSrc, MVT::v8i16, 1 }, // pshufb
{ TTI::SK_PermuteSingleSrc, MVT::v16i8, 1 }, // pshufb
@ -1055,11 +1055,11 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
{ TTI::SK_Reverse, MVT::v16i8, 9 }, // 2*pshuflw + 2*pshufhw
// + 2*pshufd + 2*unpck + packus
{ TTI::SK_Alternate, MVT::v2i64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Alternate, MVT::v4i32, 2 }, // 2*shufps
{ TTI::SK_Alternate, MVT::v8i16, 3 }, // pand + pandn + por
{ TTI::SK_Alternate, MVT::v16i8, 3 }, // pand + pandn + por
{ TTI::SK_Select, MVT::v2i64, 1 }, // movsd
{ TTI::SK_Select, MVT::v2f64, 1 }, // movsd
{ TTI::SK_Select, MVT::v4i32, 2 }, // 2*shufps
{ TTI::SK_Select, MVT::v8i16, 3 }, // pand + pandn + por
{ TTI::SK_Select, MVT::v16i8, 3 }, // pand + pandn + por
{ TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // shufpd
{ TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // pshufd
@ -1083,7 +1083,7 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
static const CostTblEntry SSE1ShuffleTbl[] = {
{ TTI::SK_Broadcast, MVT::v4f32, 1 }, // shufps
{ TTI::SK_Reverse, MVT::v4f32, 1 }, // shufps
{ TTI::SK_Alternate, MVT::v4f32, 2 }, // 2*shufps
{ TTI::SK_Select, MVT::v4f32, 2 }, // 2*shufps
{ TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, // shufps
{ TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, // 2*shufps
};
@ -1941,8 +1941,8 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
if (VT.isSimple() && LT.second != VT.getSimpleVT() &&
LT.second.getVectorNumElements() == NumElem)
// Promotion requires expand/truncate for data and a shuffle for mask.
Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, nullptr) +
getShuffleCost(TTI::SK_Alternate, MaskTy, 0, nullptr);
Cost += getShuffleCost(TTI::SK_Select, SrcVTy, 0, nullptr) +
getShuffleCost(TTI::SK_Select, MaskTy, 0, nullptr);
else if (LT.second.getVectorNumElements() > NumElem) {
VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),

View File

@ -313,7 +313,7 @@ isShuffle(ArrayRef<Value *> VL) {
if ((CommonShuffleMode == FirstAlternate ||
CommonShuffleMode == SecondAlternate) &&
Vec2)
return TargetTransformInfo::SK_Alternate;
return TargetTransformInfo::SK_Select;
// If Vec2 was never used, we have a permutation of a single vector, otherwise
// we have permutation of 2 vectors.
return Vec2 ? TargetTransformInfo::SK_PermuteTwoSrc
@ -2461,8 +2461,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
Instruction *I1 = cast<Instruction>(VL[1]);
VecCost +=
TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK);
VecCost +=
TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0);
VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
return ReuseShuffleCost + VecCost - ScalarCost;
}
default:

View File

@ -200,12 +200,24 @@ define <4 x i32> @test_v4i32_2(<4 x i32> %a, <4 x i32> %b) {
}
define <4 x i32> @test_v4i32_3(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: 'test_v4i32_3'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
; SSE2-LABEL: 'test_v4i32_3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
;
; SSSE3-LABEL: 'test_v4i32_3'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
;
; SSE42-LABEL: 'test_v4i32_3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
;
; AVX-LABEL: 'test_v4i32_3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
;
; BTVER2-LABEL: 'test_v4i32_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %1
;
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
@ -263,12 +275,24 @@ define <4 x float> @test_v4f32_2(<4 x float> %a, <4 x float> %b) {
}
define <4 x float> @test_v4f32_3(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: 'test_v4f32_3'
; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
; SSE2-LABEL: 'test_v4f32_3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
;
; SSSE3-LABEL: 'test_v4f32_3'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
;
; SSE42-LABEL: 'test_v4f32_3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
;
; AVX-LABEL: 'test_v4f32_3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
;
; BTVER2-LABEL: 'test_v4f32_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x float> %1
;
%1 = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
@ -311,27 +335,15 @@ define <4 x i64> @test_v4i64_2(<4 x i64> %a, <4 x i64> %b) {
define <4 x i64> @test_v4i64_3(<4 x i64> %a, <4 x i64> %b) {
; SSE-LABEL: 'test_v4i64_3'
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
;
; AVX1-LABEL: 'test_v4i64_3'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
;
; AVX2-LABEL: 'test_v4i64_3'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
;
; XOPAVX1-LABEL: 'test_v4i64_3'
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
;
; XOPAVX2-LABEL: 'test_v4i64_3'
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
; AVX-LABEL: 'test_v4i64_3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
;
; BTVER2-LABEL: 'test_v4i64_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %1
;
%1 = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
@ -374,27 +386,15 @@ define <4 x double> @test_v4f64_2(<4 x double> %a, <4 x double> %b) {
define <4 x double> @test_v4f64_3(<4 x double> %a, <4 x double> %b) {
; SSE-LABEL: 'test_v4f64_3'
; SSE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
;
; AVX1-LABEL: 'test_v4f64_3'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
;
; AVX2-LABEL: 'test_v4f64_3'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
;
; XOPAVX1-LABEL: 'test_v4f64_3'
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
;
; XOPAVX2-LABEL: 'test_v4f64_3'
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
; AVX-LABEL: 'test_v4f64_3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
;
; BTVER2-LABEL: 'test_v4f64_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x double> %1
;
%1 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
@ -455,7 +455,7 @@ define <8 x i16> @test_v8i16_2(<8 x i16> %a, <8 x i16> %b) {
define <8 x i16> @test_v8i16_3(<8 x i16> %a, <8 x i16> %b) {
; SSE2-LABEL: 'test_v8i16_3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
;
; SSSE3-LABEL: 'test_v8i16_3'
@ -463,27 +463,15 @@ define <8 x i16> @test_v8i16_3(<8 x i16> %a, <8 x i16> %b) {
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
;
; SSE42-LABEL: 'test_v8i16_3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
;
; AVX1-LABEL: 'test_v8i16_3'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
;
; AVX2-LABEL: 'test_v8i16_3'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
;
; XOPAVX1-LABEL: 'test_v8i16_3'
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
;
; XOPAVX2-LABEL: 'test_v8i16_3'
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
; AVX-LABEL: 'test_v8i16_3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
;
; BTVER2-LABEL: 'test_v8i16_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %1
;
%1 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
@ -541,28 +529,24 @@ define <8 x i32> @test_v8i32_2(<8 x i32> %a, <8 x i32> %b) {
}
define <8 x i32> @test_v8i32_3(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: 'test_v8i32_3'
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
; SSE2-LABEL: 'test_v8i32_3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
;
; AVX1-LABEL: 'test_v8i32_3'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
; SSSE3-LABEL: 'test_v8i32_3'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
;
; AVX2-LABEL: 'test_v8i32_3'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
; SSE42-LABEL: 'test_v8i32_3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
;
; XOPAVX1-LABEL: 'test_v8i32_3'
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
;
; XOPAVX2-LABEL: 'test_v8i32_3'
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
; AVX-LABEL: 'test_v8i32_3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
;
; BTVER2-LABEL: 'test_v8i32_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %1
;
%1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
@ -620,28 +604,24 @@ define <8 x float> @test_v8f32_2(<8 x float> %a, <8 x float> %b) {
}
define <8 x float> @test_v8f32_3(<8 x float> %a, <8 x float> %b) {
; SSE-LABEL: 'test_v8f32_3'
; SSE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
; SSE2-LABEL: 'test_v8f32_3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
;
; AVX1-LABEL: 'test_v8f32_3'
; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
; SSSE3-LABEL: 'test_v8f32_3'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
;
; AVX2-LABEL: 'test_v8f32_3'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
; SSE42-LABEL: 'test_v8f32_3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
;
; XOPAVX1-LABEL: 'test_v8f32_3'
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
;
; XOPAVX2-LABEL: 'test_v8f32_3'
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
; AVX-LABEL: 'test_v8f32_3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
;
; BTVER2-LABEL: 'test_v8f32_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x float> %1
;
%1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 15>
@ -702,7 +682,7 @@ define <16 x i8> @test_v16i8_2(<16 x i8> %a, <16 x i8> %b) {
define <16 x i8> @test_v16i8_3(<16 x i8> %a, <16 x i8> %b) {
; SSE2-LABEL: 'test_v16i8_3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
;
; SSSE3-LABEL: 'test_v16i8_3'
@ -710,27 +690,15 @@ define <16 x i8> @test_v16i8_3(<16 x i8> %a, <16 x i8> %b) {
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
;
; SSE42-LABEL: 'test_v16i8_3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
;
; AVX1-LABEL: 'test_v16i8_3'
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
;
; AVX2-LABEL: 'test_v16i8_3'
; AVX2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
;
; XOPAVX1-LABEL: 'test_v16i8_3'
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
;
; XOPAVX2-LABEL: 'test_v16i8_3'
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
; AVX-LABEL: 'test_v16i8_3'
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
;
; BTVER2-LABEL: 'test_v16i8_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1
;
%1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
@ -813,35 +781,35 @@ define <16 x i16> @test_v16i16_2(<16 x i16> %a, <16 x i16> %b) {
define <16 x i16> @test_v16i16_3(<16 x i16> %a, <16 x i16> %b) {
; SSE2-LABEL: 'test_v16i16_3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
;
; SSSE3-LABEL: 'test_v16i16_3'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
;
; SSE42-LABEL: 'test_v16i16_3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
;
; AVX1-LABEL: 'test_v16i16_3'
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
;
; AVX2-LABEL: 'test_v16i16_3'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
;
; XOPAVX1-LABEL: 'test_v16i16_3'
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
;
; XOPAVX2-LABEL: 'test_v16i16_3'
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
;
; BTVER2-LABEL: 'test_v16i16_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %1
;
%1 = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 29, i32 30, i32 31>
@ -924,35 +892,35 @@ define <32 x i8> @test_v32i8_2(<32 x i8> %a, <32 x i8> %b) {
define <32 x i8> @test_v32i8_3(<32 x i8> %a, <32 x i8> %b) {
; SSE2-LABEL: 'test_v32i8_3'
; SSE2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
;
; SSSE3-LABEL: 'test_v32i8_3'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
;
; SSE42-LABEL: 'test_v32i8_3'
; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
;
; AVX1-LABEL: 'test_v32i8_3'
; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
;
; AVX2-LABEL: 'test_v32i8_3'
; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
;
; XOPAVX1-LABEL: 'test_v32i8_3'
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; XOPAVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
;
; XOPAVX2-LABEL: 'test_v32i8_3'
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; XOPAVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
;
; BTVER2-LABEL: 'test_v32i8_3'
; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>
; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %1
;
%1 = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 61, i32 62, i32 63>