mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[X86] Make lowerShuffleAsLanePermuteAndPermute use sublanes on AVX2
Extends lowerShuffleAsLanePermuteAndPermute to search for opportunities to use vpermq (64-bit cross-lane shuffle) and vpermd (32-bit cross-lane shuffle) to get elements into the correct lane, in addition to the 128-bit full-lane permutes it previously searched for. This is especially helpful in cross-lane byte shuffles, where the alternative tends to be "vpshufb both lanes separately and blend them with a vpblendvb", which is very expensive, especially on Haswell where vpblendvb uses the same execution port as all the shuffles. Addresses PR47262 Patch By: @TellowKrinkle (TellowKrinkle) Differential Revision: https://reviews.llvm.org/D86429
This commit is contained in:
parent
2d71376420
commit
660c64fada
@ -15550,53 +15550,94 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
|
||||
int NumElts = VT.getVectorNumElements();
|
||||
int NumLanes = VT.getSizeInBits() / 128;
|
||||
int NumEltsPerLane = NumElts / NumLanes;
|
||||
bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef();
|
||||
|
||||
SmallVector<int, 4> SrcLaneMask(NumLanes, SM_SentinelUndef);
|
||||
SmallVector<int, 16> PermMask(NumElts, SM_SentinelUndef);
|
||||
/// Attempts to find a sublane permute with the given size
|
||||
/// that gets all elements into their target lanes.
|
||||
///
|
||||
/// If successful, fills CrossLaneMask and InLaneMask and returns true.
|
||||
/// If unsuccessful, returns false and may overwrite InLaneMask.
|
||||
auto getSublanePermute = [&](int NumSublanes) -> SDValue {
|
||||
int NumSublanesPerLane = NumSublanes / NumLanes;
|
||||
int NumEltsPerSublane = NumElts / NumSublanes;
|
||||
|
||||
for (int i = 0; i != NumElts; ++i) {
|
||||
int M = Mask[i];
|
||||
if (M < 0)
|
||||
continue;
|
||||
SmallVector<int, 16> CrossLaneMask;
|
||||
SmallVector<int, 16> InLaneMask(NumElts, SM_SentinelUndef);
|
||||
// CrossLaneMask but one entry == one sublane.
|
||||
SmallVector<int, 16> CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef);
|
||||
|
||||
// Ensure that each lane comes from a single source lane.
|
||||
int SrcLane = M / NumEltsPerLane;
|
||||
int DstLane = i / NumEltsPerLane;
|
||||
if (!isUndefOrEqual(SrcLaneMask[DstLane], SrcLane))
|
||||
return SDValue();
|
||||
SrcLaneMask[DstLane] = SrcLane;
|
||||
for (int i = 0; i != NumElts; ++i) {
|
||||
int M = Mask[i];
|
||||
if (M < 0)
|
||||
continue;
|
||||
|
||||
PermMask[i] = (DstLane * NumEltsPerLane) + (M % NumEltsPerLane);
|
||||
}
|
||||
int SrcSublane = M / NumEltsPerSublane;
|
||||
int DstLane = i / NumEltsPerLane;
|
||||
|
||||
// Make sure we set all elements of the lane mask, to avoid undef propagation.
|
||||
SmallVector<int, 16> LaneMask(NumElts, SM_SentinelUndef);
|
||||
for (int DstLane = 0; DstLane != NumLanes; ++DstLane) {
|
||||
int SrcLane = SrcLaneMask[DstLane];
|
||||
if (0 <= SrcLane)
|
||||
for (int j = 0; j != NumEltsPerLane; ++j) {
|
||||
LaneMask[(DstLane * NumEltsPerLane) + j] =
|
||||
(SrcLane * NumEltsPerLane) + j;
|
||||
// We only need to get the elements into the right lane, not sublane.
|
||||
// So search all sublanes that make up the destination lane.
|
||||
bool Found = false;
|
||||
int DstSubStart = DstLane * NumSublanesPerLane;
|
||||
int DstSubEnd = DstSubStart + NumSublanesPerLane;
|
||||
for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) {
|
||||
if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane))
|
||||
continue;
|
||||
|
||||
Found = true;
|
||||
CrossLaneMaskLarge[DstSublane] = SrcSublane;
|
||||
int DstSublaneOffset = DstSublane * NumEltsPerSublane;
|
||||
InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!Found)
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
// If we're only shuffling a single lowest lane and the rest are identity
|
||||
// then don't bother.
|
||||
// TODO - isShuffleMaskInputInPlace could be extended to something like this.
|
||||
int NumIdentityLanes = 0;
|
||||
bool OnlyShuffleLowestLane = true;
|
||||
for (int i = 0; i != NumLanes; ++i) {
|
||||
if (isSequentialOrUndefInRange(PermMask, i * NumEltsPerLane, NumEltsPerLane,
|
||||
i * NumEltsPerLane))
|
||||
NumIdentityLanes++;
|
||||
else if (SrcLaneMask[i] != 0 && SrcLaneMask[i] != NumLanes)
|
||||
OnlyShuffleLowestLane = false;
|
||||
}
|
||||
if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
|
||||
// Fill CrossLaneMask using CrossLaneMaskLarge.
|
||||
narrowShuffleMaskElts(NumEltsPerSublane, CrossLaneMaskLarge, CrossLaneMask);
|
||||
|
||||
if (!CanUseSublanes) {
|
||||
// If we're only shuffling a single lowest lane and the rest are identity
|
||||
// then don't bother.
|
||||
// TODO - isShuffleMaskInputInPlace could be extended to something like
|
||||
// this.
|
||||
int NumIdentityLanes = 0;
|
||||
bool OnlyShuffleLowestLane = true;
|
||||
for (int i = 0; i != NumLanes; ++i) {
|
||||
int LaneOffset = i * NumEltsPerLane;
|
||||
if (isSequentialOrUndefInRange(InLaneMask, LaneOffset, NumEltsPerLane,
|
||||
i * NumEltsPerLane))
|
||||
NumIdentityLanes++;
|
||||
else if (CrossLaneMask[LaneOffset] != 0)
|
||||
OnlyShuffleLowestLane = false;
|
||||
}
|
||||
if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);
|
||||
return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT),
|
||||
InLaneMask);
|
||||
};
|
||||
|
||||
// First attempt a solution with full lanes.
|
||||
if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes))
|
||||
return V;
|
||||
|
||||
// The rest of the solutions use sublanes.
|
||||
if (!CanUseSublanes)
|
||||
return SDValue();
|
||||
|
||||
SDValue LanePermute = DAG.getVectorShuffle(VT, DL, V1, V2, LaneMask);
|
||||
return DAG.getVectorShuffle(VT, DL, LanePermute, DAG.getUNDEF(VT), PermMask);
|
||||
// Then attempt a solution with 64-bit sublanes (vpermq).
|
||||
if (SDValue V = getSublanePermute(/*NumSublanes=*/NumLanes * 2))
|
||||
return V;
|
||||
|
||||
// If that doesn't work and we have fast variable shuffle,
|
||||
// attempt 32-bit sublanes (vpermd).
|
||||
if (!Subtarget.hasFastVariableShuffle())
|
||||
return SDValue();
|
||||
|
||||
return getSublanePermute(/*NumSublanes=*/NumLanes * 4);
|
||||
}
|
||||
|
||||
/// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one
|
||||
|
@ -1132,28 +1132,50 @@ define void @interleave_24i16_in(<24 x i16>* %p, <8 x i16>* %q1, <8 x i16>* %q2,
|
||||
; AVX1-NEXT: vmovdqu %xmm2, 16(%rdi)
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: interleave_24i16_in:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqu (%rsi), %xmm0
|
||||
; AVX2-NEXT: vmovdqu (%rdx), %xmm1
|
||||
; AVX2-NEXT: vmovdqu (%rcx), %xmm2
|
||||
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[0,1,u,u,6,7,2,3,u,u,8,9,4,5,u,u,16,17,u,u,22,23,18,19,u,u,24,25,20,21,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,0,1,u,u,u,u,2,3,u,u,u,u,4,5,u,u,22,23,u,u,u,u,24,25,u,u,u,u,26,27]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7],ymm4[8],ymm3[9],ymm4[10,11],ymm3[12],ymm4[13,14],ymm3[15]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm4 = <u,0,0,u,1,1,u,2>
|
||||
; AVX2-NEXT: vpermd %ymm2, %ymm4, %ymm4
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255]
|
||||
; AVX2-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
|
||||
; AVX2-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7]
|
||||
; AVX2-NEXT: vmovdqu %xmm0, 32(%rdi)
|
||||
; AVX2-NEXT: vmovdqu %ymm3, (%rdi)
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: interleave_24i16_in:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vmovdqu (%rsi), %xmm0
|
||||
; AVX2-SLOW-NEXT: vmovdqu (%rdx), %xmm1
|
||||
; AVX2-SLOW-NEXT: vmovdqu (%rcx), %xmm2
|
||||
; AVX2-SLOW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm4 = ymm3[0,1,u,u,6,7,2,3,u,u,8,9,4,5,u,u,16,17,u,u,22,23,18,19,u,u,24,25,20,21,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm3 = ymm3[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[u,u,0,1,u,u,u,u,2,3,u,u,u,u,4,5,u,u,22,23,u,u,u,u,24,25,u,u,u,u,26,27]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm3 = ymm4[0],ymm3[1],ymm4[2,3],ymm3[4],ymm4[5,6],ymm3[7],ymm4[8],ymm3[9],ymm4[10,11],ymm3[12],ymm4[13,14],ymm3[15]
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm4 = <u,0,0,u,1,1,u,2>
|
||||
; AVX2-SLOW-NEXT: vpermd %ymm2, %ymm4, %ymm4
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255]
|
||||
; AVX2-SLOW-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
|
||||
; AVX2-SLOW-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7]
|
||||
; AVX2-SLOW-NEXT: vmovdqu %xmm0, 32(%rdi)
|
||||
; AVX2-SLOW-NEXT: vmovdqu %ymm3, (%rdi)
|
||||
; AVX2-SLOW-NEXT: vzeroupper
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: interleave_24i16_in:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqu (%rsi), %xmm0
|
||||
; AVX2-FAST-NEXT: vmovdqu (%rdx), %xmm1
|
||||
; AVX2-FAST-NEXT: vmovdqu (%rcx), %xmm2
|
||||
; AVX2-FAST-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm3
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm4 = <u,0,0,u,1,1,u,2>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm2, %ymm4, %ymm4
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [0,4,1,5,1,5,2,6]
|
||||
; AVX2-FAST-NEXT: vpermd %ymm3, %ymm5, %ymm3
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = ymm3[0,1,4,5,u,u,2,3,6,7,u,u,8,9,12,13,u,u,18,19,22,23,u,u,24,25,28,29,u,u,26,27]
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255,255,255,0,0,255,255]
|
||||
; AVX2-FAST-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
|
||||
; AVX2-FAST-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,u,u,10,11,8,9,u,u,14,15,12,13,u,u]
|
||||
; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3],xmm1[4],xmm0[5,6],xmm1[7]
|
||||
; AVX2-FAST-NEXT: vmovdqu %xmm0, 32(%rdi)
|
||||
; AVX2-FAST-NEXT: vmovdqu %ymm3, (%rdi)
|
||||
; AVX2-FAST-NEXT: vzeroupper
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; XOP-LABEL: interleave_24i16_in:
|
||||
; XOP: # %bb.0:
|
||||
|
@ -393,10 +393,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
|
||||
@ -416,10 +414,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
|
||||
; XOPAVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -439,10 +435,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
|
||||
@ -462,10 +456,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_09_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,2,3,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,10,11,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 9, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -482,19 +474,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -513,9 +497,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_10_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,4,5,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,12,13,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 10, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -532,19 +515,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -563,9 +538,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_11_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,6,7,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,14,15,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 11, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -584,7 +558,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
@ -605,7 +579,7 @@ define <16 x i16> @shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_12_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,8,9,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 12, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -625,7 +599,7 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
@ -646,7 +620,7 @@ define <16 x i16> @shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_00_13_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,10,11,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 13, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -666,7 +640,7 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
@ -687,7 +661,7 @@ define <16 x i16> @shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_14_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,12,13,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 14, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -707,8 +681,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
@ -728,8 +702,8 @@ define <16 x i16> @shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_15_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,8,9,8,9,8,9,8,9,8,9,8,9,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -3420,9 +3394,8 @@ define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_1
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2]
|
||||
; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
|
||||
@ -3441,9 +3414,8 @@ define <16 x i16> @shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_1
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_01_00_01_02_03_02_11_08_09_08_09_10_11_10_11:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2]
|
||||
; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,0,1,2,3,4,5,6,7,4,5,14,15,16,17,18,19,16,17,18,19,20,21,22,23,20,21,22,23]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 2, i32 3, i32 2, i32 11, i32 8, i32 9, i32 8, i32 9, i32 10, i32 11, i32 10, i32 11>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -3548,22 +3520,11 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
|
||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,u,u,16,17,16,17,16,17,16,17,16,17,16,17,16,17,u,u]
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-FAST-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -3582,11 +3543,8 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_0
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08_08:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
|
||||
; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17]
|
||||
; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
|
||||
; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,8,9,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -3616,10 +3574,9 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_1
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,0,1,0,1,8,9,8,9,8,9,u,u,16,17,16,17,16,17,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-FAST-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_04_04_04_12_08_08_08_08_12_12_12_12:
|
||||
@ -3662,9 +3619,8 @@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
|
||||
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
|
||||
@ -3683,9 +3639,8 @@ define <16 x i16> @shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_1
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_uu_00_uu_01_uu_02_uu_11_uu_08_uu_09_uu_10_uu_11:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6,7,8,9,10],ymm1[11],ymm0[12,13,14,15]
|
||||
; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 11, i32 undef, i32 8, i32 undef, i32 9, i32 undef, i32 10, i32 undef, i32 11>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -3704,9 +3659,8 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
|
||||
@ -3725,9 +3679,8 @@ define <16 x i16> @shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_1
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_uu_04_uu_05_uu_06_uu_15_uu_12_uu_13_uu_14_uu_15:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 4, i32 undef, i32 5, i32 undef, i32 6, i32 undef, i32 15, i32 undef, i32 12, i32 undef, i32 13, i32 undef, i32 14, i32 undef, i32 15>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -3796,13 +3749,20 @@ define <16 x i16> @shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,8,9,0,1,0,1,0,1,u,u,24,25,24,25,24,25,24,25,16,17,16,17,16,17,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,4,5,4,5,4,5,8,9,16,17,16,17,16,17,16,17,20,21,20,21,20,21,20,21]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_04_04_04_04_00_00_00_08_12_12_12_12_08_08_08_08:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4018,13 +3978,20 @@ define <16 x i16> @shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[2,3,0,1,10,11,8,9,10,11,8,9,2,3,u,u,18,19,16,17,26,27,24,25,26,27,24,25,18,19,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_01_00_05_04_05_04_01_08_09_08_13_12_13_12_09_08:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4062,13 +4029,20 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,10,11,8,9,2,3,u,u,26,27,24,25,18,19,16,17,26,27,24,25,18,19,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,4,u,6,4,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,2,3,0,1,6,7,8,9,18,19,16,17,22,23,20,21,18,19,16,17,22,23,20,21]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_05_04_01_08_13_12_09_08_13_12_09_08:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4106,13 +4080,20 @@ define <16 x i16> @shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[10,11,8,9,2,3,0,1,2,3,0,1,10,11,u,u,26,27,24,25,18,19,16,17,18,19,16,17,26,27,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5,6,7,4,5,2,3,8,9,18,19,16,17,22,23,20,21,22,23,20,21,18,19,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_05_04_01_00_01_00_05_12_13_12_09_08_09_08_13_12:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4150,13 +4131,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,8,9,8,9,u,u,16,17,24,25,24,25,16,17,16,17,24,25,24,25,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_04_04_08_08_12_12_08_08_12_12_08:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4194,13 +4182,20 @@ define <16 x i16> @shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,0,1,0,1,8,9,8,9,0,1,0,1,u,u,24,25,16,17,16,17,24,25,24,25,16,17,16,17,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,4,5,4,5,8,9,16,17,20,21,20,21,16,17,16,17,20,21,20,21,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_04_00_00_04_04_00_00_12_12_08_08_12_12_08_08_12:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4364,12 +4359,19 @@ define <16 x i16> @shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,5,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,12,13,14,15,10,11,2,3,12,13,8,9,6,7,28,29,28,29,30,31,26,27,18,19,28,29,24,25,22,23]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [3,2,0,5,7,6,4,5]
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,2,3,6,7,10,11,0,1,4,5,14,15,16,17,16,17,18,19,22,23,26,27,16,17,20,21,30,31]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_06_06_07_05_01_06_04_11_14_14_15_13_09_14_12_11:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4406,13 +4408,20 @@ define <16 x i16> @shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,0,1,8,9,8,9,8,9,8,9,8,9,u,u,16,17,16,17,24,25,24,25,24,25,24,25,24,25,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,4,5,4,5,4,5,8,9,16,17,16,17,20,21,20,21,20,21,20,21,20,21,20,21]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_00_04_04_04_04_04_12_08_08_12_12_12_12_12_12:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4450,13 +4459,20 @@ define <16 x i16> @shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,0,1,0,1,8,9,8,9,8,9,u,u,24,25,24,25,16,17,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5,0,1,0,1,0,1,8,9,16,17,16,17,20,21,20,21,16,17,16,17,16,17,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_04_04_00_00_04_04_04_12_12_12_08_08_12_12_12_12:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4494,13 +4510,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,4,5,4,5,8,9,16,17,20,21,20,21,16,17,20,21,20,21,20,21,20,21]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_04_04_12_08_12_12_08_12_12_12_12:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4538,13 +4561,20 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_0
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,0,1,0,1,0,1,u,u,16,17,24,25,24,25,16,17,16,17,16,17,16,17,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,4,u,4,6,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,0,1,0,1,0,1,8,9,16,17,20,21,20,21,16,17,16,17,16,17,16,17,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_00_00_00_08_08_12_12_08_08_08_08_08:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4593,9 +4623,9 @@ define <16 x i16> @shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_1
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,2,3]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,8,9,0,1,8,9,10,11,12,13,u,u,16,17,24,25,24,25,16,17,24,25,26,27,28,29,u,u]
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,3,7,4,6,7,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,4,5,0,1,4,5,6,7,8,9,14,15,16,17,20,21,20,21,16,17,20,21,22,23,24,25,26,27]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_04_04_00_04_05_06_15_08_12_12_08_12_13_14_15:
|
||||
@ -4635,13 +4665,20 @@ define <16 x i16> @shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,2,3,8,9,8,9,8,9,8,9,8,9,u,u,16,17,18,19,24,25,24,25,24,25,24,25,24,25,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,2,6,u,4,6,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,4,5,4,5,4,5,4,5,4,5,8,9,16,17,u,u,20,21,20,21,20,21,20,21,20,21,20,21]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_uu_04_04_04_04_04_12_08_uu_12_12_12_12_12_12:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4679,13 +4716,20 @@ define <16 x i16> @shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[8,9,8,9,8,9,0,1,8,9,8,9,8,9,u,u,24,25,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,u,u,4,5,0,1,0,1,0,1,8,9,16,17,16,17,u,u,20,21,16,17,16,17,16,17,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_04_04_uu_00_04_04_04_12_12_12_uu_08_12_12_12_12:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4723,13 +4767,20 @@ define <16 x i16> @shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1,8,9,8,9,0,1,8,9,8,9,8,9,u,u,16,17,24,25,24,25,16,17,24,25,24,25,24,25,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpsllq $48, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0,1,2,3,4,5,6],ymm0[7],ymm1[8,9,10,11,12,13,14],ymm0[15]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <2,0,6,u,6,4,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,0,1,4,5,0,1,0,1,0,1,8,9,u,u,16,17,16,17,20,21,16,17,16,17,16,17,16,17]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_uu_04_04_00_04_04_04_12_uu_12_12_08_12_12_12_12:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -4800,13 +4851,13 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_1
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
|
||||
; AVX512VL: # %bb.0:
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = <u,u,u,u,4,5,6,11,u,u,u,u,12,13,14,11>
|
||||
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [4,5,8,9,4,5,6,11,12,13,8,9,12,13,14,11]
|
||||
; AVX512VL-NEXT: vpermw %ymm0, %ymm1, %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
@ -4821,8 +4872,8 @@ define <16 x i16> @shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_1
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_uu_uu_uu_uu_04_05_06_11_uu_uu_uu_uu_12_13_14_11:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,2,2,2]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7],ymm0[8,9,10,11,12,13,14],ymm1[15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,2]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,0,1,2,3,4,5,14,15,16,17,18,19,24,25,26,27,16,17,18,19,20,21,30,31]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 11, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -4984,12 +5035,19 @@ define <16 x i16> @shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_1
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
|
||||
; AVX2-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [0,1,6,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,14,15,2,3,0,1,4,5,14,15,6,7,10,11,22,23,30,31,18,19,16,17,20,21,30,31,22,23,26,27]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,0,6,5,7,4,6]
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,6,7,10,11,8,9,0,1,6,7,2,3,14,15,18,19,22,23,26,27,24,25,16,17,22,23,18,19,30,31]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_03_07_01_00_02_07_03_13_11_15_09_08_10_15_11_13:
|
||||
; AVX512VL: # %bb.0:
|
||||
@ -5028,9 +5086,10 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7,8,9,10],ymm2[11],ymm1[12,13,14,15]
|
||||
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
|
||||
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
|
||||
@ -5051,9 +5110,10 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_2
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_00_16_01_17_02_18_03_27_08_24_09_25_10_26_11_27:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6,7,8,9,10],ymm2[11],ymm1[12,13,14,15]
|
||||
; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
|
||||
; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm0[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 27, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -5122,10 +5182,9 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3
|
||||
; AVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6],ymm1[7]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
|
||||
@ -5147,10 +5206,9 @@ define <16 x i16> @shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_3
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_04_20_05_21_06_22_07_31_12_28_13_29_14_30_15_31:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm0 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,2,3]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1,2],ymm1[3],ymm2[4,5,6],ymm1[7]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[1,3,3,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 31, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -5235,9 +5293,9 @@ define <16 x i16> @shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_3
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_16_01_17_06_22_07_31_08_24_09_25_14_30_15_31:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,0,1,4,5,2,3,12,13,12,13,u,u,u,u,16,17,16,17,20,21,18,19,28,29,28,29,u,u,u,u]
|
||||
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2],ymm2[3],ymm1[4,5,6],ymm2[7]
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,3,7,u,4,7,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,10,11,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,28,29,u,u,30,31,u,u]
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
@ -5300,9 +5358,9 @@ define <16 x i16> @shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_2
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_00_20_01_21_06_16_07_25_08_28_09_29_14_24_15_25:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2,3,4,5,6,7,8],ymm2[9],ymm1[10,11,12,13,14,15]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,8,9,u,u,10,11,u,u,0,1,u,u,2,3,u,u,24,25,u,u,26,27,u,u,16,17,u,u,18,19]
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <2,0,4,u,6,4,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,0,1,u,u,2,3,u,u,4,5,u,u,10,11,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,u,u,2,3,u,u,12,13,u,u,14,15,u,u,16,17,u,u,18,19,u,u,28,29,u,u,30,31,u,u]
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
@ -5354,19 +5412,18 @@ define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_2
|
||||
;
|
||||
; AVX2-SLOW-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
|
||||
; AVX2-SLOW-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15]
|
||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
|
||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
|
||||
; AVX2-SLOW-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,4,5,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[2,3,0,1,u,u,u,u,6,7,4,5,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21,u,u,u,u]
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
|
||||
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
@ -5389,11 +5446,11 @@ define <16 x i16> @shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_2
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_01_00_17_16_03_02_19_26_09_08_25_24_11_10_27_26:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,2,3]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3,4,5,6,7,8,9],ymm2[10],ymm1[11,12,13,14,15]
|
||||
; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm1 = ymm1[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[u,u,u,u,2,3,0,1,u,u,u,u,6,7,12,13,u,u,u,u,18,19,16,17,u,u,u,u,22,23,20,21]
|
||||
; XOPAVX2-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
|
||||
; XOPAVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
|
||||
; XOPAVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 1, i32 0, i32 17, i32 16, i32 3, i32 2, i32 19, i32 26, i32 9, i32 8, i32 25, i32 24, i32 11, i32 10, i32 27, i32 26>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -5412,9 +5469,10 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7,8,9,10],ymm2[11],ymm0[12,13,14,15]
|
||||
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
|
||||
; AVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
|
||||
@ -5436,9 +5494,10 @@ define <16 x i16> @shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_1
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_16_00_17_01_18_02_19_11_24_08_25_09_26_10_27_11:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,3,2,3]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6,7,8,9,10],ymm2[11],ymm0[12,13,14,15]
|
||||
; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11]
|
||||
; XOPAVX2-NEXT: vpunpcklwd {{.*#+}} ymm1 = ymm1[0,0,1,1,2,2,3,3,8,8,9,9,10,10,11,11]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 0, i32 17, i32 1, i32 18, i32 2, i32 19, i32 11, i32 24, i32 8, i32 25, i32 9, i32 26, i32 10, i32 27, i32 11>
|
||||
ret <16 x i16> %shuffle
|
||||
@ -5458,9 +5517,8 @@ define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1
|
||||
; AVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6],ymm0[7]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
@ -5484,9 +5542,8 @@ define <16 x i16> @shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_1
|
||||
; XOPAVX2-LABEL: shuffle_v16i16_20_04_21_05_22_06_23_15_28_12_29_13_30_14_31_15:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm1 = ymm1[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; XOPAVX2-NEXT: vpunpckhwd {{.*#+}} ymm2 = ymm0[4,4,5,5,6,6,7,7,12,12,13,13,14,14,15,15]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,3]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2],ymm0[3],ymm2[4,5,6],ymm0[7]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,3,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,2,3,u,u,4,5,u,u,14,15,u,u,16,17,u,u,18,19,u,u,20,21,u,u,22,23]
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7],ymm1[8],ymm0[9],ymm1[10],ymm0[11],ymm1[12],ymm0[13],ymm1[14],ymm0[15]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 20, i32 4, i32 21, i32 5, i32 22, i32 6, i32 23, i32 15, i32 28, i32 12, i32 29, i32 13, i32 30, i32 14, i32 31, i32 15>
|
||||
@ -7293,16 +7350,13 @@ define <16 x i16> @PR24935(<16 x i16> %a, <16 x i16> %b) {
|
||||
;
|
||||
; AVX2-FAST-LABEL: PR24935:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = zero,zero,zero,zero,ymm1[8,9],zero,zero,zero,zero,ymm1[14,15,12,13,0,1,24,25,24,25],zero,zero,ymm1[24,25,16,17,30,31,28,29,16,17]
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,4,5],zero,zero,ymm1[10,11,4,5],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16,17],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX2-FAST-NEXT: vpor %ymm2, %ymm1, %ymm1
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,2,3,2,3,u,u,10,11,u,u,u,u,u,u,u,u,18,19,18,19,u,u,26,27,u,u,u,u,u,u]
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,6,7,u,u,18,19,u,u,u,u,u,u,u,u,24,25,16,17,u,u]
|
||||
; AVX2-FAST-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2],ymm0[3],ymm2[4],ymm0[5,6,7,8],ymm2[9,10],ymm0[11],ymm2[12],ymm0[13,14,15]
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,0,255,255,255,255,255,255,0,0,255,255,0,0,0,0,255,255,255,255,0,0,0,0,0,0,255,255]
|
||||
; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <0,5,u,u,0,4,6,2>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,ymm0[2,3],zero,zero,zero,zero,zero,zero,ymm0[6,7],zero,zero,ymm0[18,19,22,23],zero,zero,zero,zero,ymm0[26,27,28,29,16,17],zero,zero
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <5,6,3,0,0,6,4,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm1, %ymm2, %ymm1
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[2,3,0,1],zero,zero,ymm1[6,7,0,1,10,11],zero,zero,ymm1[12,13],zero,zero,zero,zero,ymm1[16,17,20,21],zero,zero,zero,zero,zero,zero,ymm1[24,25]
|
||||
; AVX2-FAST-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: PR24935:
|
||||
|
@ -800,21 +800,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm1
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
|
||||
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpslldq {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm1[16]
|
||||
; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512VLBW-NEXT: movl $-2147450880, %eax # imm = 0x80008000
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1}
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
@ -834,11 +827,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpbroadcastb %xmm0, %ymm1
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16]
|
||||
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
|
||||
; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -857,19 +847,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
|
||||
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-NEXT: movl $1, %eax
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
@ -889,10 +874,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_00_17_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,255,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
|
||||
; XOPAVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 17, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -911,17 +894,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm1 = <0,9,u,u,u,u,u,u,0,u,u,u,u,u,u,u>
|
||||
; AVX512VLBW-NEXT: vpermw %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
@ -941,10 +921,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_00_18_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 18, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -963,17 +941,14 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; AVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} ymm1 = <0,9,u,u,u,u,u,u,0,u,u,u,u,u,u,u>
|
||||
; AVX512VLBW-NEXT: vpermw %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
@ -993,10 +968,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_00_19_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
|
||||
; XOPAVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm1[0,1],ymm0[0,1]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,0,11,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 19, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -1013,33 +986,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW-SLOW: # %bb.0:
|
||||
; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW-FAST: # %bb.0:
|
||||
; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-FAST-NEXT: retq
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLVBMI: # %bb.0:
|
||||
@ -1058,9 +1015,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_00_20_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 20, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -1077,33 +1033,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW-SLOW: # %bb.0:
|
||||
; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW-FAST: # %bb.0:
|
||||
; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-FAST-NEXT: retq
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLVBMI: # %bb.0:
|
||||
@ -1122,9 +1062,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_00_21_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 21, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -1141,33 +1080,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW-SLOW: # %bb.0:
|
||||
; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW-FAST: # %bb.0:
|
||||
; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-FAST-NEXT: retq
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLVBMI: # %bb.0:
|
||||
@ -1186,9 +1109,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_00_22_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 22, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -1205,33 +1127,17 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW-SLOW: # %bb.0:
|
||||
; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW-FAST: # %bb.0:
|
||||
; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = <0,5,u,u,0,u,u,u>
|
||||
; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-FAST-NEXT: retq
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLVBMI: # %bb.0:
|
||||
@ -1250,9 +1156,8 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_00_23_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,7,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 23, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -1271,13 +1176,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
@ -1298,7 +1203,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -1318,13 +1223,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
@ -1345,7 +1250,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -1365,13 +1270,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
@ -1392,7 +1297,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -1412,13 +1317,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
@ -1439,7 +1344,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -1459,13 +1364,13 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
@ -1486,7 +1391,7 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -1506,13 +1411,13 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
@ -1533,7 +1438,7 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -1553,13 +1458,13 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
@ -1580,7 +1485,7 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
@ -1600,16 +1505,14 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm0, %ymm0
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
@ -1629,9 +1532,8 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [15,0,0,0]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
|
||||
; XOPAVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,0,0,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[7,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -3360,34 +3262,61 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
|
||||
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23]
|
||||
; AVX2-NEXT: vpor %ymm1, %ymm2, %ymm1
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
|
||||
; AVX2-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
; AVX2-SLOW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23]
|
||||
; AVX2-SLOW-NEXT: vpor %ymm1, %ymm2, %ymm1
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7]
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
|
||||
; AVX2-SLOW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
|
||||
; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
|
||||
; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: movl $134948620, %eax # imm = 0x80B270C
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1}
|
||||
; AVX512VLBW-NEXT: retq
|
||||
; AVX2-FAST-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[10,13],zero,zero,ymm1[3,3],zero,ymm1[8],zero,zero,zero,ymm1[12,1],zero,zero,zero,zero,zero,ymm1[20],zero,ymm1[17,22],zero,zero,ymm1[16],zero,ymm1[27],zero,zero,zero,zero,zero
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,ymm1[u,u],zero,zero,ymm1[12],zero,ymm1[u,u,u],zero,zero,ymm1[u,0,3,u,u],zero,ymm1[u],zero,zero,ymm1[21,16],zero,ymm1[26],zero,ymm1[u,20,18,20,23]
|
||||
; AVX2-FAST-NEXT: vpor %ymm1, %ymm2, %ymm1
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <3,4,5,7,5,4,1,u>
|
||||
; AVX2-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm0
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,1,u,u,u,u,5,10,13,u,u,0,u,u,16,23,u,23,u,u,u,u,u,u,u,27,u,u,u,u]
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255,0,0,255,0,255,255,255,255,255,255,255,0,255,255,255,255]
|
||||
; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
|
||||
; AVX2-FAST-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-SLOW-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
|
||||
; AVX512VLBW-SLOW: # %bb.0:
|
||||
; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm2 = ymm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u,20,19,u,19,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,23,u,u,u,u]
|
||||
; AVX512VLBW-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm0[2],ymm2[3,4,5],ymm0[6],ymm2[7]
|
||||
; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero
|
||||
; AVX512VLBW-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
||||
; AVX512VLBW-SLOW-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
|
||||
; AVX512VLBW-SLOW-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-SLOW-NEXT: movl $134948620, %eax # imm = 0x80B270C
|
||||
; AVX512VLBW-SLOW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-SLOW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1}
|
||||
; AVX512VLBW-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-FAST-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
|
||||
; AVX512VLBW-FAST: # %bb.0:
|
||||
; AVX512VLBW-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = <3,4,5,7,5,4,1,u>
|
||||
; AVX512VLBW-FAST-NEXT: vpermd %ymm0, %ymm2, %ymm2
|
||||
; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm1[10,13,u,u,3,3],zero,ymm1[8,u,u,u,12,1,u],zero,zero,ymm1[u,u,20,u,17,22],zero,zero,ymm1[16],zero,ymm1[27,u],zero,zero,zero,zero
|
||||
; AVX512VLBW-FAST-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,3,0,1]
|
||||
; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm1 = zero,zero,zero,zero,zero,zero,ymm1[12],zero,zero,zero,zero,zero,zero,zero,ymm1[0,3],zero,zero,zero,zero,zero,zero,ymm1[21,16],zero,ymm1[26],zero,zero,ymm1[20,18,20,23]
|
||||
; AVX512VLBW-FAST-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX512VLBW-FAST-NEXT: movl $134948620, %eax # imm = 0x80B270C
|
||||
; AVX512VLBW-FAST-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-FAST-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[u,u,0,1,u,u,u,u,5,10,13,u,u,0,u,u,16,23,u,23,u,u,u,u,u,u,u,27,u,u,u,u]
|
||||
; AVX512VLBW-FAST-NEXT: retq
|
||||
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_20_19_52_19_49_54_37_32_48_42_59_07_36_34_36_39:
|
||||
; AVX512VLVBMI: # %bb.0:
|
||||
@ -4405,10 +4334,8 @@ define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_
|
||||
;
|
||||
; AVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7],zero,zero,zero,zero,ymm0[24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7,24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31],zero,zero
|
||||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
|
||||
@ -4427,10 +4354,8 @@ define <32 x i8> @shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_
|
||||
;
|
||||
; XOPAVX2-LABEL: shuffle_v32i8_00_01_16_17_02_03_18_19_04_05_20_21_06_07_22_23_08_09_24_25_10_11_26_27_12_13_28_29_14_15_30_31:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7],zero,zero,zero,zero,ymm0[24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,1],zero,zero,ymm0[2,3],zero,zero,ymm0[4,5],zero,zero,ymm0[6,7,24,25],zero,zero,ymm0[26,27],zero,zero,ymm0[28,29],zero,zero,ymm0[30,31],zero,zero
|
||||
; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,8,9,2,3,10,11,4,5,12,13,6,7,14,15,16,17,24,25,18,19,26,27,20,21,28,29,22,23,30,31]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 1, i32 16, i32 17, i32 2, i32 3, i32 18, i32 19, i32 4, i32 5, i32 20, i32 21, i32 6, i32 7, i32 22, i32 23, i32 8, i32 9, i32 24, i32 25, i32 10, i32 11, i32 26, i32 27, i32 12, i32 13, i32 28, i32 29, i32 14, i32 15, i32 30, i32 31>
|
||||
ret <32 x i8> %shuffle
|
||||
@ -4942,21 +4867,14 @@ define <32 x i8> @PR47262(<4 x i64> %a0) {
|
||||
;
|
||||
; AVX2-LABEL: PR47262:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7],zero,zero,zero,zero,ymm0[24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7,24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31],zero,zero
|
||||
; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512VLBW-LABEL: PR47262:
|
||||
; AVX512VLBW: # %bb.0:
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4,u,u,1,5,u,u,2,6,u,u,3,7,u,u,u,u,24,28,u,u,25,29,u,u,26,30,u,u,27,31]
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,0,4,u,u,1,5,u,u,2,6,u,u,3,7,24,28,u,u,25,29,u,u,26,30,u,u,27,31,u,u]
|
||||
; AVX512VLBW-NEXT: movw $21930, %ax # imm = 0x55AA
|
||||
; AVX512VLBW-NEXT: kmovd %eax, %k1
|
||||
; AVX512VLBW-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
|
||||
; AVX512VLBW-NEXT: vmovdqa %ymm1, %ymm0
|
||||
; AVX512VLBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
|
||||
; AVX512VLBW-NEXT: retq
|
||||
;
|
||||
; AVX512VLVBMI-LABEL: PR47262:
|
||||
@ -4975,10 +4893,8 @@ define <32 x i8> @PR47262(<4 x i64> %a0) {
|
||||
;
|
||||
; XOPAVX2-LABEL: PR47262:
|
||||
; XOPAVX2: # %bb.0:
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7],zero,zero,zero,zero,ymm0[24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31]
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,ymm0[0,4],zero,zero,ymm0[1,5],zero,zero,ymm0[2,6],zero,zero,ymm0[3,7,24,28],zero,zero,ymm0[25,29],zero,zero,ymm0[26,30],zero,zero,ymm0[27,31],zero,zero
|
||||
; XOPAVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
|
||||
; XOPAVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
|
||||
; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12,1,5,9,13,2,6,10,14,3,7,11,15,16,20,24,28,17,21,25,29,18,22,26,30,19,23,27,31]
|
||||
; XOPAVX2-NEXT: retq
|
||||
%t1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
|
||||
%t2 = bitcast <4 x i64> %t1 to <32 x i8>
|
||||
|
@ -66,18 +66,16 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1
|
||||
; KNL-LABEL: shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_38:
|
||||
; KNL: ## %bb.0:
|
||||
; KNL-NEXT: vextracti64x4 $1, %zmm0, %ymm2
|
||||
; KNL-NEXT: vpermq {{.*#+}} ymm3 = ymm2[2,3,0,1]
|
||||
; KNL-NEXT: vpblendw {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm3[4,5],ymm2[6],ymm3[7],ymm2[8,9,10,11],ymm3[12,13],ymm2[14],ymm3[15]
|
||||
; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u]
|
||||
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm4
|
||||
; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm4[5,6,7],ymm0[8,9,10,11,12],ymm4[13,14,15]
|
||||
; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u]
|
||||
; KNL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3]
|
||||
; KNL-NEXT: vpshufb {{.*#+}} ymm3 = ymm2[u,u,6,7,u,u,12,13,u,u,2,3,u,u,0,1,u,u,22,23,u,u,20,21,u,u,18,19,u,u,u,u]
|
||||
; KNL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,1,2,3]
|
||||
; KNL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,7,u,u,4,5,u,u,2,3,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17,u,u]
|
||||
; KNL-NEXT: vpblendw {{.*#+}} ymm3 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7],ymm0[8],ymm3[9],ymm0[10],ymm3[11],ymm0[12],ymm3[13],ymm0[14],ymm3[15]
|
||||
; KNL-NEXT: vextracti32x4 $3, %zmm1, %xmm1
|
||||
; KNL-NEXT: vpbroadcastw %xmm1, %ymm1
|
||||
; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm3[0,1,2,3,4,5,6],ymm1[7],ymm3[8,9,10,11,12,13,14],ymm1[15]
|
||||
; KNL-NEXT: vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
|
||||
; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,14,15,u,u,12,13,u,u,10,11,u,u,8,9,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17]
|
||||
; KNL-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[u,u,6,7,u,u,12,13,u,u,2,3,u,u,0,1,u,u,22,23,u,u,20,21,u,u,18,19,u,u,16,17]
|
||||
; KNL-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15]
|
||||
; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
|
@ -807,8 +807,8 @@ entry:
|
||||
define <32 x i8> @PR27320(<8 x i32> %a0) {
|
||||
; CHECK-LABEL: PR27320:
|
||||
; CHECK: # %bb.0:
|
||||
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,2,1]
|
||||
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,1,2,3,4,4,5,6,7,7,8,9,10,10,11,28,29,29,30,31,16,16,17,18,19,19,20,21,22,22,23]
|
||||
; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
|
||||
; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,1,2,3,4,4,5,6,7,7,8,9,10,10,11,20,21,21,22,23,24,24,25,26,27,27,28,29,30,30,31]
|
||||
; CHECK-NEXT: ret{{[l|q]}}
|
||||
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 3, i32 4, i32 5, i32 undef>
|
||||
%2 = bitcast <8 x i32> %1 to <32 x i8>
|
||||
|
@ -3248,56 +3248,21 @@ define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) {
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-SLOW-LABEL: PR45604:
|
||||
; AVX2-SLOW: # %bb.0:
|
||||
; AVX2-SLOW-NEXT: vmovdqa (%rsi), %xmm0
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm1 = [11,11,11,11,11,11,11,11,0,0,0,0,0,0,0,0]
|
||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm2 = ymm1[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,0,2,1,4,4,6,5]
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm3 = <u,u,u,u,255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255>
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm4 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11]
|
||||
; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm2, %ymm4, %ymm2
|
||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm5 = ymm0[0,2,1,3,4,5,6,7,8,10,9,11,12,13,14,15]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,1,3,4,5,5,7]
|
||||
; AVX2-SLOW-NEXT: vpermq {{.*#+}} ymm6 = ymm0[2,3,0,1]
|
||||
; AVX2-SLOW-NEXT: vpshuflw {{.*#+}} ymm7 = ymm6[2,0,3,1,4,5,6,7,10,8,11,9,12,13,14,15]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm7 = ymm7[0,1,1,3,4,5,5,7]
|
||||
; AVX2-SLOW-NEXT: vmovdqa {{.*#+}} ymm8 = <255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255,u,u,u,u>
|
||||
; AVX2-SLOW-NEXT: vpblendvb %ymm8, %ymm5, %ymm7, %ymm5
|
||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm1 = ymm1[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,2,2,3,4,6,6,7]
|
||||
; AVX2-SLOW-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1
|
||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,5,7,8,9,10,11,12,14,13,15]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,1,3,3,6,5,7,7]
|
||||
; AVX2-SLOW-NEXT: vpshufhw {{.*#+}} ymm3 = ymm6[0,1,2,3,6,4,7,5,8,9,10,11,14,12,15,13]
|
||||
; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[2,1,3,3,6,5,7,7]
|
||||
; AVX2-SLOW-NEXT: vpblendvb %ymm8, %ymm0, %ymm3, %ymm0
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm2 = ymm5[0],ymm2[1],ymm5[2],ymm2[3],ymm5[4],ymm2[5],ymm5[6],ymm2[7]
|
||||
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
|
||||
; AVX2-SLOW-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX2-SLOW-NEXT: vmovdqu %ymm2, (%rdi)
|
||||
; AVX2-SLOW-NEXT: vzeroupper
|
||||
; AVX2-SLOW-NEXT: retq
|
||||
;
|
||||
; AVX2-FAST-LABEL: PR45604:
|
||||
; AVX2-FAST: # %bb.0:
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm0 = <u,u,u,u,255,255,0,0,u,u,u,u,255,255,0,0,u,u,u,u,0,0,255,255,u,u,u,u,0,0,255,255>
|
||||
; AVX2-FAST-NEXT: vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,0,0,11,11,11,11,11,11,11,11]
|
||||
; AVX2-FAST-NEXT: vpblendvb %ymm0, {{.*}}(%rip), %ymm1, %ymm0
|
||||
; AVX2-FAST-NEXT: vmovdqa (%rsi), %xmm1
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm2 = ymm1[0,1],zero,zero,ymm1[u,u,u,u,2,3],zero,zero,ymm1[u,u,u,u],zero,zero,ymm1[20,21,u,u,u,u],zero,zero,ymm1[22,23,u,u,u,u]
|
||||
; AVX2-FAST-NEXT: vpermq {{.*#+}} ymm3 = ymm1[2,3,0,1]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm4 = zero,zero,ymm3[0,1,u,u,u,u],zero,zero,ymm3[2,3,u,u,u,u,20,21],zero,zero,ymm3[u,u,u,u,22,23],zero,zero,ymm3[u,u,u,u]
|
||||
; AVX2-FAST-NEXT: vpor %ymm4, %ymm2, %ymm2
|
||||
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0],ymm0[1],ymm2[2],ymm0[3],ymm2[4],ymm0[5],ymm2[6],ymm0[7]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[8,9],zero,zero,ymm1[u,u,u,u,10,11],zero,zero,ymm1[u,u,u,u],zero,zero,ymm1[28,29,u,u,u,u],zero,zero,ymm1[30,31,u,u,u,u]
|
||||
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm3 = zero,zero,ymm3[8,9,u,u,u,u],zero,zero,ymm3[10,11,u,u,u,u,28,29],zero,zero,ymm3[u,u,u,u,30,31],zero,zero,ymm3[u,u,u,u]
|
||||
; AVX2-FAST-NEXT: vpor %ymm3, %ymm1, %ymm1
|
||||
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
|
||||
; AVX2-FAST-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX2-FAST-NEXT: vmovdqu %ymm2, (%rdi)
|
||||
; AVX2-FAST-NEXT: vzeroupper
|
||||
; AVX2-FAST-NEXT: retq
|
||||
; AVX2-LABEL: PR45604:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovdqa (%rsi), %xmm0
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm0[0,2,0,2]
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm2 = <0,1,8,9,u,u,u,u,2,3,10,11,u,u,u,u,4,5,12,13,u,u,u,u,6,7,14,15,u,u,u,u>
|
||||
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
|
||||
; AVX2-NEXT: vmovdqa {{.*#+}} ymm3 = <u,u,u,u,11,0,0,0,u,u,u,u,11,0,0,0,u,u,u,u,11,0,0,0,u,u,u,u,11,0,0,0>
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0],ymm3[1],ymm1[2],ymm3[3],ymm1[4],ymm3[5],ymm1[6],ymm3[7]
|
||||
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,3,1,3]
|
||||
; AVX2-NEXT: vpshufb %ymm2, %ymm0, %ymm0
|
||||
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm3[1],ymm0[2],ymm3[3],ymm0[4],ymm3[5],ymm0[6],ymm3[7]
|
||||
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX2-NEXT: vmovdqu %ymm1, (%rdi)
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
%v1 = load <8 x i16>, <8 x i16>* %src, align 16
|
||||
%v2 = shufflevector <8 x i16> %v1, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
%v3 = shufflevector <16 x i16> %v2, <16 x i16> <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, <32 x i32> <i32 0, i32 8, i32 16, i32 24, i32 1, i32 9, i32 17, i32 25, i32 2, i32 10, i32 18, i32 26, i32 3, i32 11, i32 19, i32 27, i32 4, i32 12, i32 20, i32 28, i32 5, i32 13, i32 21, i32 29, i32 6, i32 14, i32 22, i32 30, i32 7, i32 15, i32 23, i32 31>
|
||||
|
Loading…
Reference in New Issue
Block a user