[X86][AVX512] Attempt target shuffle combining to different types instead of early-out
We try to prevent shuffle combining to value types that would stop the folding of masked operations, but by just returning early, we were failing to try different shuffle types.

The TODOs are all still relevant here to improve codegen but we're lacking test examples.

llvm-svn: 321085
parent 1e061bbff4
commit d4f8c5f95d
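For illustration, here is a minimal standalone C++ sketch of the control-flow change (not LLVM code; `Candidate`, `match`, and the table contents are hypothetical stand-ins for the matchUnary/Binary*VectorShuffle helpers): the writemask-width test moves out of the match body and into the match condition, so a clashing candidate no longer aborts the whole combine and the remaining shuffle types are still tried.

#include <iostream>
#include <optional>
#include <vector>

// Hypothetical stand-in for a shuffle-match helper: each candidate shuffle
// type has an element count that must agree with the root's writemask width
// for masked AVX512 operations to keep folding.
struct Candidate { int NumElts; const char *Name; };

std::optional<Candidate> match(int Kind) {
  static const std::vector<Candidate> Table = {
      {16, "16 x i32"}, {4, "4 x i64"}, {8, "8 x i32"}};
  if (Kind < 0 || Kind >= static_cast<int>(Table.size()))
    return std::nullopt;
  return Table[static_cast<size_t>(Kind)];
}

int main() {
  const int NumRootElts = 8;      // writemask width of the root operation
  const bool IsEVEXShuffle = true;
  for (int Kind = 0; Kind < 3; ++Kind) {
    auto C = match(Kind);
    if (!C)
      continue;
    // Old behaviour: match first, then bail out of the entire combine on a
    // width clash -- the early "return SDValue()" killed later candidates.
    // New behaviour: fold the clash test into the match condition, so the
    // loop simply moves on to the next candidate shuffle type.
    if (!IsEVEXShuffle || C->NumElts == NumRootElts) {
      std::cout << "combined to <" << C->Name << "> shuffle\n";
      return 0;
    }
  }
  std::cout << "no combine found\n";
}

With the values above, the first two candidates clash with the 8-element writemask and are skipped, and the third succeeds; under the old early-out the first clash would have ended the search.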
@@ -28405,8 +28405,6 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   // TODO - attempt to narrow Mask back to writemask size.
   bool IsEVEXShuffle =
       RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
-  if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
-    return SDValue();

   // TODO - handle 128/256-bit lane shuffles of 512-bit vectors.

@@ -28489,11 +28487,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,

     if (matchUnaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
                                 V1, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
-                                ShuffleVT)) {
+                                ShuffleVT) &&
+        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
       if (Depth == 1 && Root.getOpcode() == Shuffle)
         return SDValue(); // Nothing to do!
-      if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
-        return SDValue(); // AVX512 Writemask clash.
       Res = DAG.getBitcast(ShuffleSrcVT, V1);
       DCI.AddToWorklist(Res.getNode());
       Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
@@ -28503,11 +28500,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,

     if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
                                        AllowIntDomain, Subtarget, Shuffle,
-                                       ShuffleVT, PermuteImm)) {
+                                       ShuffleVT, PermuteImm) &&
+        (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
       if (Depth == 1 && Root.getOpcode() == Shuffle)
         return SDValue(); // Nothing to do!
-      if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
-        return SDValue(); // AVX512 Writemask clash.
       Res = DAG.getBitcast(ShuffleVT, V1);
       DCI.AddToWorklist(Res.getNode());
       Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
@@ -28518,12 +28514,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   }

   if (matchBinaryVectorShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain,
-                               V1, V2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT,
-                               ShuffleVT, UnaryShuffle)) {
+                               V1, V2, DL, DAG, Subtarget, Shuffle,
+                               ShuffleSrcVT, ShuffleVT, UnaryShuffle) &&
+      (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
-    if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
-      return SDValue(); // AVX512 Writemask clash.
     V1 = DAG.getBitcast(ShuffleSrcVT, V1);
     DCI.AddToWorklist(V1.getNode());
     V2 = DAG.getBitcast(ShuffleSrcVT, V2);
@@ -28536,11 +28531,10 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
   if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain,
                                       AllowIntDomain, V1, V2, DL, DAG,
                                       Subtarget, Shuffle, ShuffleVT,
-                                      PermuteImm)) {
+                                      PermuteImm) &&
+      (!IsEVEXShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) {
     if (Depth == 1 && Root.getOpcode() == Shuffle)
       return SDValue(); // Nothing to do!
-    if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
-      return SDValue(); // AVX512 Writemask clash.
     V1 = DAG.getBitcast(ShuffleVT, V1);
     DCI.AddToWorklist(V1.getNode());
     V2 = DAG.getBitcast(ShuffleVT, V2);

@@ -1488,12 +1488,10 @@ define <8 x i32> @test_masked_z_16xi32_to_8xi32_perm_mask3(<16 x i32> %vec, <8 x
 define <4 x i32> @test_16xi32_to_4xi32_perm_mask0(<16 x i32> %vec) {
 ; CHECK-LABEL: test_16xi32_to_4xi32_perm_mask0:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; CHECK-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
-; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,3,4,6,4,7]
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3],ymm0[4,5,6],ymm1[7]
-; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
-; CHECK-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
+; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [0,2,4,12,4,6,4,12]
+; CHECK-NEXT: vpermi2d %ymm2, %ymm0, %ymm1
+; CHECK-NEXT: vmovdqa %xmm1, %xmm0
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 12>
@@ -1503,13 +1501,11 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mask0(<16 x i32> %vec, <4 x i
 ; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mask0:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,1,2,0,4,5,6,4]
-; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,3,4,6,4,7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2],ymm3[3],ymm0[4,5,6],ymm3[7]
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %k1
-; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [0,2,4,12,4,6,4,12]
+; CHECK-NEXT: vpermi2d %ymm3, %ymm0, %ymm4
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqd %xmm0, %xmm2, %k1
+; CHECK-NEXT: vpblendmd %xmm4, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 12>
@@ -1522,13 +1518,11 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mask0(<16 x i32> %vec, <4 x
 ; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mask0:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,2,0,4,5,6,4]
-; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,0,3,4,6,4,7]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2],ymm2[3],ymm0[4,5,6],ymm2[7]
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,2,3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [0,2,4,12,4,6,4,12]
+; CHECK-NEXT: vpermi2d %ymm2, %ymm0, %ymm3
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqd %xmm0, %xmm1, %k1
+; CHECK-NEXT: vmovdqa32 %xmm3, %xmm0 {%k1} {z}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 12>
@@ -1863,14 +1857,12 @@ define <4 x i32> @test_masked_16xi32_to_4xi32_perm_mem_mask1(<16 x i32>* %vp, <4
 ; CHECK-LABEL: test_masked_16xi32_to_4xi32_perm_mem_mask1:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovdqa32 (%rdi), %zmm2
-; CHECK-NEXT: vpshufd {{.*#+}} ymm3 = ymm2[3,1,2,3,7,5,6,7]
-; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm2
-; CHECK-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,3,2,4,5,7,6]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0],ymm2[1,2,3],ymm3[4],ymm2[5,6,7]
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,1,2,3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpcmpeqd %xmm3, %xmm1, %k1
-; CHECK-NEXT: vmovdqa32 %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [15,5,3,2,15,5,7,6]
+; CHECK-NEXT: vpermi2d %ymm2, %ymm3, %ymm4
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %k1
+; CHECK-NEXT: vmovdqa32 %xmm4, %xmm0 {%k1}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <16 x i32>, <16 x i32>* %vp
@@ -1884,14 +1876,12 @@ define <4 x i32> @test_masked_z_16xi32_to_4xi32_perm_mem_mask1(<16 x i32>* %vp,
 ; CHECK-LABEL: test_masked_z_16xi32_to_4xi32_perm_mem_mask1:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
-; CHECK-NEXT: vpshufd {{.*#+}} ymm2 = ymm1[3,1,2,3,7,5,6,7]
-; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; CHECK-NEXT: vpshufd {{.*#+}} ymm1 = ymm1[0,1,3,2,4,5,7,6]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2,3],ymm2[4],ymm1[5,6,7]
-; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,2,3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: vmovdqa32 (%rdi), %zmm1
+; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [15,5,3,2,15,5,7,6]
+; CHECK-NEXT: vpermi2d %ymm1, %ymm2, %ymm3
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
+; CHECK-NEXT: vmovdqa32 %xmm3, %xmm0 {%k1} {z}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <16 x i32>, <16 x i32>* %vp
@@ -2298,13 +2288,12 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask3(<8 x i64> %vec, <4 x i
 define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask4(<8 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask4:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vpermq {{.*#+}} ymm3 = ymm0[3,1,2,3]
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,3,1]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1],ymm0[2,3,4,5,6,7]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [7,3,3,1]
+; CHECK-NEXT: vpermi2q %ymm0, %ymm3, %ymm4
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqq %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 3, i32 7, i32 7, i32 5>
 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
@@ -2315,13 +2304,12 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask4(<8 x i64> %vec, <4 x i64
 define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask4(<8 x i64> %vec, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask4:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm0[3,1,2,3]
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,3,1]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3,4,5,6,7]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [7,3,3,1]
+; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpcmpeqq %ymm4, %ymm1, %k1
+; CHECK-NEXT: vpermi2q %ymm0, %ymm3, %ymm2 {%k1} {z}
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
 ; CHECK-NEXT: retq
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 3, i32 7, i32 7, i32 5>
 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
@@ -2332,12 +2320,11 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask5(<8 x i64> %vec, <4 x i64
 ; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask5:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; CHECK-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,1,0,1,4,5,4,5]
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1],ymm0[2,3,4,5],ymm3[6,7]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [4,1,0,6]
+; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm4
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqq %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 4, i32 1, i32 0, i32 6>
 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
@@ -2348,13 +2335,12 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask5(<8 x i64> %vec, <4 x i64
 define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask5(<8 x i64> %vec, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask5:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; CHECK-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
-; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1],ymm0[2,3,4,5],ymm2[6,7]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,1,0,6]
+; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpcmpeqq %ymm4, %ymm1, %k1
+; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
 ; CHECK-NEXT: retq
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 4, i32 1, i32 0, i32 6>
 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
@@ -2405,13 +2391,12 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask6(<8 x i64> %vec, <4 x i
 define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_8xi64_to_4xi64_perm_mask7:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vpermq {{.*#+}} ymm3 = ymm0[2,0,3,3]
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3,4,5],ymm0[6,7]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [2,0,3,4]
+; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm4
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqq %ymm0, %ymm2, %k1
+; CHECK-NEXT: vpblendmq %ymm4, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 4>
 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
@@ -2422,13 +2407,12 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64
 define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mask7(<8 x i64> %vec, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mask7:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm0[2,0,3,3]
-; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0
-; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5],ymm0[6,7]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [2,0,3,4]
+; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpcmpeqq %ymm4, %ymm1, %k1
+; CHECK-NEXT: vpermi2q %ymm3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vmovdqa %ymm2, %ymm0
 ; CHECK-NEXT: retq
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 4>
 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
@@ -2585,12 +2569,11 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask2(<8 x i64>* %vp, <4 x
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
 ; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
-; CHECK-NEXT: vpermq {{.*#+}} ymm3 = ymm3[3,1,2,1]
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,1,1,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5],ymm3[6,7]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpcmpeqq %ymm3, %ymm1, %k1
-; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [7,1,1,5]
+; CHECK-NEXT: vpermi2q %ymm3, %ymm2, %ymm4
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
+; CHECK-NEXT: vmovdqa64 %ymm4, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 7, i32 1, i32 1, i32 5>
@@ -2602,14 +2585,13 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask2(<8 x i64>* %vp, <4 x
 define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask2(<8 x i64>* %vp, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mem_mask2:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
-; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,1]
-; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5],ymm2[6,7]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqq %ymm2, %ymm0, %k1
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
+; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [7,1,1,5]
+; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpcmpeqq %ymm4, %ymm0, %k1
+; CHECK-NEXT: vpermi2q %ymm3, %ymm2, %ymm1 {%k1} {z}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 7, i32 1, i32 1, i32 5>
@@ -2669,12 +2651,11 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask4(<8 x i64>* %vp, <4 x
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
 ; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
-; CHECK-NEXT: vpshufd {{.*#+}} ymm3 = ymm3[0,1,0,1,4,5,4,5]
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,1,2,1]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1],ymm3[2,3,4,5],ymm2[6,7]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpcmpeqq %ymm3, %ymm1, %k1
-; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [0,4,6,1]
+; CHECK-NEXT: vpermi2q %ymm3, %ymm2, %ymm4
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
+; CHECK-NEXT: vmovdqa64 %ymm4, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 0, i32 4, i32 6, i32 1>
@@ -2686,14 +2667,13 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask4(<8 x i64>* %vp, <4 x
 define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask4(<8 x i64>* %vp, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mem_mask4:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
-; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; CHECK-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
-; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,1,2,1]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm2[2,3,4,5],ymm1[6,7]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqq %ymm2, %ymm0, %k1
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
+; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [0,4,6,1]
+; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpcmpeqq %ymm4, %ymm0, %k1
+; CHECK-NEXT: vpermi2q %ymm3, %ymm2, %ymm1 {%k1} {z}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 0, i32 4, i32 6, i32 1>
@@ -2739,11 +2719,10 @@ define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask5(<8 x i64>* %vp, <4
 define <4 x i64> @test_8xi64_to_4xi64_perm_mem_mask6(<8 x i64>* %vp) {
 ; CHECK-LABEL: test_8xi64_to_4xi64_perm_mem_mask6:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm0
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm1
-; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[3,1,2,3]
-; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,3,2]
-; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
+; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
+; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [7,2,3,2]
+; CHECK-NEXT: vpermi2q %ymm2, %ymm1, %ymm0
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 7, i32 2, i32 3, i32 2>
@@ -2754,12 +2733,11 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask6(<8 x i64>* %vp, <4 x
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
 ; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
-; CHECK-NEXT: vpermq {{.*#+}} ymm3 = ymm3[3,1,2,3]
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,3,2]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm3[0,1],ymm2[2,3,4,5,6,7]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vpcmpeqq %ymm3, %ymm1, %k1
-; CHECK-NEXT: vmovdqa64 %ymm2, %ymm0 {%k1}
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm4 = [7,2,3,2]
+; CHECK-NEXT: vpermi2q %ymm3, %ymm2, %ymm4
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
+; CHECK-NEXT: vmovdqa64 %ymm4, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 7, i32 2, i32 3, i32 2>
@@ -2771,14 +2749,13 @@ define <4 x i64> @test_masked_8xi64_to_4xi64_perm_mem_mask6(<8 x i64>* %vp, <4 x
 define <4 x i64> @test_masked_z_8xi64_to_4xi64_perm_mem_mask6(<8 x i64>* %vp, <4 x i64> %mask) {
 ; CHECK-LABEL: test_masked_z_8xi64_to_4xi64_perm_mem_mask6:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
-; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[3,1,2,3]
-; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,3,2]
-; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5,6,7]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vpcmpeqq %ymm2, %ymm0, %k1
-; CHECK-NEXT: vmovdqa64 %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
+; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [7,2,3,2]
+; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vpcmpeqq %ymm4, %ymm0, %k1
+; CHECK-NEXT: vpermi2q %ymm3, %ymm2, %ymm1 {%k1} {z}
+; CHECK-NEXT: vmovdqa %ymm1, %ymm0
 ; CHECK-NEXT: retq
 %vec = load <8 x i64>, <8 x i64>* %vp
 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <4 x i32> <i32 7, i32 2, i32 3, i32 2>
@@ -3307,14 +3284,13 @@ define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mask1(<16 x float> %v
 define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mask2(<16 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_16xfloat_to_8xfloat_perm_mask2:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovapd {{.*#+}} ymm3 = <0,4,u,u,6,1,4,4>
-; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm3
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm3[0],ymm0[1],ymm3[2,3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
-; CHECK-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm3
+; CHECK-NEXT: vmovddup {{.*#+}} xmm3 = xmm3[0,0]
+; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,4,10,11,6,1,4,4]
+; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm4
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vcmpeqps %ymm0, %ymm2, %k1
+; CHECK-NEXT: vblendmps %ymm4, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4>
 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
@@ -3325,14 +3301,13 @@ define <8 x float> @test_masked_16xfloat_to_8xfloat_perm_mask2(<16 x float> %vec
 define <8 x float> @test_masked_z_16xfloat_to_8xfloat_perm_mask2(<16 x float> %vec, <8 x float> %mask) {
 ; CHECK-LABEL: test_masked_z_16xfloat_to_8xfloat_perm_mask2:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = <0,4,u,u,6,1,4,4>
-; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm2
-; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; CHECK-NEXT: vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2,3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
-; CHECK-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z}
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
+; CHECK-NEXT: vmovddup {{.*#+}} xmm3 = xmm2[0,0]
+; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,4,10,11,6,1,4,4]
+; CHECK-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; CHECK-NEXT: vcmpeqps %ymm4, %ymm1, %k1
+; CHECK-NEXT: vpermi2ps %ymm3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vmovaps %ymm2, %ymm0
 ; CHECK-NEXT: retq
 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <8 x i32> <i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4>
 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
@@ -3775,12 +3750,11 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask1(<16 x float>*
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
-; CHECK-NEXT: vshufps {{.*#+}} ymm2 = ymm3[0,2],ymm2[2,3],ymm3[4,6],ymm2[6,7]
-; CHECK-NEXT: vpermilps {{.*#+}} ymm2 = ymm2[0,2,1,3,4,6,5,7]
-; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[0,3,2,3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
-; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [0,10,6,15,4,14,6,15]
+; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
+; CHECK-NEXT: vmovaps %xmm4, %xmm0 {%k1}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <16 x float>, <16 x float>* %vp
@@ -3795,12 +3769,11 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask1(<16 x float
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
 ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; CHECK-NEXT: vshufps {{.*#+}} ymm1 = ymm2[0,2],ymm1[2,3],ymm2[4,6],ymm1[6,7]
-; CHECK-NEXT: vpermilps {{.*#+}} ymm1 = ymm1[0,2,1,3,4,6,5,7]
-; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,3,2,3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
-; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,10,6,15,4,14,6,15]
+; CHECK-NEXT: vpermi2ps %ymm1, %ymm2, %ymm3
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
+; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <16 x float>, <16 x float>* %vp
@@ -3815,12 +3788,11 @@ define <4 x float> @test_masked_16xfloat_to_4xfloat_perm_mem_mask2(<16 x float>*
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
 ; CHECK-NEXT: vextractf64x4 $1, %zmm2, %ymm3
-; CHECK-NEXT: vshufps {{.*#+}} ymm2 = ymm2[2,0],ymm3[0,0],ymm2[6,4],ymm3[4,4]
-; CHECK-NEXT: vshufps {{.*#+}} ymm2 = ymm2[2,0],ymm3[2,3],ymm2[6,4],ymm3[6,7]
-; CHECK-NEXT: vpermpd {{.*#+}} ymm2 = ymm2[2,2,2,3]
-; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; CHECK-NEXT: vcmpeqps %xmm3, %xmm1, %k1
-; CHECK-NEXT: vmovaps %xmm2, %xmm0 {%k1}
+; CHECK-NEXT: vmovaps {{.*#+}} ymm4 = [4,14,4,14,4,14,6,7]
+; CHECK-NEXT: vpermi2ps %ymm2, %ymm3, %ymm4
+; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
+; CHECK-NEXT: vmovaps %xmm4, %xmm0 {%k1}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <16 x float>, <16 x float>* %vp
@@ -3835,12 +3807,11 @@ define <4 x float> @test_masked_z_16xfloat_to_4xfloat_perm_mem_mask2(<16 x float
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
 ; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm2
-; CHECK-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,0],ymm2[0,0],ymm1[6,4],ymm2[4,4]
-; CHECK-NEXT: vshufps {{.*#+}} ymm1 = ymm1[2,0],ymm2[2,3],ymm1[6,4],ymm2[6,7]
-; CHECK-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[2,2,2,3]
-; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; CHECK-NEXT: vcmpeqps %xmm2, %xmm0, %k1
-; CHECK-NEXT: vmovaps %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [4,14,4,14,4,14,6,7]
+; CHECK-NEXT: vpermi2ps %ymm1, %ymm2, %ymm3
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
+; CHECK-NEXT: vmovaps %xmm3, %xmm0 {%k1} {z}
 ; CHECK-NEXT: vzeroupper
 ; CHECK-NEXT: retq
 %vec = load <16 x float>, <16 x float>* %vp

@@ -633,11 +633,9 @@ define void @shuffle_v32i8_to_v4i8_1(<32 x i8>* %L, <4 x i8>* %S) nounwind {
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,1,1,8,8,9,9,8,8,9,9,10,10,11,11]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,255,255,9,9,255,255,9,9,255,255,11,11,255,255]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT: vpmovdb %xmm0, (%rsi)
 ; AVX512VL-NEXT: vzeroupper
@@ -659,11 +657,9 @@ define void @shuffle_v32i8_to_v4i8_1(<32 x i8>* %L, <4 x i8>* %S) nounwind {
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,0,1,1,8,8,9,9,8,8,9,9,10,10,11,11]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,255,255,9,9,255,255,9,9,255,255,11,11,255,255]
 ; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpsrld $16, %xmm1, %xmm1
 ; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpsrld $16, %xmm0, %xmm0
 ; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper
@@ -795,11 +791,9 @@ define void @shuffle_v32i8_to_v4i8_3(<32 x i8>* %L, <4 x i8>* %S) nounwind {
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [10,10,11,11,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,11,11,11,11,3,3,8,8,9,9,10,10,11,11]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7]
 ; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,1,3,4,5,6,7]
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT: vpmovdb %xmm0, (%rsi)
 ; AVX512VL-NEXT: vzeroupper
@@ -821,11 +815,9 @@ define void @shuffle_v32i8_to_v4i8_3(<32 x i8>* %L, <4 x i8>* %S) nounwind {
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm2 = [10,10,11,11,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm2 = [3,3,11,11,11,11,3,3,8,8,9,9,10,10,11,11]
 ; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,1,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper
@@ -949,14 +941,9 @@ define void @shuffle_v32i8_to_v4i8_5(<32 x i8>* %L, <4 x i8>* %S) nounwind {
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,1,3,4,5,6,7]
+; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [5,5,13,13,13,13,5,5,4,4,5,5,6,6,7,7]
+; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512VL-NEXT: vpmovdb %xmm0, (%rsi)
 ; AVX512VL-NEXT: vzeroupper
@@ -978,14 +965,9 @@ define void @shuffle_v32i8_to_v4i8_5(<32 x i8>* %L, <4 x i8>* %S) nounwind {
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[3,1,1,3,4,5,6,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,1,3,4,5,6,7]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm2 = [5,5,13,13,13,13,5,5,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512BWVL-NEXT: vpmovdb %xmm0, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper

@@ -99,10 +99,9 @@ define void @shuffle_v32i16_to_v16i16_1(<32 x i16>* %L, <16 x i16>* %S) nounwind
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,5,7,17,19,21,23,9,11,13,15,25,27,29,31]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31]
 ; AVX512BWVL-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
-; AVX512BWVL-NEXT: vpermq {{.*#+}} ymm0 = ymm2[0,2,1,3]
-; AVX512BWVL-NEXT: vmovdqa %ymm0, (%rsi)
+; AVX512BWVL-NEXT: vmovdqa %ymm2, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
 %vec = load <32 x i16>, <32 x i16>* %L
@@ -673,17 +672,14 @@ define void @shuffle_v64i8_to_v8i8_1(<64 x i8>* %L, <8 x i8>* %S) nounwind {
 ; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,1,1,8,8,9,9,8,8,9,9,10,10,11,11]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [0,0,1,1,1,1,9,9,8,8,9,9,10,10,11,11]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,1,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,1,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [1,1,9,9,8,8,9,9,8,8,9,9,10,10,11,11]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[1,3,2,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; AVX512BWVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 ; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
@@ -831,17 +827,14 @@ define void @shuffle_v64i8_to_v8i8_3(<64 x i8>* %L, <8 x i8>* %S) nounwind {
 ; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [10,10,11,11,2,2,3,3,8,8,9,9,10,10,11,11]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [10,10,11,11,3,3,11,11,8,8,9,9,10,10,11,11]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,1,4,5,6,7]
 ; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [3,3,11,11,2,2,3,3,8,8,9,9,10,10,11,11]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; AVX512BWVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 ; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
@@ -989,24 +982,14 @@ define void @shuffle_v64i8_to_v8i8_5(<64 x i8>* %L, <8 x i8>* %S) nounwind {
 ; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,1,4,5,6,7]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [12,12,13,13,5,5,13,13,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
 ; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[3,1,2,3]
-; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512BWVL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [5,5,13,13,4,4,5,5,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
 ; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; AVX512BWVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 ; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
@@ -1154,17 +1137,14 @@ define void @shuffle_v64i8_to_v8i8_7(<64 x i8>* %L, <8 x i8>* %S) nounwind {
 ; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [14,14,15,15,6,6,7,7,4,4,5,5,6,6,7,7]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [14,14,15,15,7,7,15,15,4,4,5,5,6,6,7,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,1,3,1,4,5,6,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,1,3,1,4,5,6,7]
 ; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,15,15,6,6,7,7,4,4,5,5,6,6,7,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[3,1,2,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,3,4,5,6,7]
 ; AVX512BWVL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
 ; AVX512BWVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
 ; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)

@@ -139,59 +139,17 @@ define void @shuffle_v16i16_to_v8i16(<16 x i16>* %L, <8 x i16>* %S) nounwind {
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;
-; AVX512F-LABEL: shuffle_v16i16_to_v8i16:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512F-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512F-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v16i16_to_v8i16:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512VL-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: shuffle_v16i16_to_v8i16:
-; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BW-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512BW-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: shuffle_v16i16_to_v8i16:
-; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX512BWVL-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX512BWVL-NEXT: vzeroupper
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: shuffle_v16i16_to_v8i16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovdqa (%rdi), %ymm0
+; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: vmovdqa %xmm0, (%rsi)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
 %vec = load <16 x i16>, <16 x i16>* %L
 %strided.vec = shufflevector <16 x i16> %vec, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
 store <8 x i16> %strided.vec, <8 x i16>* %S
@@ -399,12 +357,9 @@ define void @shuffle_v32i8_to_v8i8(<32 x i8>* %L, <8 x i8>* %S) nounwind {
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
 ; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
-; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
-; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
 ; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; AVX512BWVL-NEXT: vpmovwb %xmm0, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper

@@ -148,10 +148,9 @@ define void @shuffle_v32i16_to_v16i16(<32 x i16>* %L, <16 x i16>* %S) nounwind {
 ; AVX512BWVL: # %bb.0:
 ; AVX512BWVL-NEXT: vmovdqa64 (%rdi), %zmm0
 ; AVX512BWVL-NEXT: vextracti64x4 $1, %zmm0, %ymm1
-; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,16,18,20,22,8,10,12,14,24,26,28,30]
+; AVX512BWVL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30]
 ; AVX512BWVL-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
-; AVX512BWVL-NEXT: vpermq {{.*#+}} ymm0 = ymm2[0,2,1,3]
-; AVX512BWVL-NEXT: vmovdqa %ymm0, (%rsi)
+; AVX512BWVL-NEXT: vmovdqa %ymm2, (%rsi)
 ; AVX512BWVL-NEXT: vzeroupper
 ; AVX512BWVL-NEXT: retq
 %vec = load <32 x i16>, <32 x i16>* %L

@@ -2195,8 +2195,7 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind {
 ; AVX512VL-NEXT: shlq $32, %rdx
 ; AVX512VL-NEXT: orq %rcx, %rdx
 ; AVX512VL-NEXT: vmovq %rdx, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512VL-NEXT: retq
 %1 = fptrunc <4 x float> %a0 to <4 x half>
 %2 = bitcast <4 x half> %1 to <4 x i16>
@@ -2205,108 +2204,30 @@ define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind {
 }

 define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind {
-; AVX1-LABEL: cvt_4f32_to_8i16_zero:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; AVX1-NEXT: vmovd %xmm1, %eax
-; AVX1-NEXT: shll $16, %eax
-; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1
-; AVX1-NEXT: vmovd %xmm1, %ecx
-; AVX1-NEXT: movzwl %cx, %ecx
-; AVX1-NEXT: orl %eax, %ecx
-; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; AVX1-NEXT: vmovd %xmm1, %eax
-; AVX1-NEXT: shll $16, %eax
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; AVX1-NEXT: vmovd %xmm0, %edx
-; AVX1-NEXT: movzwl %dx, %edx
-; AVX1-NEXT: orl %eax, %edx
-; AVX1-NEXT: shlq $32, %rdx
-; AVX1-NEXT: orq %rcx, %rdx
-; AVX1-NEXT: vmovq %rdx, %xmm0
-; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: cvt_4f32_to_8i16_zero:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; AVX2-NEXT: vmovd %xmm1, %eax
-; AVX2-NEXT: shll $16, %eax
-; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1
-; AVX2-NEXT: vmovd %xmm1, %ecx
-; AVX2-NEXT: movzwl %cx, %ecx
-; AVX2-NEXT: orl %eax, %ecx
-; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; AVX2-NEXT: vmovd %xmm1, %eax
-; AVX2-NEXT: shll $16, %eax
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; AVX2-NEXT: vmovd %xmm0, %edx
-; AVX2-NEXT: movzwl %dx, %edx
-; AVX2-NEXT: orl %eax, %edx
-; AVX2-NEXT: shlq $32, %rdx
-; AVX2-NEXT: orq %rcx, %rdx
-; AVX2-NEXT: vmovq %rdx, %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: cvt_4f32_to_8i16_zero:
-; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512F-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm1
-; AVX512F-NEXT: vmovd %xmm1, %ecx
-; AVX512F-NEXT: movzwl %cx, %ecx
-; AVX512F-NEXT: orl %eax, %ecx
-; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; AVX512F-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; AVX512F-NEXT: vmovd %xmm1, %eax
-; AVX512F-NEXT: shll $16, %eax
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; AVX512F-NEXT: vmovd %xmm0, %edx
-; AVX512F-NEXT: movzwl %dx, %edx
-; AVX512F-NEXT: orl %eax, %edx
-; AVX512F-NEXT: shlq $32, %rdx
-; AVX512F-NEXT: orq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_4f32_to_8i16_zero:
-; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovd %xmm1, %eax
-; AVX512VL-NEXT: shll $16, %eax
-; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1
-; AVX512VL-NEXT: vmovd %xmm1, %ecx
-; AVX512VL-NEXT: movzwl %cx, %ecx
-; AVX512VL-NEXT: orl %eax, %ecx
-; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
-; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
-; AVX512VL-NEXT: vmovd %xmm1, %eax
-; AVX512VL-NEXT: shll $16, %eax
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vmovd %xmm0, %edx
-; AVX512VL-NEXT: movzwl %dx, %edx
-; AVX512VL-NEXT: orl %eax, %edx
-; AVX512VL-NEXT: shlq $32, %rdx
-; AVX512VL-NEXT: orq %rcx, %rdx
-; AVX512VL-NEXT: vmovq %rdx, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX512VL-NEXT: retq
+; ALL-LABEL: cvt_4f32_to_8i16_zero:
+; ALL: # %bb.0:
+; ALL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; ALL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; ALL-NEXT: vmovd %xmm1, %eax
+; ALL-NEXT: shll $16, %eax
+; ALL-NEXT: vcvtps2ph $4, %xmm0, %xmm1
+; ALL-NEXT: vmovd %xmm1, %ecx
+; ALL-NEXT: movzwl %cx, %ecx
+; ALL-NEXT: orl %eax, %ecx
+; ALL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
+; ALL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
+; ALL-NEXT: vmovd %xmm1, %eax
+; ALL-NEXT: shll $16, %eax
+; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; ALL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; ALL-NEXT: vmovd %xmm0, %edx
+; ALL-NEXT: movzwl %dx, %edx
+; ALL-NEXT: orl %eax, %edx
+; ALL-NEXT: shlq $32, %rdx
+; ALL-NEXT: orq %rcx, %rdx
+; ALL-NEXT: vmovq %rdx, %xmm0
+; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
+; ALL-NEXT: retq
 %1 = fptrunc <4 x float> %a0 to <4 x half>
 %2 = bitcast <4 x half> %1 to <4 x i16>
 %3 = shufflevector <4 x i16> %2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -2715,8 +2636,7 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw
 ; AVX512VL-NEXT: shlq $32, %rdx
 ; AVX512VL-NEXT: orq %rcx, %rdx
 ; AVX512VL-NEXT: vmovq %rdx, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u,u,u,u,u,u,u,u,u]
 ; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi)
 ; AVX512VL-NEXT: retq
 %1 = fptrunc <4 x float> %a0 to <4 x half>
@ -2727,112 +2647,31 @@ define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounw
|
||||
}
|
||||
|
||||
define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwind {
; AVX1-LABEL: store_cvt_4f32_to_8i16_zero:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX1-NEXT: vmovd %xmm1, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1
; AVX1-NEXT: vmovd %xmm1, %ecx
; AVX1-NEXT: movzwl %cx, %ecx
; AVX1-NEXT: orl %eax, %ecx
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX1-NEXT: vmovd %xmm1, %eax
; AVX1-NEXT: shll $16, %eax
; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX1-NEXT: vmovd %xmm0, %edx
; AVX1-NEXT: movzwl %dx, %edx
; AVX1-NEXT: orl %eax, %edx
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: orq %rcx, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm0
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT: vmovdqa %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: store_cvt_4f32_to_8i16_zero:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX2-NEXT: vmovd %xmm1, %eax
; AVX2-NEXT: shll $16, %eax
; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1
; AVX2-NEXT: vmovd %xmm1, %ecx
; AVX2-NEXT: movzwl %cx, %ecx
; AVX2-NEXT: orl %eax, %ecx
; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX2-NEXT: vmovd %xmm1, %eax
; AVX2-NEXT: shll $16, %eax
; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX2-NEXT: vmovd %xmm0, %edx
; AVX2-NEXT: movzwl %dx, %edx
; AVX2-NEXT: orl %eax, %edx
; AVX2-NEXT: shlq $32, %rdx
; AVX2-NEXT: orq %rcx, %rdx
; AVX2-NEXT: vmovq %rdx, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT: vmovdqa %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512F-LABEL: store_cvt_4f32_to_8i16_zero:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: shll $16, %eax
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm1
; AVX512F-NEXT: vmovd %xmm1, %ecx
; AVX512F-NEXT: movzwl %cx, %ecx
; AVX512F-NEXT: orl %eax, %ecx
; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512F-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512F-NEXT: vmovd %xmm1, %eax
; AVX512F-NEXT: shll $16, %eax
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, %edx
; AVX512F-NEXT: movzwl %dx, %edx
; AVX512F-NEXT: orl %eax, %edx
; AVX512F-NEXT: shlq $32, %rdx
; AVX512F-NEXT: orq %rcx, %rdx
; AVX512F-NEXT: vmovq %rdx, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: store_cvt_4f32_to_8i16_zero:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512VL-NEXT: vmovd %xmm1, %eax
; AVX512VL-NEXT: shll $16, %eax
; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1
; AVX512VL-NEXT: vmovd %xmm1, %ecx
; AVX512VL-NEXT: movzwl %cx, %ecx
; AVX512VL-NEXT: orl %eax, %ecx
; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; AVX512VL-NEXT: vmovd %xmm1, %eax
; AVX512VL-NEXT: shll $16, %eax
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; AVX512VL-NEXT: vmovd %xmm0, %edx
; AVX512VL-NEXT: movzwl %dx, %edx
; AVX512VL-NEXT: orl %eax, %edx
; AVX512VL-NEXT: shlq $32, %rdx
; AVX512VL-NEXT: orq %rcx, %rdx
; AVX512VL-NEXT: vmovq %rdx, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512VL-NEXT: retq
; ALL-LABEL: store_cvt_4f32_to_8i16_zero:
; ALL: # %bb.0:
; ALL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; ALL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; ALL-NEXT: vmovd %xmm1, %eax
; ALL-NEXT: shll $16, %eax
; ALL-NEXT: vcvtps2ph $4, %xmm0, %xmm1
; ALL-NEXT: vmovd %xmm1, %ecx
; ALL-NEXT: movzwl %cx, %ecx
; ALL-NEXT: orl %eax, %ecx
; ALL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; ALL-NEXT: vcvtps2ph $4, %xmm1, %xmm1
; ALL-NEXT: vmovd %xmm1, %eax
; ALL-NEXT: shll $16, %eax
; ALL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; ALL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; ALL-NEXT: vmovd %xmm0, %edx
; ALL-NEXT: movzwl %dx, %edx
; ALL-NEXT: orl %eax, %edx
; ALL-NEXT: shlq $32, %rdx
; ALL-NEXT: orq %rcx, %rdx
; ALL-NEXT: vmovq %rdx, %xmm0
; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; ALL-NEXT: vmovdqa %xmm0, (%rdi)
; ALL-NEXT: retq
%1 = fptrunc <4 x float> %a0 to <4 x half>
%2 = bitcast <4 x half> %1 to <4 x i16>
%3 = shufflevector <4 x i16> %2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -3389,8 +3228,7 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX512VL-NEXT: shlq $32, %rax
; AVX512VL-NEXT: orq %r14, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VL-NEXT: addq $40, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
@ -3478,84 +3316,43 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX2-NEXT: popq %r14
; AVX2-NEXT: retq
;
; AVX512F-LABEL: cvt_4f64_to_8i16_zero:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %r14
; AVX512F-NEXT: pushq %rbx
; AVX512F-NEXT: subq $40, %rsp
; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %ebx
; AVX512F-NEXT: shll $16, %ebx
; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movzwl %ax, %r14d
; AVX512F-NEXT: orl %ebx, %r14d
; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %ebx
; AVX512F-NEXT: shll $16, %ebx
; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: orl %ebx, %eax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %r14, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: addq $40, %rsp
; AVX512F-NEXT: popq %rbx
; AVX512F-NEXT: popq %r14
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: cvt_4f64_to_8i16_zero:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $40, %rsp
; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %ebx
; AVX512VL-NEXT: shll $16, %ebx
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %ebx
; AVX512VL-NEXT: shll $16, %ebx
; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: orl %ebx, %eax
; AVX512VL-NEXT: shlq $32, %rax
; AVX512VL-NEXT: orq %r14, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: addq $40, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
; AVX512VL-NEXT: retq
; AVX512-LABEL: cvt_4f64_to_8i16_zero:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: subq $40, %rsp
; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %ebx
; AVX512-NEXT: shll $16, %ebx
; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %r14d
; AVX512-NEXT: orl %ebx, %r14d
; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %ebx
; AVX512-NEXT: shll $16, %ebx
; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %eax
; AVX512-NEXT: orl %ebx, %eax
; AVX512-NEXT: shlq $32, %rax
; AVX512-NEXT: orq %r14, %rax
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: addq $40, %rsp
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%1 = fptrunc <4 x double> %a0 to <4 x half>
%2 = bitcast <4 x half> %1 to <4 x i16>
%3 = shufflevector <4 x i16> %2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@ -4095,8 +3892,7 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun
; AVX512VL-NEXT: shlq $32, %rax
; AVX512VL-NEXT: orq %rbx, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u,u,u,u,u,u,u,u,u]
; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
; AVX512VL-NEXT: addq $32, %rsp
; AVX512VL-NEXT: popq %rbx
@ -4195,92 +3991,47 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq
;
; AVX512F-LABEL: store_cvt_4f64_to_8i16_zero:
; AVX512F: # %bb.0:
; AVX512F-NEXT: pushq %rbp
; AVX512F-NEXT: pushq %r14
; AVX512F-NEXT: pushq %rbx
; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: movq %rdi, %r14
; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %ebp
; AVX512F-NEXT: shll $16, %ebp
; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movzwl %ax, %ebx
; AVX512F-NEXT: orl %ebp, %ebx
; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %ebp
; AVX512F-NEXT: shll $16, %ebp
; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: orl %ebp, %eax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rbx, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT: vmovdqa %xmm0, (%r14)
; AVX512F-NEXT: addq $32, %rsp
; AVX512F-NEXT: popq %rbx
; AVX512F-NEXT: popq %r14
; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: store_cvt_4f64_to_8i16_zero:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: pushq %rbp
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $32, %rsp
; AVX512VL-NEXT: movq %rdi, %r14
; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %ebp
; AVX512VL-NEXT: shll $16, %ebp
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx
; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %ebp
; AVX512VL-NEXT: shll $16, %ebp
; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: orl %ebp, %eax
; AVX512VL-NEXT: shlq $32, %rax
; AVX512VL-NEXT: orq %rbx, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT: vmovdqa %xmm0, (%r14)
; AVX512VL-NEXT: addq $32, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
; AVX512VL-NEXT: popq %rbp
; AVX512VL-NEXT: retq
; AVX512-LABEL: store_cvt_4f64_to_8i16_zero:
; AVX512: # %bb.0:
; AVX512-NEXT: pushq %rbp
; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: subq $32, %rsp
; AVX512-NEXT: movq %rdi, %r14
; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %ebp
; AVX512-NEXT: shll $16, %ebp
; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %ebx
; AVX512-NEXT: orl %ebp, %ebx
; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movl %eax, %ebp
; AVX512-NEXT: shll $16, %ebp
; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512-NEXT: callq __truncdfhf2
; AVX512-NEXT: movzwl %ax, %eax
; AVX512-NEXT: orl %ebp, %eax
; AVX512-NEXT: shlq $32, %rax
; AVX512-NEXT: orq %rbx, %rax
; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,u,u,u,u],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT: vmovdqa %xmm0, (%r14)
; AVX512-NEXT: addq $32, %rsp
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r14
; AVX512-NEXT: popq %rbp
; AVX512-NEXT: retq
%1 = fptrunc <4 x double> %a0 to <4 x half>
%2 = bitcast <4 x half> %1 to <4 x i16>
%3 = shufflevector <4 x i16> %2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>

@ -59,17 +59,10 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01(
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-NEXT: retq
; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_01_01_01_01_01_01_01_01:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <16 x i8> %shuffle
}
@ -94,17 +87,10 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08(
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,1,1,8,8,9,9,8,8,9,9,10,10,11,11]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,2,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-NEXT: retq
; AVX-LABEL: shuffle_v16i8_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,8,8,8,8,8,8,8,8]
; AVX-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i8> %shuffle
}
@ -135,8 +121,7 @@ define <16 x i8> @shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03(
;
; AVX512VL-LABEL: shuffle_v16i8_00_00_00_00_01_01_01_01_02_02_02_02_03_03_03_03:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3>
ret <16 x i8> %shuffle
@ -168,8 +153,7 @@ define <16 x i8> @shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07(
;
; AVX512VL-LABEL: shuffle_v16i8_04_04_04_04_05_05_05_05_06_06_06_06_07_07_07_07:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX512VL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,4,4,4,5,5,5,5,6,6,6,6,7,7,7,7]
; AVX512VL-NEXT: retq
%shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7>
ret <16 x i8> %shuffle

@ -167,11 +167,16 @@ define <8 x i16> @shuffle_v8i16_31206745(<8 x i16> %a, <8 x i16> %b) {
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i16_31206745:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v8i16_31206745:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,1,2,0,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_31206745:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,2,3,4,5,0,1,12,13,14,15,8,9,10,11]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
}
@ -237,11 +242,16 @@ define <8 x i16> @shuffle_v8i16_23026745(<8 x i16> %a, <8 x i16> %b) {
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i16_23026745:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v8i16_23026745:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,3,0,2,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,3,2]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_23026745:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[4,5,6,7,0,1,4,5,12,13,14,15,8,9,10,11]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 2, i32 3, i32 0, i32 2, i32 6, i32 7, i32 4, i32 5>
ret <8 x i16> %shuffle
}
@ -802,23 +812,10 @@ define <8 x i16> @shuffle_v8i16_0127XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v8i16_0127XXXX:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_0127XXXX:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,7,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_0127XXXX:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,10,11,4,5,14,15,12,13,14,15]
; AVX512VL-FAST-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-FAST-NEXT: retq
; AVX-LABEL: shuffle_v8i16_0127XXXX:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
@ -841,23 +838,10 @@ define <8 x i16> @shuffle_v8i16_XXXX4563(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v8i16_XXXX4563:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXXX4563:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_XXXX4563:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,0,1,2,3]
; AVX512VL-FAST-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,2,0]
; AVX512VL-FAST-NEXT: retq
; AVX-LABEL: shuffle_v8i16_XXXX4563:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,12,13,6,7]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
}
@ -880,23 +864,10 @@ define <8 x i16> @shuffle_v8i16_4563XXXX(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v8i16_4563XXXX:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_4563XXXX:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,3,2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_4563XXXX:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,6,7,8,9,10,11,0,1,2,3]
; AVX512VL-FAST-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,2,3]
; AVX512VL-FAST-NEXT: retq
; AVX-LABEL: shuffle_v8i16_4563XXXX:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i16> %shuffle
}
@ -919,23 +890,10 @@ define <8 x i16> @shuffle_v8i16_01274563(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v8i16_01274563:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_01274563:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_01274563:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,8,9,10,11,12,13,6,7,4,5,14,15]
; AVX512VL-FAST-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,1,2]
; AVX512VL-FAST-NEXT: retq
; AVX-LABEL: shuffle_v8i16_01274563:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,14,15,8,9,10,11,12,13,6,7]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6, i32 3>
ret <8 x i16> %shuffle
}
@ -958,23 +916,10 @@ define <8 x i16> @shuffle_v8i16_45630127(<8 x i16> %a, <8 x i16> %b) {
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; SSE41-NEXT: retq
;
; AVX1OR2-LABEL: shuffle_v8i16_45630127:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX1OR2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v8i16_45630127:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i16_45630127:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,6,7,4,5,14,15,8,9,10,11,0,1,2,3]
; AVX512VL-FAST-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,0,3,1]
; AVX512VL-FAST-NEXT: retq
; AVX-LABEL: shuffle_v8i16_45630127:
; AVX: # %bb.0:
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,9,10,11,12,13,6,7,0,1,2,3,4,5,14,15]
; AVX-NEXT: retq
%shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 3, i32 0, i32 1, i32 2, i32 7>
ret <8 x i16> %shuffle
}

@ -409,11 +409,16 @@ define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_0
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_00_00_00_00_00_00_00_08_08_08_08_08_08_08_08:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
ret <16 x i16> %shuffle
}
@ -440,11 +445,16 @@ define <16 x i16> @shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_1
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,3,3,6,6,7,7]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,7,7,7,7,8,9,10,11,15,15,15,15]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,2,3,3,6,6,7,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_07_07_07_07_07_07_07_07_15_15_15_15_15_15_15_15:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[14,15,14,15,14,15,14,15,14,15,14,15,14,15,14,15,30,31,30,31,30,31,30,31,30,31,30,31,30,31,30,31]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
ret <16 x i16> %shuffle
}
@ -2867,23 +2877,10 @@ define <16 x i16> @shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_u
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
; AVX2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
; AVX512VL-SLOW-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,7,6,7,8,9,10,11,12,15,14,15]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,8,9,10,11,4,5,14,15,12,13,14,15,16,17,18,19,24,25,26,27,20,21,30,31,28,29,30,31]
; AVX512VL-FAST-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,2,3,4,6,6,7]
; AVX512VL-FAST-NEXT: retq
; AVX2OR512VL-LABEL: shuffle_v16i16_00_01_02_07_uu_uu_uu_uu_08_09_10_15_uu_uu_uu_uu:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,14,15,4,5,14,15,12,13,14,15,16,17,18,19,20,21,30,31,20,21,30,31,28,29,30,31]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 8, i32 9, i32 10, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle
}
@ -2923,23 +2920,10 @@ define <16 x i16> @shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_u
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
; AVX2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[3,1,2,0,7,5,6,4]
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,3,2,3,4,5,6,7,8,11,10,11,12,13,14,15]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,0,2,3,6,4,6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[12,13,6,7,4,5,6,7,8,9,10,11,0,1,2,3,28,29,22,23,20,21,22,23,24,25,26,27,16,17,18,19]
; AVX512VL-FAST-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[2,0,2,3,6,4,6,7]
; AVX512VL-FAST-NEXT: retq
; AVX2OR512VL-LABEL: shuffle_v16i16_04_05_06_03_uu_uu_uu_uu_12_13_14_11_uu_uu_uu_uu:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,6,7,8,9,10,11,0,1,2,3,24,25,26,27,28,29,22,23,24,25,26,27,16,17,18,19]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle
}
@ -4069,12 +4053,18 @@ define <16 x i16> @shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3(<16 x i16> %a,
; AVX2-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-SLOW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_u_u_u_u_u_u_u_u_3_3_3_3_3_3_3_3:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; AVX512VL-FAST-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
ret <16 x i16> %shuffle
}
@ -4124,11 +4114,16 @@ define <16 x i16> @shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u(<16 x i16> %a,
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_3_3_3_3_3_3_3_3_u_u_u_u_u_u_u_u:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,7,6,7,6,7,6,7,6,7,6,7,6,7,6,7]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle
}
@ -4154,12 +4149,18 @@ define <16 x i16> @shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u(<16 x i16> %a,
; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,1,1,1,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v16i16_9_9_9_9_9_9_9_9_u_u_u_u_u_u_u_u:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <16 x i16> %shuffle
}
@ -4222,9 +4223,8 @@ define <16 x i16> @shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_2
;
; AVX512VL-LABEL: shuffle_v16i16_02_18_03_19_10_26_11_27_00_16_01_17_08_24_09_25:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [2,18,3,19,0,16,1,17,10,26,11,27,8,24,9,25]
; AVX512VL-NEXT: vpermi2w %ymm1, %ymm0, %ymm2
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm2[0,2,1,3]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [2,18,3,19,10,26,11,27,0,16,1,17,8,24,9,25]
; AVX512VL-NEXT: vpermt2w %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%1 = shufflevector <16 x i16> %a0, <16 x i16> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 0, i32 16, i32 1, i32 17, i32 10, i32 26, i32 11, i32 27, i32 8, i32 24, i32 9, i32 25>
%2 = bitcast <16 x i16> %1 to <4 x i64>

@ -608,12 +608,18 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_00_24_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@ -636,12 +642,18 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_00_25_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,9,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 25, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@ -664,12 +676,18 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_00_00_00_00_00_26_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 26, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@ -692,12 +710,18 @@ define <32 x i8> @shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_00_00_00_00_27_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,11,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 27, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@ -720,12 +744,18 @@ define <32 x i8> @shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_00_00_00_28_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,12,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 0, i32 28, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@ -748,12 +778,18 @@ define <32 x i8> @shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_00_00_29_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,13,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 0, i32 29, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@ -776,12 +812,18 @@ define <32 x i8> @shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_00_30_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 30, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@ -808,14 +850,22 @@ define <32 x i8> @shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_
; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-NEXT: movl $15, %eax
; AVX512VL-NEXT: vmovd %eax, %xmm1
; AVX512VL-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: movl $15, %eax
; AVX512VL-SLOW-NEXT: vmovd %eax, %xmm1
; AVX512VL-SLOW-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_31_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,3,0,1]
; AVX512VL-FAST-NEXT: movl $15, %eax
; AVX512VL-FAST-NEXT: vmovd %eax, %xmm1
; AVX512VL-FAST-NEXT: vpshufb %ymm1, %ymm0, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 31, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
ret <32 x i8> %shuffle
}
@ -1197,15 +1247,24 @@ define <32 x i8> @shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_
; AVX2-FAST-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX512VL-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
; AVX512VL-NEXT: kmovd %eax, %k1
; AVX512VL-NEXT: vpshufb %ymm2, %ymm1, %ymm0 {%k1}
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[0,0,0,0,4,5,6,7,8,8,8,8,12,13,14,15]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5]
; AVX512VL-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-SLOW-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
; AVX512VL-SLOW-NEXT: kmovd %eax, %k1
; AVX512VL-SLOW-NEXT: vpshufb %ymm2, %ymm1, %ymm0 {%k1}
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_00_32_00_32_00_32_00_32_00_32_00_32_00_32_00_32_16_48_16_48_16_48_16_48_16_48_16_48_16_48_16_48:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1,16,17,16,17,16,17,16,17,16,17,16,17,16,17,16,17]
; AVX512VL-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-FAST-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
; AVX512VL-FAST-NEXT: kmovd %eax, %k1
; AVX512VL-FAST-NEXT: vpshufb %ymm2, %ymm1, %ymm0 {%k1}
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 0, i32 32, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48, i32 16, i32 48>
ret <32 x i8> %shuffle
}
@ -2254,19 +2313,11 @@ define <32 x i8> @shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
; AVX2: # %bb.0:
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,2,2,4,5,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT: retq
; AVX2OR512VL-LABEL: shuffle_v32i8_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10_10:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10]
; AVX2OR512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10, i32 10>
ret <32 x i8> %shuffle
}
@ -2298,44 +2349,21 @@ define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,12,12,13,13,14,14,15,15]
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512VL-SLOW-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
; AVX512VL-SLOW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,3,3,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,12,12,13,13,14,14,15,15]
; AVX512VL-FAST-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-FAST-NEXT: retq
; AVX2OR512VL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_32_32_32_32_32_32_32_32_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vpbroadcastb %xmm1, %xmm1
; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,12,12,13,13,14,14,15,15]
; AVX2OR512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu(<32 x i8> %a, <32 x i8> %b) {
; AVX1OR2-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX1OR2: # %bb.0:
; AVX1OR2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; AVX1OR2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,7,7,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX512VL-NEXT: retq
; ALL-LABEL: shuffle_v32i8_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_15_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; ALL: # %bb.0:
; ALL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; ALL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i8> %shuffle
}
@ -2347,19 +2375,11 @@ define <32 x i8> @shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
; AVX1-NEXT: retq
;
; AVX2-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX2: # %bb.0:
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,6,6,6]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
; AVX512VL-NEXT: retq
; AVX2OR512VL-LABEL: shuffle_v32i8_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_22_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu_uu:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2OR512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6]
; AVX2OR512VL-NEXT: retq
%shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 22, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
ret <32 x i8> %shuffle
}

@ -834,11 +834,17 @@ define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0124:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vbroadcastsd %xmm1, %ymm1
; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v4i64_0124:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vbroadcastsd %xmm1, %ymm1
; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_0124:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,1,2,4]
; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
ret <4 x i64> %shuffle
}
@ -885,12 +891,19 @@ define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0412:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v4i64_0412:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_0412:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpbroadcastq %xmm1, %xmm1
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2]
; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
ret <4 x i64> %shuffle
}
@ -911,11 +924,17 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_4012:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v4i64_4012:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX512VL-SLOW-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_4012:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [4,0,1,2]
; AVX512VL-FAST-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
ret <4 x i64> %shuffle
}
@ -946,9 +965,8 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
;
; AVX512VL-LABEL: shuffle_v4i64_0451:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,1]
; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,4,5,1]
; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
ret <4 x i64> %shuffle
@ -980,9 +998,8 @@ define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
;
; AVX512VL-LABEL: shuffle_v4i64_4015:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,2,1]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [4,0,1,5]
; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
ret <4 x i64> %shuffle
@ -1036,9 +1053,8 @@ define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
;
; AVX512VL-LABEL: shuffle_v4i64_1251:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [1,2,5,1]
; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
ret <4 x i64> %shuffle
@ -1149,9 +1165,8 @@ define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
;
; AVX512VL-LABEL: shuffle_v4i64_0415:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,4,1,5]
; AVX512VL-NEXT: vpermt2q %ymm1, %ymm2, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
ret <4 x i64> %shuffle
@ -1604,12 +1619,17 @@ define <4 x i64> @shuffle_v4i64_z0z3(<4 x i64> %a, <4 x i64> %b) {
; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31]
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_z0z3:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v4i64_z0z3:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,3]
; AVX512VL-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v4i64_z0z3:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vpshufb {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31]
; AVX512VL-FAST-NEXT: retq
%1 = shufflevector <4 x i64> %a, <4 x i64> <i64 0, i64 undef, i64 undef, i64 undef>, <4 x i32> <i32 4, i32 0, i32 4, i32 3>
ret <4 x i64> %1
}

@ -768,11 +768,17 @@ define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8f32_76547654:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v8f32_76547654:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8f32_76547654:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
ret <8 x float> %shuffle
}
@ -796,11 +802,17 @@ define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8f32_76543210:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v8f32_76543210:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8f32_76543210:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x float> %shuffle
}
@ -856,11 +868,23 @@ define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT: retq
;
; AVX2OR512VL-LABEL: PR21138:
; AVX2OR512VL: # %bb.0:
; AVX2OR512VL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX2OR512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2OR512VL-NEXT: retq
; AVX2-LABEL: PR21138:
; AVX2: # %bb.0:
; AVX2-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512VL-SLOW-LABEL: PR21138:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: PR21138:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15]
; AVX512VL-FAST-NEXT: vpermt2ps %ymm1, %ymm2, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
ret <8 x float> %shuffle
}
@ -1829,11 +1853,17 @@ define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_76547654:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v8i32_76547654:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i32_76547654:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
ret <8 x i32> %shuffle
}
@ -1857,11 +1887,17 @@ define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
; AVX2-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-FAST-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_76543210:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512VL-NEXT: retq
; AVX512VL-SLOW-LABEL: shuffle_v8i32_76543210:
; AVX512VL-SLOW: # %bb.0:
; AVX512VL-SLOW-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
; AVX512VL-SLOW-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX512VL-SLOW-NEXT: retq
;
; AVX512VL-FAST-LABEL: shuffle_v8i32_76543210:
; AVX512VL-FAST: # %bb.0:
; AVX512VL-FAST-NEXT: vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
; AVX512VL-FAST-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX512VL-FAST-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
ret <8 x i32> %shuffle
}

@ -1504,12 +1504,9 @@ define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512VL-NEXT: vpmovqd %ymm1, %xmm1
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512VL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512VL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
@ -1531,12 +1528,9 @@ define <8 x i16> @trunc2x4i64_8i16(<4 x i64> %a, <4 x i64> %b) {
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0
; AVX512BWVL-NEXT: vpmovqd %ymm1, %xmm1
; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BWVL-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT: vzeroupper
; AVX512BWVL-NEXT: retq
@ -1647,43 +1641,13 @@ define <8 x i16> @trunc2x4i32_8i16(<4 x i32> %a, <4 x i32> %b) {
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
;
; AVX512F-LABEL: trunc2x4i32_8i16:
; AVX512F: # %bb.0: # %entry
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512F-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc2x4i32_8i16:
; AVX512VL: # %bb.0: # %entry
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc2x4i32_8i16:
; AVX512BW: # %bb.0: # %entry
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512BW-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512BW-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc2x4i32_8i16:
; AVX512BWVL: # %bb.0: # %entry
; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; AVX512BWVL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512BWVL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512BWVL-NEXT: retq
; AVX512-LABEL: trunc2x4i32_8i16:
; AVX512: # %bb.0: # %entry
; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX512-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX512-NEXT: retq
entry:
%0 = trunc <4 x i32> %a to <4 x i16>
%1 = trunc <4 x i32> %b to <4 x i16>