diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll
index 2e65bd8c75c..174a487160c 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -279,6 +279,35 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a
   ret <16 x i32> %c
 }
 
+;FIXME: can do better with vpcompress
+define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) {
+; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextracti32x8 $1, %zmm0, %ymm1
+; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
+; ALL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; ALL-NEXT:    retq
+  %res = shufflevector <16 x i32> %v, <16 x i32> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  ret <8 x i32> %res
+}
+
+;FIXME: can do better with vpcompress
+define <4 x i32> @test_v16i32_0_1_2_12 (<16 x i32> %v) {
+; ALL-LABEL: test_v16i32_0_1_2_12:
+; ALL:       # BB#0:
+; ALL-NEXT:    vpextrd $1, %xmm0, %eax
+; ALL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm1
+; ALL-NEXT:    vpextrd $2, %xmm0, %eax
+; ALL-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; ALL-NEXT:    vextracti32x4 $3, %zmm0, %xmm0
+; ALL-NEXT:    vmovd %xmm0, %eax
+; ALL-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
+  %res = shufflevector <16 x i32> %v, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 12>
+  ret <4 x i32> %res
+}
+
 define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
 ; ALL-LABEL: shuffle_v16f32_extract_256:
 ; ALL:       # BB#0:
@@ -290,6 +319,34 @@ define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
   ret <8 x float> %v2
 }
 
+;FIXME: can do better with vcompressp
+define <8 x float> @test_v16f32_0_1_2_3_4_6_7_10 (<16 x float> %v) {
+; ALL-LABEL: test_v16f32_0_1_2_3_4_6_7_10:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf32x8 $1, %zmm0, %ymm1
+; ALL-NEXT:    vmovsldup {{.*#+}} xmm1 = xmm1[0,0,2,2]
+; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
+; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,u]
+; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5,6],ymm1[7]
+; ALL-NEXT:    retq
+  %res = shufflevector <16 x float> %v, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 7, i32 10>
+  ret <8 x float> %res
+}
+
+;FIXME: can do better with vcompressp
+define <4 x float> @test_v16f32_0_1_3_6 (<16 x float> %v) {
+; ALL-LABEL: test_v16f32_0_1_3_6:
+; ALL:       # BB#0:
+; ALL-NEXT:    vextractf32x4 $1, %zmm0, %xmm1
+; ALL-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,3,3]
+; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0]
+; ALL-NEXT:    vzeroupper
+; ALL-NEXT:    retq
+  %res = shufflevector <16 x float> %v, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 6>
+  ret <4 x float> %res
+}
+
 define <16 x i32> @shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12(<16 x i32> %a, <16 x i32> %b) {
 ; ALL-LABEL: shuffle_v16i16_1_0_0_0_5_4_4_4_9_8_8_8_13_12_12_12:
 ; ALL:       # BB#0:
diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll
index 30c8d1b2373..d0b7e4eb205 100644
--- a/test/CodeGen/X86/vector-shuffle-512-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll
@@ -2659,3 +2659,91 @@ define <8 x double> @shuffle_v2f64_v8f64_01010101(<2 x double> %a) {
   %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   ret <8 x double> %shuffle
 }
+
+;FIXME: compressp
+define <4 x double> @test_v8f64_2346 (<8 x double> %v) {
+; AVX512F-LABEL: test_v8f64_2346:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX512F-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; AVX512F-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
+; AVX512F-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_v8f64_2346:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    vextractf128 $1, %ymm0, %xmm1
+; AVX512F-32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
+; AVX512F-32-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,2]
+; AVX512F-32-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
+; AVX512F-32-NEXT:    retl
+  %res = shufflevector <8 x double> %v, <8 x double> undef, <4 x i32> <i32 2, i32 3, i32 4, i32 6>
+  ret <4 x double> %res
+}
+
+;FIXME: compressp
+define <2 x double> @test_v8f64_34 (<8 x double> %v) {
+; AVX512F-LABEL: test_v8f64_34:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
+; AVX512F-NEXT:    vextractf32x4 $1, %zmm0, %xmm0
+; AVX512F-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_v8f64_34:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    vextractf32x4 $2, %zmm0, %xmm1
+; AVX512F-32-NEXT:    vextractf32x4 $1, %zmm0, %xmm0
+; AVX512F-32-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; AVX512F-32-NEXT:    vzeroupper
+; AVX512F-32-NEXT:    retl
+  %res = shufflevector <8 x double> %v, <8 x double> undef, <2 x i32> <i32 3, i32 4>
+  ret <2 x double> %res
+}
+
+; FIXME: vpcompress
+define <4 x i64> @test_v8i64_1257 (<8 x i64> %v) {
+; AVX512F-LABEL: test_v8i64_1257:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; AVX512F-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,3]
+; AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_v8i64_1257:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
+; AVX512F-32-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
+; AVX512F-32-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,3]
+; AVX512F-32-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
+; AVX512F-32-NEXT:    retl
+  %res = shufflevector <8 x i64> %v, <8 x i64> undef, <4 x i32> <i32 1, i32 2, i32 5, i32 7>
+  ret <4 x i64> %res
+}
+
+define <2 x i64> @test_v8i64_2_5 (<8 x i64> %v) {
+; AVX512F-LABEL: test_v8i64_2_5:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    vextracti32x4 $2, %zmm0, %xmm1
+; AVX512F-NEXT:    vextracti32x4 $1, %zmm0, %xmm0
+; AVX512F-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
+; AVX512F-NEXT:    vzeroupper
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_v8i64_2_5:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    vextracti32x4 $1, %zmm0, %xmm1
+; AVX512F-32-NEXT:    vpextrd $1, %xmm1, %eax
+; AVX512F-32-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; AVX512F-32-NEXT:    vextracti32x4 $2, %zmm0, %xmm0
+; AVX512F-32-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX512F-32-NEXT:    vpinsrd $2, %eax, %xmm1, %xmm1
+; AVX512F-32-NEXT:    vpextrd $3, %xmm0, %eax
+; AVX512F-32-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm0
+; AVX512F-32-NEXT:    vzeroupper
+; AVX512F-32-NEXT:    retl
+  %res = shufflevector <8 x i64> %v, <8 x i64> undef, <2 x i32> <i32 2, i32 5>
+  ret <2 x i64> %res
+}