From 5d937c9531fd31344d0899cea96a183cb7db6d5d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 17 Feb 2021 18:17:50 +0000
Subject: [PATCH] [X86][SSE] Add uniform vector shift test coverage for (sra (trunc (sr[al] x, c1)), c2) folds

---
 test/CodeGen/X86/combine-sra.ll | 49 +++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/test/CodeGen/X86/combine-sra.ll b/test/CodeGen/X86/combine-sra.ll
index e8dd6d0ef97..465f10ce5e9 100644
--- a/test/CodeGen/X86/combine-sra.ll
+++ b/test/CodeGen/X86/combine-sra.ll
@@ -226,6 +226,34 @@ define <4 x i32> @combine_vec_ashr_trunc_lshr(<4 x i64> %x) {
   ret <4 x i32> %3
 }
 
+define <16 x i8> @combine_vec_ashr_trunc_lshr_splat(<16 x i32> %x) {
+; SSE-LABEL: combine_vec_ashr_trunc_lshr_splat:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psrad $26, %xmm3
+; SSE-NEXT:    psrad $26, %xmm2
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    psrad $26, %xmm1
+; SSE-NEXT:    psrad $26, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_vec_ashr_trunc_lshr_splat:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsrad $26, %ymm1, %ymm1
+; AVX-NEXT:    vpsrad $26, %ymm0, %ymm0
+; AVX-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %1 = lshr <16 x i32> %x, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  %2 = trunc <16 x i32> %1 to <16 x i8>
+  %3 = ashr <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <16 x i8> %3
+}
+
 ; fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
 ; if c1 is equal to the number of bits the trunc removes
 define <4 x i32> @combine_vec_ashr_trunc_ashr(<4 x i64> %x) {
@@ -263,6 +291,27 @@ define <4 x i32> @combine_vec_ashr_trunc_ashr(<4 x i64> %x) {
   ret <4 x i32> %3
 }
 
+define <8 x i16> @combine_vec_ashr_trunc_ashr_splat(<8 x i32> %x) {
+; SSE-LABEL: combine_vec_ashr_trunc_ashr_splat:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psrad $19, %xmm1
+; SSE-NEXT:    psrad $19, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_vec_ashr_trunc_ashr_splat:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsrad $19, %ymm0, %ymm0
+; AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %1 = ashr <8 x i32> %x, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  %2 = trunc <8 x i32> %1 to <8 x i16>
+  %3 = ashr <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %3
+}
+
 ; If the sign bit is known to be zero, switch this to a SRL.
 define <4 x i32> @combine_vec_ashr_positive(<4 x i32> %x, <4 x i32> %y) {
 ; SSE-LABEL: combine_vec_ashr_positive:
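
The fold these tests cover, (sra (trunc (sr[al] x, c1)), c2) -> (trunc (sra x, c1 + c2)) when c1 equals the number of bits the trunc removes, is what lets each lane collapse to the single psrad $26 (24 + 2) or psrad $19 (16 + 3) seen in the check lines. Below is a minimal standalone C++ sketch of the per-lane arithmetic, not part of the patch: the helper names are made up, and it assumes two's-complement narrowing and that signed >> is an arithmetic shift (guaranteed from C++20, and what the DAG ashr node models).

#include <cassert>
#include <cstdint>

// combine_vec_ashr_trunc_lshr_splat, one i32 lane:
// ashr(trunc_i8(lshr(x, 24)), 2) should equal trunc_i8(ashr(x, 26)).
static int8_t lshr_trunc_ashr_lane(int32_t x) {
  int8_t t = static_cast<int8_t>(static_cast<uint32_t>(x) >> 24); // lshr by 24, trunc to i8
  return static_cast<int8_t>(t >> 2);                             // ashr i8 by 2
}

// combine_vec_ashr_trunc_ashr_splat, one i32 lane:
// ashr(trunc_i16(ashr(x, 16)), 3) should equal trunc_i16(ashr(x, 19)).
static int16_t ashr_trunc_ashr_lane(int32_t x) {
  int16_t t = static_cast<int16_t>(x >> 16); // ashr by 16, trunc to i16
  return static_cast<int16_t>(t >> 3);       // ashr i16 by 3
}

int main() {
  const int32_t samples[] = {0, 1, -1, 0x12345678, -0x12345678,
                             INT32_MAX, INT32_MIN};
  for (int32_t x : samples) {
    // Both lane computations match a single combined arithmetic shift.
    assert(lshr_trunc_ashr_lane(x) == static_cast<int8_t>(x >> 26));  // psrad $26 per lane
    assert(ashr_trunc_ashr_lane(x) == static_cast<int16_t>(x >> 19)); // psrad $19 per lane
  }
  return 0;
}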