1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 18:54:02 +01:00

[X86][SSE] Add uniform vector shift test coverage for (sra (trunc (sr[al] x, c1)), c2) folds

This commit is contained in:
Simon Pilgrim 2021-02-17 18:17:50 +00:00
parent 2465ca1fd4
commit 5d937c9531

View File

@ -226,6 +226,34 @@ define <4 x i32> @combine_vec_ashr_trunc_lshr(<4 x i64> %x) {
  ret <4 x i32> %3
}
; Uniform (splat) shift amounts: (ashr (trunc (lshr x, 24)), 2) on <16 x i32>.
; Expected codegen merges the two shifts into a single per-lane sra by 26
; (24 + 2), then narrows i32 -> i16 -> i8 with saturating packs (the values
; fit in i8 after the 26-bit shift, so the packs act as a truncation).
define <16 x i8> @combine_vec_ashr_trunc_lshr_splat(<16 x i32> %x) {
; SSE-LABEL: combine_vec_ashr_trunc_lshr_splat:
; SSE: # %bb.0:
; SSE-NEXT: psrad $26, %xmm3
; SSE-NEXT: psrad $26, %xmm2
; SSE-NEXT: packssdw %xmm3, %xmm2
; SSE-NEXT: psrad $26, %xmm1
; SSE-NEXT: psrad $26, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: packsswb %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_ashr_trunc_lshr_splat:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $26, %ymm1, %ymm1
; AVX-NEXT: vpsrad $26, %ymm0, %ymm0
; AVX-NEXT: vpackssdw %ymm1, %ymm0, %ymm0
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = lshr <16 x i32> %x, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
  %2 = trunc <16 x i32> %1 to <16 x i8>
  %3 = ashr <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %3
}
; fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
; if c1 is equal to the number of bits the trunc removes
define <4 x i32> @combine_vec_ashr_trunc_ashr(<4 x i64> %x) {
@ -263,6 +291,27 @@ define <4 x i32> @combine_vec_ashr_trunc_ashr(<4 x i64> %x) {
  ret <4 x i32> %3
}
; Uniform (splat) shift amounts: (ashr (trunc (ashr x, 16)), 3) on <8 x i32>.
; Expected codegen merges the two arithmetic shifts into a single per-lane
; sra by 19 (16 + 3), then narrows i32 -> i16 with a saturating pack (the
; shifted values fit in i16, so the pack acts as a truncation).
define <8 x i16> @combine_vec_ashr_trunc_ashr_splat(<8 x i32> %x) {
; SSE-LABEL: combine_vec_ashr_trunc_ashr_splat:
; SSE: # %bb.0:
; SSE-NEXT: psrad $19, %xmm1
; SSE-NEXT: psrad $19, %xmm0
; SSE-NEXT: packssdw %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_ashr_trunc_ashr_splat:
; AVX: # %bb.0:
; AVX-NEXT: vpsrad $19, %ymm0, %ymm0
; AVX-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %1 = ashr <8 x i32> %x, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %2 = trunc <8 x i32> %1 to <8 x i16>
  %3 = ashr <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
  ret <8 x i16> %3
}
; If the sign bit is known to be zero, switch this to a SRL.
define <4 x i32> @combine_vec_ashr_positive(<4 x i32> %x, <4 x i32> %y) {
; SSE-LABEL: combine_vec_ashr_positive: