[X86][SSE] Add uniform vector shift test coverage for (sra (trunc (sr[al] x, c1)), c2) folds
commit 5d937c9531
parent 2465ca1fd4
@@ -226,6 +226,34 @@ define <4 x i32> @combine_vec_ashr_trunc_lshr(<4 x i64> %x) {
   ret <4 x i32> %3
 }
 
+define <16 x i8> @combine_vec_ashr_trunc_lshr_splat(<16 x i32> %x) {
+; SSE-LABEL: combine_vec_ashr_trunc_lshr_splat:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psrad $26, %xmm3
+; SSE-NEXT:    psrad $26, %xmm2
+; SSE-NEXT:    packssdw %xmm3, %xmm2
+; SSE-NEXT:    psrad $26, %xmm1
+; SSE-NEXT:    psrad $26, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    packsswb %xmm2, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_vec_ashr_trunc_lshr_splat:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsrad $26, %ymm1, %ymm1
+; AVX-NEXT:    vpsrad $26, %ymm0, %ymm0
+; AVX-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %1 = lshr <16 x i32> %x, <i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
+  %2 = trunc <16 x i32> %1 to <16 x i8>
+  %3 = ashr <16 x i8> %2, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
+  ret <16 x i8> %3
+}
+
 ; fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
 ; if c1 is equal to the number of bits the trunc removes
 define <4 x i32> @combine_vec_ashr_trunc_ashr(<4 x i64> %x) {
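The new combine_vec_ashr_trunc_lshr_splat test above covers the lshr flavour of the fold: since c1 = 24 equals the number of bits the <16 x i32> -> <16 x i8> trunc removes, the sign bit of the truncated value is bit 31 of the source, so the lshr + trunc + ashr chain can be rewritten as a single arithmetic shift by c1 + c2 = 26 followed by the truncation, which is the psrad $26 / packssdw / packsswb lowering in the checks. A minimal scalar sketch of the same identity (the @lshr_src / @lshr_tgt names are hypothetical, not part of the commit; equivalence can be checked with a tool like Alive2):

; illustrative scalar analogue, not from the commit
define i8 @lshr_src(i32 %x) {
  %1 = lshr i32 %x, 24   ; c1 = 24 = bits removed by the trunc (32 - 8)
  %2 = trunc i32 %1 to i8
  %3 = ashr i8 %2, 2     ; c2 = 2
  ret i8 %3
}

define i8 @lshr_tgt(i32 %x) {
  %1 = ashr i32 %x, 26   ; one arithmetic shift by c1 + c2 = 26
  %2 = trunc i32 %1 to i8
  ret i8 %2
}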
@@ -263,6 +291,27 @@ define <4 x i32> @combine_vec_ashr_trunc_ashr(<4 x i64> %x) {
   ret <4 x i32> %3
 }
 
+define <8 x i16> @combine_vec_ashr_trunc_ashr_splat(<8 x i32> %x) {
+; SSE-LABEL: combine_vec_ashr_trunc_ashr_splat:
+; SSE:       # %bb.0:
+; SSE-NEXT:    psrad $19, %xmm1
+; SSE-NEXT:    psrad $19, %xmm0
+; SSE-NEXT:    packssdw %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: combine_vec_ashr_trunc_ashr_splat:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vpsrad $19, %ymm0, %ymm0
+; AVX-NEXT:    vextracti128 $1, %ymm0, %xmm1
+; AVX-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vzeroupper
+; AVX-NEXT:    retq
+  %1 = ashr <8 x i32> %x, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+  %2 = trunc <8 x i32> %1 to <8 x i16>
+  %3 = ashr <8 x i16> %2, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
+  ret <8 x i16> %3
+}
+
 ; If the sign bit is known to be zero, switch this to a SRL.
 define <4 x i32> @combine_vec_ashr_positive(<4 x i32> %x, <4 x i32> %y) {
 ; SSE-LABEL: combine_vec_ashr_positive:
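combine_vec_ashr_trunc_ashr_splat exercises the sra flavour described by the comment above: c1 = 16 equals the bits removed by the <8 x i32> -> <8 x i16> trunc, so the two arithmetic shifts merge into a single shift by c1 + c2 = 19, matching the psrad $19 in the checked output. The same rewrite in scalar form (again with hypothetical names, not from the commit):

define i16 @ashr_src(i32 %x) {
  %1 = ashr i32 %x, 16   ; c1 = 16 = bits removed by the trunc (32 - 16)
  %2 = trunc i32 %1 to i16
  %3 = ashr i16 %2, 3    ; c2 = 3
  ret i16 %3
}

define i16 @ashr_tgt(i32 %x) {
  %1 = ashr i32 %x, 19   ; single shift by c1 + c2 = 19
  %2 = trunc i32 %1 to i16
  ret i16 %2
}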
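The trailing context also shows the pre-existing combine_vec_ashr_positive test for a separate rule: an arithmetic shift of a value whose sign bit is known zero can be switched to a logical shift (SRL). A minimal scalar illustration of that rule (hypothetical names and mask value, not part of this commit):

define i32 @positive_src(i32 %x, i32 %y) {
  %1 = and i32 %x, 2147483647   ; clears bit 31, so the sign bit is known zero
  %2 = ashr i32 %1, %y
  ret i32 %2
}

define i32 @positive_tgt(i32 %x, i32 %y) {
  %1 = and i32 %x, 2147483647
  %2 = lshr i32 %1, %y          ; sra of a non-negative value is just srl
  ret i32 %2
}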