mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
2a81f662cd
DAG patterns optimization: truncate + unsigned saturation supported by VPMOVUS* instructions in AVX-512, and by VPACKUS* instructions on SSE* targets. Differential Revision: https://reviews.llvm.org/D28216 llvm-svn: 291670
68 lines
2.7 KiB
LLVM
68 lines
2.7 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
|
|
|
|
; Plain (non-saturating) narrowing of a <4 x i64> vector to <4 x i32>.
; The assertions below pin the AVX sequence: the second 128-bit half is pulled
; out with vextractf128, then one vshufps picks elements 0 and 2 of each half.
define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp {
; CHECK-LABEL: trunc_64_32:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %narrow = trunc <4 x i64> %A to <4 x i32>
  ret <4 x i32> %narrow
}
|
|
|
|
; Plain (non-saturating) narrowing of an <8 x i32> vector to <8 x i16>.
; The assertions below pin the AVX sequence: split off the second 128-bit half,
; apply the same vpshufb byte mask to both halves, then join the two low
; quadwords with vpunpcklqdq.
define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp {
; CHECK-LABEL: trunc_32_16:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %narrow = trunc <8 x i32> %A to <8 x i16>
  ret <8 x i16> %narrow
}
|
|
|
|
; Plain (non-saturating) narrowing of a <16 x i16> vector to <16 x i8>.
; The assertions below pin the AVX sequence: split off the second 128-bit half,
; apply the same vpshufb byte mask (even bytes, upper eight lanes undefined) to
; both halves, then join the two low quadwords with vpunpcklqdq.
define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp {
; CHECK-LABEL: trunc_16_8:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %narrow = trunc <16 x i16> %A to <16 x i8>
  ret <16 x i8> %narrow
}
|
|
|
|
; Unsigned-saturating narrowing of <16 x i16> to <16 x i8>: every lane is
; clamped to 255 through an unsigned compare + select and then truncated.
; The assertions below expect the clamp and the truncate to fold into a single
; vpackuswb over the two 128-bit halves.
; NOTE(review): per the Intel SDM, PACKUSWB reads its source words as signed,
; so a lane >= 0x8000 would pack to 0 where this IR yields 255. The expected
; output looks like it records a miscompile - confirm, and regenerate the
; assertions with a fixed llc rather than editing them by hand.
define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
; CHECK-LABEL: usat_trunc_wb_256:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  ; Lanes already below the i8 maximum pass through unchanged.
  %below_max = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %clamped = select <16 x i1> %below_max, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
  %narrow = trunc <16 x i16> %clamped to <16 x i8>
  ret <16 x i8> %narrow
}
|
|
|
|
; Unsigned-saturating narrowing of <8 x i32> to <8 x i16>: every lane is
; clamped to 65535 through an unsigned compare + select and then truncated.
; The assertions below expect the clamp and the truncate to fold into a single
; vpackusdw over the two 128-bit halves.
; NOTE(review): per the Intel SDM, PACKUSDW reads its source dwords as signed,
; so a lane >= 0x80000000 would pack to 0 where this IR yields 65535. The
; expected output looks like it records a miscompile - confirm, and regenerate
; the assertions with a fixed llc rather than editing them by hand.
define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
; CHECK-LABEL: usat_trunc_dw_256:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  ; Lanes already below the i16 maximum pass through unchanged.
  %below_max = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %clamped = select <8 x i1> %below_max, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %narrow = trunc <8 x i32> %clamped to <8 x i16>
  ret <8 x i16> %narrow
}
|