llvm-mirror/test/CodeGen/X86/avx-trunc.ll
Elena Demikhovsky 2a81f662cd X86 CodeGen: Optimized pattern for truncate with unsigned saturation.
DAG pattern optimization: truncate + unsigned saturation is supported by the VPMOVUS* instructions on AVX-512 and by the VPACKUS* instructions on SSE* targets.

Differential Revision: https://reviews.llvm.org/D28216

llvm-svn: 291670
2017-01-11 12:59:32 +00:00
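For illustration, here is a minimal sketch (not part of the test file below) of the same clamp-then-truncate IR shape for a hypothetical 512-bit case; per the commit description, this is the kind of pattern the VPMOVUS* instructions cover on AVX-512 targets (vpmovusdw would be the expected lowering there, whereas this file only runs on a plain AVX target).

; Illustrative sketch only -- an assumed <16 x i32> -> <16 x i16> unsigned-saturating truncate.
define <16 x i16> @usat_trunc_dw_512_sketch(<16 x i32> %i) {
  ; Clamp every lane to 65535, the unsigned maximum of i16 ...
  %lt  = icmp ult <16 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  %sat = select <16 x i1> %lt, <16 x i32> %i, <16 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
  ; ... then truncate; the DAG combine recognizes the clamp as unsigned saturation.
  %t   = trunc <16 x i32> %sat to <16 x i16>
  ret <16 x i16> %t
}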

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s
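; This file covers plain vector truncates (lowered with vextractf128 + vpshufb
; shuffles on this AVX-only target) and truncates preceded by an
; unsigned-saturation clamp (lowered with vpackuswb/vpackusdw).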
define <4 x i32> @trunc_64_32(<4 x i64> %A) nounwind uwtable readnone ssp {
; CHECK-LABEL: trunc_64_32:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%B = trunc <4 x i64> %A to <4 x i32>
ret <4 x i32> %B
}
define <8 x i16> @trunc_32_16(<8 x i32> %A) nounwind uwtable readnone ssp {
; CHECK-LABEL: trunc_32_16:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%B = trunc <8 x i32> %A to <8 x i16>
ret <8 x i16> %B
}
define <16 x i8> @trunc_16_8(<16 x i16> %A) nounwind uwtable readnone ssp {
; CHECK-LABEL: trunc_16_8:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%B = trunc <16 x i16> %A to <16 x i8>
ret <16 x i8> %B
}
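
; The tests below express "truncate with unsigned saturation": the input is
; first clamped to the unsigned maximum of the destination element type (255
; for i8, 65535 for i16) with icmp ult + select, then truncated.  With the
; pattern from https://reviews.llvm.org/D28216 recognized, this lowers to a
; vextractf128 plus a single vpackuswb/vpackusdw instead of the vpshufb
; sequences used for the plain truncates above.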
define <16 x i8> @usat_trunc_wb_256(<16 x i16> %i) {
; CHECK-LABEL: usat_trunc_wb_256:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%x3 = icmp ult <16 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%x5 = select <16 x i1> %x3, <16 x i16> %i, <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%x6 = trunc <16 x i16> %x5 to <16 x i8>
ret <16 x i8> %x6
}
define <8 x i16> @usat_trunc_dw_256(<8 x i32> %i) {
; CHECK-LABEL: usat_trunc_dw_256:
; CHECK: # BB#0:
; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%x3 = icmp ult <8 x i32> %i, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%x5 = select <8 x i1> %x3, <8 x i32> %i, <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
%x6 = trunc <8 x i32> %x5 to <8 x i16>
ret <8 x i16> %x6
}