1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 20:12:56 +02:00
llvm-mirror/test/CodeGen/X86/vector-shuffle-sse41.ll
Ahmed Bougacha 6b357b9101 [X86] Lower blended PACKUSes using appropriate types.
When lowering two blended PACKUS, we used to disregard the types
of the PACKUS inputs, indiscriminately generating a v16i8 PACKUS.

This leads to non-selectable things like:
    (v16i8 (PACKUS (v4i32 v0), (v4i32 v1)))

Instead, check that the PACKUSes have the same type, and use that
as the final result type.

llvm-svn: 274138
2016-06-29 16:56:09 +00:00

60 lines
2.6 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX
define <8 x i16> @blend_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2, <4 x i32> %a3) {
; SSE41-LABEL: blend_packusdw:
; SSE41: # BB#0:
; SSE41-NEXT: packusdw %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_packusdw:
; AVX: # BB#0:
; AVX-NEXT: vpackusdw %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
%p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
%p1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a2, <4 x i32> %a3)
%s0 = shufflevector <8 x i16> %p0, <8 x i16> %p1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %s0
}
define <16 x i8> @blend_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packuswb:
; SSE41: # BB#0:
; SSE41-NEXT: packuswb %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_packuswb:
; AVX: # BB#0:
; AVX-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX-NEXT: retq
%p0 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
%p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
%s0 = shufflevector <16 x i8> %p0, <16 x i8> %p1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
ret <16 x i8> %s0
}
define <8 x i16> @blend_packusdw_packuswb(<4 x i32> %a0, <4 x i32> %a1, <8 x i16> %a2, <8 x i16> %a3) {
; SSE41-LABEL: blend_packusdw_packuswb:
; SSE41: # BB#0:
; SSE41-NEXT: packusdw %xmm1, %xmm0
; SSE41-NEXT: packuswb %xmm3, %xmm2
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE41-NEXT: retq
;
; AVX-LABEL: blend_packusdw_packuswb:
; AVX: # BB#0:
; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpackuswb %xmm3, %xmm2, %xmm1
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT: retq
%p0 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
%p1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a2, <8 x i16> %a3)
%b1 = bitcast <16 x i8> %p1 to <8 x i16>
%s0 = shufflevector <8 x i16> %p0, <8 x i16> %b1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i16> %s0
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)