llvm-mirror/test/CodeGen/X86/extract-concat.ll

Commit eb1b2706f9 by Simon Pilgrim (2020-04-26 15:31:01 +01:00):
[X86][SSE] getFauxShuffle - support insert(truncate/extend(extract(vec0,c0)),vec1,c1) shuffle patterns at the byte level

Follow-up to the PR45604 fix at rGe71dd7c011a3, where we disabled most of these cases.

By creating the shuffle at the byte level, we can handle any extension or truncation as long as we track how small the scalar became and assume that the upper bytes will need to be zero.
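A minimal sketch of the IR shape the commit describes (the function name, types, and indices are illustrative, not taken from the test below): an element is extracted from one vector, truncated to a narrower scalar, and inserted into another vector, which getFauxShuffle can now model as a single byte-level shuffle.

define <4 x i8> @sketch(<4 x i32> %vec0, <4 x i8> %vec1) {
  %e = extractelement <4 x i32> %vec0, i32 0       ; extract(vec0, c0)
  %t = trunc i32 %e to i8                          ; truncate to a narrower scalar
  %i = insertelement <4 x i8> %vec1, i8 %t, i32 1  ; insert(..., vec1, c1)
  ret <4 x i8> %i
}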

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx512f | FileCheck %s --check-prefixes=AVX,AVX512F
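; Check that the fptosi + double-trunc + insertelement-of--1 chain is treated
; as a byte-level shuffle: on SSE4.2/AVX it folds to a single pshufb (with the
; inserted byte zeroed) plus a pinsrb of 255.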
define void @foo(<4 x float> %in, <4 x i8>* %out) {
; SSE2-LABEL: foo:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; SSE2-NEXT: shll $8, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
; SSE2-NEXT: movl $65280, %eax # imm = 0xFF00
; SSE2-NEXT: orl -{{[0-9]+}}(%rsp), %eax
; SSE2-NEXT: pinsrw $1, %eax, %xmm0
; SSE2-NEXT: movd %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE42-LABEL: foo:
; SSE42: # %bb.0:
; SSE42-NEXT: cvttps2dq %xmm0, %xmm0
; SSE42-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,4,8],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; SSE42-NEXT: movl $255, %eax
; SSE42-NEXT: pinsrb $3, %eax, %xmm0
; SSE42-NEXT: movd %xmm0, (%rdi)
; SSE42-NEXT: retq
;
; AVX-LABEL: foo:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8],zero,xmm0[u,u,u,u,u,u,u,u,u,u,u,u]
; AVX-NEXT: movl $255, %eax
; AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, (%rdi)
; AVX-NEXT: retq
%t0 = fptosi <4 x float> %in to <4 x i32>
%t1 = trunc <4 x i32> %t0 to <4 x i16>
%t2 = shufflevector <4 x i16> %t1, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%t3 = trunc <8 x i16> %t2 to <8 x i8>
%t4 = shufflevector <8 x i8> %t3, <8 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
%t5 = insertelement <4 x i8> %t4, i8 -1, i32 3
store <4 x i8> %t5, <4 x i8>* %out
ret void
}
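; Concatenating %x with itself twice and then interleaving the lanes repeats
; each of the four i64 elements four times; AVX2 lowers this to a single
; vbroadcastsd plus three vpermpds of the source register.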
define <16 x i64> @catcat(<4 x i64> %x) {
; SSE-LABEL: catcat:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-NEXT: movdqa %xmm1, 112(%rdi)
; SSE-NEXT: movdqa %xmm1, 96(%rdi)
; SSE-NEXT: movdqa %xmm3, 80(%rdi)
; SSE-NEXT: movdqa %xmm3, 64(%rdi)
; SSE-NEXT: movdqa %xmm0, 48(%rdi)
; SSE-NEXT: movdqa %xmm0, 32(%rdi)
; SSE-NEXT: movdqa %xmm2, 16(%rdi)
; SSE-NEXT: movdqa %xmm2, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: catcat:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovddup {{.*#+}} ymm1 = ymm0[0,0,2,2]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,2,3]
; AVX1-NEXT: vpermilpd {{.*#+}} ymm1 = ymm0[1,1,3,3]
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm3 = ymm1[2,3,2,3]
; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm4
; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,3,2,3]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vmovaps %ymm4, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: catcat:
; AVX2: # %bb.0:
; AVX2-NEXT: vpermpd {{.*#+}} ymm1 = ymm0[1,1,1,1]
; AVX2-NEXT: vpermpd {{.*#+}} ymm2 = ymm0[2,2,2,2]
; AVX2-NEXT: vpermpd {{.*#+}} ymm3 = ymm0[3,3,3,3]
; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: catcat:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm2
; AVX512F-NEXT: vmovaps {{.*#+}} zmm1 = [2,2,2,2,3,3,3,3]
; AVX512F-NEXT: vpermpd %zmm0, %zmm1, %zmm1
; AVX512F-NEXT: vmovaps %zmm2, %zmm0
; AVX512F-NEXT: retq
%cat1 = shufflevector <4 x i64> %x, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%cat2 = shufflevector <8 x i64> %cat1, <8 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%r = shufflevector <16 x i64> %cat2, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
ret <16 x i64> %r
}
define <16 x i64> @load_catcat(<4 x i64>* %p) {
; SSE-LABEL: load_catcat:
; SSE: # %bb.0:
; SSE-NEXT: movq %rdi, %rax
; SSE-NEXT: movdqa (%rsi), %xmm0
; SSE-NEXT: movdqa 16(%rsi), %xmm1
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,0,1]
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE-NEXT: movdqa %xmm1, 112(%rdi)
; SSE-NEXT: movdqa %xmm1, 96(%rdi)
; SSE-NEXT: movdqa %xmm3, 80(%rdi)
; SSE-NEXT: movdqa %xmm3, 64(%rdi)
; SSE-NEXT: movdqa %xmm0, 48(%rdi)
; SSE-NEXT: movdqa %xmm0, 32(%rdi)
; SSE-NEXT: movdqa %xmm2, 16(%rdi)
; SSE-NEXT: movdqa %xmm2, (%rdi)
; SSE-NEXT: retq
;
; AVX1-LABEL: load_catcat:
; AVX1: # %bb.0:
; AVX1-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT: vbroadcastsd 8(%rdi), %ymm1
; AVX1-NEXT: vbroadcastsd 16(%rdi), %ymm2
; AVX1-NEXT: vbroadcastsd 24(%rdi), %ymm3
; AVX1-NEXT: retq
;
; AVX2-LABEL: load_catcat:
; AVX2: # %bb.0:
; AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT: vbroadcastsd 8(%rdi), %ymm1
; AVX2-NEXT: vbroadcastsd 16(%rdi), %ymm2
; AVX2-NEXT: vbroadcastsd 24(%rdi), %ymm3
; AVX2-NEXT: retq
;
; AVX512F-LABEL: load_catcat:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm1 = mem[0,1,2,3,0,1,2,3]
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4,0,4,1,5,1,5]
; AVX512F-NEXT: vpermq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,6,2,6,3,7,3,7]
; AVX512F-NEXT: vpermq %zmm1, %zmm2, %zmm1
; AVX512F-NEXT: retq
%x = load <4 x i64>, <4 x i64>* %p
%cat1 = shufflevector <4 x i64> %x, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%cat2 = shufflevector <8 x i64> %cat1, <8 x i64> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%r = shufflevector <16 x i64> %cat2, <16 x i64> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
ret <16 x i64> %r
}
; Use weird types to make sure we do not miscompile a case where the size of
; the source ops is not an even multiple of the result size.
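; The extracted <4 x i32> straddles the boundary between the two sources:
; it takes elements 4-5 of %x and elements 0-1 of %y.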
define <4 x i32> @cat_ext_straddle(<6 x i32>* %px, <6 x i32>* %py) {
; SSE-LABEL: cat_ext_straddle:
; SSE: # %bb.0:
; SSE-NEXT: movaps 16(%rdi), %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; SSE-NEXT: retq
;
; AVX-LABEL: cat_ext_straddle:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps 16(%rdi), %xmm0
; AVX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; AVX-NEXT: retq
%x = load <6 x i32>, <6 x i32>* %px
%y = load <6 x i32>, <6 x i32>* %py
%cat = shufflevector <6 x i32> %x, <6 x i32> %y, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
%ext = shufflevector <12 x i32> %cat, <12 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
ret <4 x i32> %ext
}