llvm-mirror/test/CodeGen/X86/vector-shuffle-v48.ll
commit 0ec35b588c by Craig Topper
[X86] Fix two more places to prefer VPERMQ/PD over VPERM2X128 when AVX2 is enabled
The shuffle combining and lowerVectorShuffleAsLanePermuteAndBlend were both still trying to use VPERM2X128 for unary shuffles when AVX2 is enabled. VPERM2X128 takes two inputs, so when we use it for a unary shuffle one of those inputs is left undefined, creating a false dependency on whatever register gets allocated there.

If we have VPERMQ/VPERMPD available, we should prefer them, since they take only a single input.
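
As a minimal sketch of the difference (not part of the committed test; the function name @swap_lanes and the exact assembly are illustrative assumptions), a unary 128-bit lane swap should lower under AVX2 to the single-input VPERMQ, the same pattern that appears as vpermq in the CHECK lines of the test below:

; Hypothetical unary shuffle: swap the two 128-bit lanes of one ymm value.
define <4 x i64> @swap_lanes(<4 x i64> %a) {
  %r = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  ret <4 x i64> %r
}
; Expected with -mattr=+avx2 (illustrative):
;   vpermq $78, %ymm0, %ymm0 # ymm0 = ymm0[2,3,0,1]
; rather than vperm2i128, whose undef second operand would create a false
; dependency on whichever register the allocator happens to pick.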

Differential Revision: https://reviews.llvm.org/D37947

llvm-svn: 313542
2017-09-18 16:39:49 +00:00


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-pc-linux -mattr=+avx2 < %s | FileCheck %s
define <32 x i8> @foo(<48 x i8>* %x0, <16 x i32> %x1, <16 x i32> %x2) {
; CHECK-LABEL: foo:
; CHECK: # BB#0:
; CHECK-NEXT: vmovdqu 32(%rdi), %xmm0
; CHECK-NEXT: vmovdqu (%rdi), %ymm1
; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
; CHECK-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,u,u,u,0,2,3,5,6]
; CHECK-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,3,4,6,7,9,10,12,13,15,u,u,u,u,u,24,25,27,28,30,31,u,u,u,u,u,u,u,u,u,u]
; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = <255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,255,255,255,255,255,255,u,u,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,u,u,1,2,4,5,7,8,10,11,13,14]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0,0,0]
; CHECK-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
%1 = load <48 x i8>, <48 x i8>* %x0, align 1
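; Keep 32 of the 48 loaded bytes, dropping every third byte (indices 2, 5, 8, ..., 47).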
%2 = shufflevector <48 x i8> %1, <48 x i8> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 4, i32 6, i32 7, i32 9, i32 10, i32 12, i32 13, i32 15, i32 16, i32 18, i32 19, i32 21, i32 22, i32 24, i32 25, i32 27, i32 28, i32 30, i32 31, i32 33, i32 34, i32 36, i32 37, i32 39, i32 40, i32 42, i32 43, i32 45, i32 46>
ret <32 x i8> %2
}