1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
llvm-mirror/test/CodeGen/X86/prefer-avx256-mask-extend.ll
Craig Topper a53cc3c6ca [X86] Move VPTESTM matching from the isel table to custom code in X86ISelDAGToDAG.
We had many tablegen patterns for these instructions. And due to the
commutability of the patterns, tablegen expands them to even more patterns. All
together VPTESTMD patterns accounted for more the 50K of the 610K isel table.
This had gotten bad when we stopped canonicalizing AND to vXi64. This required
a pattern for every combination of bitcast input type.

This change moves the matching to custom code where it is easier to look through
the bitcasts without being concerned with the specific types.

The test changes are because we are now stricter with one use checks as its
required to make load folding legal. We now require the AND and any BITCAST to
only have a single use. This prevents forming VPTESTM and a VPAND with the same
inputs.

We now support broadcast loads for 128/256 patterns without VLX. We'll widen to
512-bit like and still fold the broadcast since the amount of memory read
doesn't change.

There are a few tests that got slightly longer because are now prefering
load + VPTESTM over XOR+VPCMPEQ for (seteq (load), allzeros). Previously we were
able to share the XOR with multiple VPTESTM instructions.

llvm-svn: 358359
2019-04-14 18:26:11 +00:00

269 lines
11 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX256
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-prefer-256-bit | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
define <8 x i16> @testv8i1_sext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_sext_v8i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv8i1_sext_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv8i1_sext_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
%in = load <8 x i32>, <8 x i32>* %p
%cmp = icmp eq <8 x i32> %in, zeroinitializer
%ext = sext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ext
}
define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vmovdqa (%rsi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
; AVX256-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv16i1_sext_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv16i1_sext_v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
%in = load <8 x i32>, <8 x i32>* %p
%cmp = icmp eq <8 x i32> %in, zeroinitializer
%in2 = load <8 x i32>, <8 x i32>* %q
%cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
%concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext = sext <16 x i1> %concat to <16 x i8>
ret <16 x i8> %ext
}
define <16 x i16> @testv16i1_sext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_sext_v16i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vmovdqa (%rsi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
; AVX256-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv16i1_sext_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv16i1_sext_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: retq
%in = load <8 x i32>, <8 x i32>* %p
%cmp = icmp eq <8 x i32> %in, zeroinitializer
%in2 = load <8 x i32>, <8 x i32>* %q
%cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
%concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext = sext <16 x i1> %concat to <16 x i16>
ret <16 x i16> %ext
}
define <8 x i16> @testv8i1_zext_v8i16(<8 x i32>* %p) {
; AVX256-LABEL: testv8i1_zext_v8i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv8i1_zext_v8i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0
; AVX512VL-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv8i1_zext_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: vpcmpeqd (%rdi), %ymm0, %ymm0
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
%in = load <8 x i32>, <8 x i32>* %p
%cmp = icmp eq <8 x i32> %in, zeroinitializer
%ext = zext <8 x i1> %cmp to <8 x i16>
ret <8 x i16> %ext
}
define <16 x i8> @testv16i1_zext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i8:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vmovdqa (%rsi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
; AVX256-NEXT: vpsrlw $15, %xmm1, %xmm1
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
; AVX256-NEXT: vpsrlw $15, %xmm0, %xmm0
; AVX256-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX256-NEXT: vzeroupper
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i8:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv16i1_zext_v16i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
%in = load <8 x i32>, <8 x i32>* %p
%cmp = icmp eq <8 x i32> %in, zeroinitializer
%in2 = load <8 x i32>, <8 x i32>* %q
%cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
%concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext = zext <16 x i1> %concat to <16 x i8>
ret <16 x i8> %ext
}
define <16 x i16> @testv16i1_zext_v16i16(<8 x i32>* %p, <8 x i32>* %q) {
; AVX256-LABEL: testv16i1_zext_v16i16:
; AVX256: # %bb.0:
; AVX256-NEXT: vmovdqa (%rdi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX256-NEXT: vmovdqa (%rsi), %ymm0
; AVX256-NEXT: vptestnmd %ymm0, %ymm0, %k2
; AVX256-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1} {z}
; AVX256-NEXT: vpmovdw %ymm1, %xmm1
; AVX256-NEXT: vmovdqa32 %ymm0, %ymm0 {%k2} {z}
; AVX256-NEXT: vpmovdw %ymm0, %xmm0
; AVX256-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX256-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX256-NEXT: retq
;
; AVX512VL-LABEL: testv16i1_zext_v16i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k0
; AVX512VL-NEXT: vmovdqa (%rsi), %ymm0
; AVX512VL-NEXT: vptestnmd %ymm0, %ymm0, %k1
; AVX512VL-NEXT: kunpckbw %k0, %k1, %k1
; AVX512VL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512VL-NEXT: vpmovdw %zmm0, %ymm0
; AVX512VL-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512F-LABEL: testv16i1_zext_v16i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
; AVX512F-NEXT: vpsrlw $15, %ymm0, %ymm0
; AVX512F-NEXT: retq
%in = load <8 x i32>, <8 x i32>* %p
%cmp = icmp eq <8 x i32> %in, zeroinitializer
%in2 = load <8 x i32>, <8 x i32>* %q
%cmp2 = icmp eq <8 x i32> %in2, zeroinitializer
%concat = shufflevector <8 x i1> %cmp, <8 x i1> %cmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%ext = zext <16 x i1> %concat to <16 x i16>
ret <16 x i16> %ext
}