1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 11:42:57 +01:00

[X86] Add isel patterns for (i64 (zext (i8 (bitcast (v16i1 X))))) to use a KMOVW and a SUBREG_TO_REG. Similar for i8 and anyextend.

We already had patterns for extending to i32 to take advantage of
the impliciting zeroing of the upper bits of a 32-bit GPR that is
done by KMOVW/KMOVB. But the extend might be all the way to i64,
in which case the existing patterns would fail and we'd get a
KMOVW/B followed by a MOVZX. By adding patterns for i64 we can
use the fact that KMOVW/B zero the upper bits of the 32-bit GPR
and the normal property that 32-bit GPR writes implicitly zero the
upper 32-bits of the full 64-bit GPR.

The anyextend patterns are slightly different since we don't care
about the upper zeros. For the i8->i64 I think this avoids selecting
the anyextend as a MOVZX to prevent a partial register issue that
doesn't exist. For i16->i64 I think we would have just emitted an
insert_subreg on top of the extract_subreg that the vXi16->i16
bitcast pattern emits. The register coalescer or peephole pass
should combine those, but this saves that work and makes i8/16
consistent.

llvm-svn: 369431
This commit is contained in:
Craig Topper 2019-08-20 19:43:48 +00:00
parent c0b824d734
commit a93395c550
4 changed files with 15 additions and 216 deletions

View File

@ -2836,13 +2836,21 @@ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(KMOVWrk VK16:$src)>;
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
(SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(COPY_TO_REGCLASS VK16:$src, GR32)>;
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
(INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
(SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(COPY_TO_REGCLASS VK8:$src, GR32)>;
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
(INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
(COPY_TO_REGCLASS GR32:$src, VK32)>;

File diff suppressed because it is too large Load Diff

View File

@ -482,7 +482,6 @@ define i64 @v16i8_widened_with_zeroes(<16 x i8> %a, <16 x i8> %b) {
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;

View File

@ -561,7 +561,6 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: movzwl %ax, %eax
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;