mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 11:42:57 +01:00
[X86] Add isel patterns for (i64 (zext (i16 (bitcast (v16i1 X))))) to use a KMOVW and a SUBREG_TO_REG. Similarly for v8i1/i8 (using KMOVB) and for anyextend.
We already had patterns for extending to i32 that take advantage of the implicit zeroing of the upper bits of a 32-bit GPR that is done by KMOVW/KMOVB. But the extend might be all the way to i64, in which case the existing patterns would fail to match and we'd get a KMOVW/B followed by a MOVZX. By adding patterns for i64 we can use the fact that KMOVW/B zero the upper bits of the 32-bit GPR, together with the normal property that 32-bit GPR writes implicitly zero the upper 32 bits of the full 64-bit GPR. The anyextend patterns are slightly different since we don't care whether the upper bits are zero. For i8->i64 I think this avoids selecting the anyextend as a MOVZX to prevent a partial register issue that doesn't exist. For i16->i64 I think we would have just emitted an insert_subreg on top of the extract_subreg that the v16i1->i16 bitcast pattern emits. The register coalescer or peephole pass should combine those, but this saves that work and makes i8/i16 consistent. llvm-svn: 369431
This commit is contained in:
parent
c0b824d734
commit
a93395c550
@ -2836,13 +2836,21 @@ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
|
||||
|
||||
// Extensions of a v16i1 mask (VK16) that has been bitcast to i16.
//
// zext to i32: selected as a single KMOVWrk. Per the commit message, KMOVW
// already zeroes the upper bits of the 32-bit GPR, so no MOVZX is needed.
def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
|
||||
(KMOVWrk VK16:$src)>;
|
||||
// zext to i64: the same KMOVWrk, wrapped in SUBREG_TO_REG at sub_32bit.
// Per the commit message this relies on 32-bit GPR writes implicitly zeroing
// the upper 32 bits of the full 64-bit GPR.
def : Pat<(i64 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
|
||||
(SUBREG_TO_REG (i64 0), (KMOVWrk VK16:$src), sub_32bit)>;
|
||||
// anyext to i32: only a register-class copy from VK16 to GR32.
def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
|
||||
(COPY_TO_REGCLASS VK16:$src, GR32)>;
|
||||
// anyext to i64: the GR32 copy is inserted at sub_32bit into an IMPLICIT_DEF,
// so nothing is specified for the upper half of the 64-bit result.
def : Pat<(i64 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK16:$src, GR32), sub_32bit)>;
|
||||
|
||||
// Extensions of a v8i1 mask (VK8) that has been bitcast to i8.
//
// zext to i32: selected as a single KMOVBrk; this pattern is predicated on
// HasDQI. Per the commit message, KMOVB zeroes the upper bits of the 32-bit GPR.
def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
|
||||
(KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
|
||||
// zext to i64: the same KMOVBrk (HasDQI), wrapped in SUBREG_TO_REG at
// sub_32bit. Per the commit message this relies on 32-bit GPR writes
// implicitly zeroing the upper 32 bits of the full 64-bit GPR.
def : Pat<(i64 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
|
||||
(SUBREG_TO_REG (i64 0), (KMOVBrk VK8:$src), sub_32bit)>, Requires<[HasDQI]>;
|
||||
// anyext to i32: only a register-class copy from VK8 to GR32. Unlike the zext
// patterns above, this one carries no HasDQI predicate and does not use KMOVB.
def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
|
||||
(COPY_TO_REGCLASS VK8:$src, GR32)>;
|
||||
// anyext to i64: the GR32 copy is inserted at sub_32bit into an IMPLICIT_DEF,
// so nothing is specified for the upper half of the 64-bit result. No HasDQI
// predicate here either.
def : Pat<(i64 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
|
||||
(INSERT_SUBREG (IMPLICIT_DEF), (COPY_TO_REGCLASS VK8:$src, GR32), sub_32bit)>;
|
||||
|
||||
// Bitcast of an i32 held in GR32 to a v32i1 mask: selected as a register-class
// copy into VK32.
def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
|
||||
(COPY_TO_REGCLASS GR32:$src, VK32)>;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -482,7 +482,6 @@ define i64 @v16i8_widened_with_zeroes(<16 x i8> %a, <16 x i8> %b) {
|
||||
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
|
||||
; AVX512F-NEXT: kmovw %k0, %eax
|
||||
; AVX512F-NEXT: movzwl %ax, %eax
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
|
@ -561,7 +561,6 @@ define i64 @kshiftr_v64i1_63(<64 x i8> %x, <64 x i8> %y) {
|
||||
; KNL-NEXT: vpmovsxbd %xmm0, %zmm0
|
||||
; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
|
||||
; KNL-NEXT: kmovw %k0, %eax
|
||||
; KNL-NEXT: movzwl %ax, %eax
|
||||
; KNL-NEXT: vzeroupper
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
|
Loading…
Reference in New Issue
Block a user