llvm-mirror/test/CodeGen/X86/avx512-any_extend_load.ll
Simon Pilgrim 850778022a [DAG] Fold vector (aext (load x)) -> (zext (truncate (zextload x)))
We currently don't do anything to fold any_extend vector loads as no target has such an instruction.

Instead I've added support for folding to a zextload; SimplifyDemandedBits does a good job of adjusting the zext(truncate()) stages as required later on.

We still need the custom scalar extload handling instead of using the tryToFoldExtOfLoad helper, as it has different legality tests; we can probably tweak that to reduce most of the code duplication.

Fixes the regression I mentioned in rG99a971cadff7

Differential Revision: https://reviews.llvm.org/D85129
2020-08-05 11:22:23 +01:00
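
The fold described in the commit message can be pictured with a minimal DAGCombiner-style sketch, given below. This is not the commit's actual code: the helper name foldVectorAnyExtOfLoad, the one-use/legality checks, and the use-replacement details are simplifying assumptions, and the real combine also coordinates with the existing scalar extload paths mentioned above.

// Rough illustrative sketch (not the code from this commit) of folding a
// vector (aext (load x)) into (zext (truncate (zextload x))), assuming the
// target reports the corresponding vector zextload as legal.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

static SDValue foldVectorAnyExtOfLoad(SDNode *N, SelectionDAG &DAG,
                                      const TargetLowering &TLI) {
  SDValue Src = N->getOperand(0);   // the loaded value being any-extended
  EVT VT = N->getValueType(0);      // e.g. v8i64
  EVT SrcVT = Src.getValueType();   // e.g. v8i8
  auto *Ld = dyn_cast<LoadSDNode>(Src);
  if (!VT.isVector() || !Ld || !ISD::isNON_EXTLoad(Ld) || !Ld->isSimple() ||
      !Src.hasOneUse())
    return SDValue();
  // Only fold when the target can zero-extend-load SrcVT into VT directly.
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, SrcVT))
    return SDValue();
  SDLoc DL(N);
  // Re-issue the load as a zero-extending load of the original memory type.
  SDValue ZExtLd = DAG.getExtLoad(ISD::ZEXTLOAD, DL, VT, Ld->getChain(),
                                  Ld->getBasePtr(), SrcVT, Ld->getMemOperand());
  // Keep the zext(trunc(...)) wrapper; SimplifyDemandedBits strips whichever
  // of these stages turn out to be unnecessary later on.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, SrcVT, ZExtLd);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), ZExtLd.getValue(1));
  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Trunc);
}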

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=SKX
define void @any_extend_load_v8i64(<8 x i8> * %ptr) {
; ALL-LABEL: any_extend_load_v8i64:
; ALL: # %bb.0:
; ALL-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
; ALL-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; ALL-NEXT: vpmovqb %zmm0, (%rdi)
; ALL-NEXT: vzeroupper
; ALL-NEXT: retq
  %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
  %1 = zext <8 x i8> %wide.load to <8 x i64>
  %2 = add nuw nsw <8 x i64> %1, <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4>
  %3 = xor <8 x i64> %2, zeroinitializer
  %4 = trunc <8 x i64> %3 to <8 x i8>
  store <8 x i8> %4, <8 x i8>* %ptr, align 1
  ret void
}

define void @any_extend_load_v8i32(<8 x i8> * %ptr) {
; KNL-LABEL: any_extend_load_v8i32:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
; KNL-NEXT: vpaddd %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: vmovq %xmm0, (%rdi)
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; SKX-LABEL: any_extend_load_v8i32:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero
; SKX-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; SKX-NEXT: vpmovdb %ymm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
  %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
  %1 = zext <8 x i8> %wide.load to <8 x i32>
  %2 = add nuw nsw <8 x i32> %1, <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
  %3 = xor <8 x i32> %2, zeroinitializer
  %4 = trunc <8 x i32> %3 to <8 x i8>
  store <8 x i8> %4, <8 x i8>* %ptr, align 1
  ret void
}

define void @any_extend_load_v8i16(<8 x i8> * %ptr) {
; KNL-LABEL: any_extend_load_v8i16:
; KNL: # %bb.0:
; KNL-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; KNL-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
; KNL-NEXT: vmovq %xmm0, (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: any_extend_load_v8i16:
; SKX: # %bb.0:
; SKX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; SKX-NEXT: vpaddw {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: vpmovwb %xmm0, (%rdi)
; SKX-NEXT: retq
  %wide.load = load <8 x i8>, <8 x i8>* %ptr, align 1
  %1 = zext <8 x i8> %wide.load to <8 x i16>
  %2 = add nuw nsw <8 x i16> %1, <i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4, i16 4>
  %3 = xor <8 x i16> %2, zeroinitializer
  %4 = trunc <8 x i16> %3 to <8 x i8>
  store <8 x i8> %4, <8 x i8>* %ptr, align 1
  ret void
}