mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
AVX-512: Optimized SIMD truncate operations for AVX512F set.
Optimized truncation of <8 x i32> to <8 x i16>, <4 x i64> to <4 x i32>, and <16 x i16> to <16 x i8>. All these operations now use the AVX512F set (KNL). Before this change they were implemented with the AVX2 set. Differential Revision: http://reviews.llvm.org/D14108 llvm-svn: 251764
This commit is contained in:
parent
5a2aae9d0b
commit
f42814b247
@ -13102,10 +13102,13 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
// vpmovqb/w/d, vpmovdb/w, vpmovwb
|
||||
if (((!InVT.is512BitVector() && Subtarget->hasVLX()) || InVT.is512BitVector()) &&
|
||||
(InVT.getVectorElementType() != MVT::i16 || Subtarget->hasBWI()))
|
||||
if (Subtarget->hasAVX512()) {
|
||||
// word to byte only under BWI
|
||||
if (InVT == MVT::v16i16 && !Subtarget->hasBWI()) // v16i16 -> v16i8
|
||||
return DAG.getNode(X86ISD::VTRUNC, DL, VT,
|
||||
DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
|
||||
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
|
||||
|
||||
}
|
||||
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
|
||||
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
|
||||
if (Subtarget->hasInt256()) {
|
||||
|
@ -6036,6 +6036,23 @@ defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
|
||||
defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>;
|
||||
defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
|
||||
|
||||
let Predicates = [HasAVX512, NoVLX] in {
|
||||
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
|
||||
(v8i16 (EXTRACT_SUBREG
|
||||
(v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0),
|
||||
VR256X:$src, sub_ymm)))), sub_xmm))>;
|
||||
def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
|
||||
(v4i32 (EXTRACT_SUBREG
|
||||
(v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0),
|
||||
VR256X:$src, sub_ymm)))), sub_xmm))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasBWI, NoVLX] in {
|
||||
def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
|
||||
(v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0),
|
||||
VR256X:$src, sub_ymm))), sub_xmm))>;
|
||||
}
|
||||
|
||||
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
|
||||
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
|
||||
X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
|
||||
|
@ -1,52 +1,60 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
|
||||
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=KNL
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -mattr=+avx512vl -mattr=+avx512bw -mattr=+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=SKX
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
||||
; KNL-LABEL: trunc_16x32_to_16x8
|
||||
; KNL: vpmovdb
|
||||
; KNL: ret
|
||||
|
||||
define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) #0 {
|
||||
; ALL-LABEL: trunc_16x32_to_16x8:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <16 x i32> %i to <16 x i8>
|
||||
ret <16 x i8> %x
|
||||
}
|
||||
|
||||
; KNL-LABEL: trunc_8x64_to_8x16
|
||||
; KNL: vpmovqw
|
||||
; KNL: ret
|
||||
define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) #0 {
|
||||
; ALL-LABEL: trunc_8x64_to_8x16:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <8 x i64> %i to <8 x i16>
|
||||
ret <8 x i16> %x
|
||||
}
|
||||
|
||||
; KNL-LABEL: trunc_v16i32_to_v16i16
|
||||
; KNL: vpmovdw
|
||||
; KNL: ret
|
||||
define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) #0 {
|
||||
; ALL-LABEL: trunc_v16i32_to_v16i16:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%1 = trunc <16 x i32> %x to <16 x i16>
|
||||
ret <16 x i16> %1
|
||||
}
|
||||
|
||||
define <8 x i8> @trunc_qb_512(<8 x i64> %i) #0 {
|
||||
; SKX-LABEL: trunc_qb_512:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qb_512:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <8 x i64> %i to <8 x i8>
|
||||
ret <8 x i8> %x
|
||||
}
|
||||
|
||||
define void @trunc_qb_512_mem(<8 x i64> %i, <8 x i8>* %res) #0 {
|
||||
; SKX-LABEL: trunc_qb_512_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqb %zmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qb_512_mem:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovqb %zmm0, (%rdi)
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <8 x i64> %i to <8 x i8>
|
||||
store <8 x i8> %x, <8 x i8>* %res
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
|
||||
; KNL-LABEL: trunc_qb_256:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qb_256:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqd %ymm0, %xmm0
|
||||
@ -56,6 +64,13 @@ define <4 x i8> @trunc_qb_256(<4 x i64> %i) #0 {
|
||||
}
|
||||
|
||||
define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
|
||||
; KNL-LABEL: trunc_qb_256_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; KNL-NEXT: vmovd %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qb_256_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqb %ymm0, (%rdi)
|
||||
@ -66,14 +81,21 @@ define void @trunc_qb_256_mem(<4 x i64> %i, <4 x i8>* %res) #0 {
|
||||
}
|
||||
|
||||
define <2 x i8> @trunc_qb_128(<2 x i64> %i) #0 {
|
||||
; SKX-LABEL: trunc_qb_128:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qb_128:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <2 x i64> %i to <2 x i8>
|
||||
ret <2 x i8> %x
|
||||
}
|
||||
|
||||
define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
|
||||
; KNL-LABEL: trunc_qb_128_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; KNL-NEXT: vmovd %xmm0, %eax
|
||||
; KNL-NEXT: movw %ax, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qb_128_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqb %xmm0, (%rdi)
|
||||
@ -84,25 +106,30 @@ define void @trunc_qb_128_mem(<2 x i64> %i, <2 x i8>* %res) #0 {
|
||||
}
|
||||
|
||||
define <8 x i16> @trunc_qw_512(<8 x i64> %i) #0 {
|
||||
; SKX-LABEL: trunc_qw_512:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qw_512:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovqw %zmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <8 x i64> %i to <8 x i16>
|
||||
ret <8 x i16> %x
|
||||
}
|
||||
|
||||
define void @trunc_qw_512_mem(<8 x i64> %i, <8 x i16>* %res) #0 {
|
||||
; SKX-LABEL: trunc_qw_512_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqw %zmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qw_512_mem:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovqw %zmm0, (%rdi)
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <8 x i64> %i to <8 x i16>
|
||||
store <8 x i16> %x, <8 x i16>* %res
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
|
||||
; KNL-LABEL: trunc_qw_256:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qw_256:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqd %ymm0, %xmm0
|
||||
@ -112,6 +139,13 @@ define <4 x i16> @trunc_qw_256(<4 x i64> %i) #0 {
|
||||
}
|
||||
|
||||
define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
|
||||
; KNL-LABEL: trunc_qw_256_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; KNL-NEXT: vmovq %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qw_256_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqw %ymm0, (%rdi)
|
||||
@ -122,14 +156,21 @@ define void @trunc_qw_256_mem(<4 x i64> %i, <4 x i16>* %res) #0 {
|
||||
}
|
||||
|
||||
define <2 x i16> @trunc_qw_128(<2 x i64> %i) #0 {
|
||||
; SKX-LABEL: trunc_qw_128:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qw_128:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <2 x i64> %i to <2 x i16>
|
||||
ret <2 x i16> %x
|
||||
}
|
||||
|
||||
define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
|
||||
; KNL-LABEL: trunc_qw_128_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
|
||||
; KNL-NEXT: vmovd %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qw_128_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqw %xmm0, (%rdi)
|
||||
@ -140,25 +181,30 @@ define void @trunc_qw_128_mem(<2 x i64> %i, <2 x i16>* %res) #0 {
|
||||
}
|
||||
|
||||
define <8 x i32> @trunc_qd_512(<8 x i64> %i) #0 {
|
||||
; SKX-LABEL: trunc_qd_512:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qd_512:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <8 x i64> %i to <8 x i32>
|
||||
ret <8 x i32> %x
|
||||
}
|
||||
|
||||
define void @trunc_qd_512_mem(<8 x i64> %i, <8 x i32>* %res) #0 {
|
||||
; SKX-LABEL: trunc_qd_512_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqd %zmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qd_512_mem:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovqd %zmm0, (%rdi)
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <8 x i64> %i to <8 x i32>
|
||||
store <8 x i32> %x, <8 x i32>* %res
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
|
||||
; KNL-LABEL: trunc_qd_256:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qd_256:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqd %ymm0, %xmm0
|
||||
@ -168,6 +214,12 @@ define <4 x i32> @trunc_qd_256(<4 x i64> %i) #0 {
|
||||
}
|
||||
|
||||
define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
|
||||
; KNL-LABEL: trunc_qd_256_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovqd %zmm0, %ymm0
|
||||
; KNL-NEXT: vmovaps %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qd_256_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqd %ymm0, (%rdi)
|
||||
@ -178,14 +230,20 @@ define void @trunc_qd_256_mem(<4 x i64> %i, <4 x i32>* %res) #0 {
|
||||
}
|
||||
|
||||
define <2 x i32> @trunc_qd_128(<2 x i64> %i) #0 {
|
||||
; SKX-LABEL: trunc_qd_128:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_qd_128:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <2 x i64> %i to <2 x i32>
|
||||
ret <2 x i32> %x
|
||||
}
|
||||
|
||||
define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
|
||||
; KNL-LABEL: trunc_qd_128_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
|
||||
; KNL-NEXT: vmovq %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_qd_128_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovqd %xmm0, (%rdi)
|
||||
@ -196,25 +254,30 @@ define void @trunc_qd_128_mem(<2 x i64> %i, <2 x i32>* %res) #0 {
|
||||
}
|
||||
|
||||
define <16 x i8> @trunc_db_512(<16 x i32> %i) #0 {
|
||||
; SKX-LABEL: trunc_db_512:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_db_512:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <16 x i32> %i to <16 x i8>
|
||||
ret <16 x i8> %x
|
||||
}
|
||||
|
||||
define void @trunc_db_512_mem(<16 x i32> %i, <16 x i8>* %res) #0 {
|
||||
; SKX-LABEL: trunc_db_512_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdb %zmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_db_512_mem:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovdb %zmm0, (%rdi)
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <16 x i32> %i to <16 x i8>
|
||||
store <16 x i8> %x, <16 x i8>* %res
|
||||
ret void
|
||||
}
|
||||
|
||||
define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
|
||||
; KNL-LABEL: trunc_db_256:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_db_256:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdw %ymm0, %xmm0
|
||||
@ -224,6 +287,13 @@ define <8 x i8> @trunc_db_256(<8 x i32> %i) #0 {
|
||||
}
|
||||
|
||||
define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
|
||||
; KNL-LABEL: trunc_db_256_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
||||
; KNL-NEXT: vmovq %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_db_256_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdb %ymm0, (%rdi)
|
||||
@ -234,14 +304,20 @@ define void @trunc_db_256_mem(<8 x i32> %i, <8 x i8>* %res) #0 {
|
||||
}
|
||||
|
||||
define <4 x i8> @trunc_db_128(<4 x i32> %i) #0 {
|
||||
; SKX-LABEL: trunc_db_128:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_db_128:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <4 x i32> %i to <4 x i8>
|
||||
ret <4 x i8> %x
|
||||
}
|
||||
|
||||
define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
|
||||
; KNL-LABEL: trunc_db_128_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
|
||||
; KNL-NEXT: vmovd %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_db_128_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdb %xmm0, (%rdi)
|
||||
@ -252,25 +328,30 @@ define void @trunc_db_128_mem(<4 x i32> %i, <4 x i8>* %res) #0 {
|
||||
}
|
||||
|
||||
define <16 x i16> @trunc_dw_512(<16 x i32> %i) #0 {
|
||||
; SKX-LABEL: trunc_dw_512:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_dw_512:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <16 x i32> %i to <16 x i16>
|
||||
ret <16 x i16> %x
|
||||
}
|
||||
|
||||
define void @trunc_dw_512_mem(<16 x i32> %i, <16 x i16>* %res) #0 {
|
||||
; SKX-LABEL: trunc_dw_512_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdw %zmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_dw_512_mem:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: vpmovdw %zmm0, (%rdi)
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <16 x i32> %i to <16 x i16>
|
||||
store <16 x i16> %x, <16 x i16>* %res
|
||||
ret void
|
||||
}
|
||||
|
||||
define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
|
||||
; KNL-LABEL: trunc_dw_256:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_dw_256:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdw %ymm0, %xmm0
|
||||
@ -280,6 +361,12 @@ define <8 x i16> @trunc_dw_256(<8 x i32> %i) #0 {
|
||||
}
|
||||
|
||||
define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
|
||||
; KNL-LABEL: trunc_dw_256_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; KNL-NEXT: vmovaps %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_dw_256_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdw %ymm0, (%rdi)
|
||||
@ -289,15 +376,13 @@ define void @trunc_dw_256_mem(<8 x i32> %i, <8 x i16>* %res) #0 {
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x i16> @trunc_dw_128(<4 x i32> %i) #0 {
|
||||
; SKX-LABEL: trunc_dw_128:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: retq
|
||||
%x = trunc <4 x i32> %i to <4 x i16>
|
||||
ret <4 x i16> %x
|
||||
}
|
||||
|
||||
define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
|
||||
; KNL-LABEL: trunc_dw_128_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
|
||||
; KNL-NEXT: vmovq %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_dw_128_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovdw %xmm0, (%rdi)
|
||||
@ -308,6 +393,15 @@ define void @trunc_dw_128_mem(<4 x i32> %i, <4 x i16>* %res) #0 {
|
||||
}
|
||||
|
||||
define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
|
||||
; KNL-LABEL: trunc_wb_512:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
|
||||
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_wb_512:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovwb %zmm0, %ymm0
|
||||
@ -317,6 +411,16 @@ define <32 x i8> @trunc_wb_512(<32 x i16> %i) #0 {
|
||||
}
|
||||
|
||||
define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
|
||||
; KNL-LABEL: trunc_wb_512_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vpmovsxwd %ymm1, %zmm1
|
||||
; KNL-NEXT: vpmovdb %zmm1, %xmm1
|
||||
; KNL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
|
||||
; KNL-NEXT: vmovaps %ymm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_wb_512_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovwb %zmm0, (%rdi)
|
||||
@ -327,6 +431,12 @@ define void @trunc_wb_512_mem(<32 x i16> %i, <32 x i8>* %res) #0 {
|
||||
}
|
||||
|
||||
define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
|
||||
; KNL-LABEL: trunc_wb_256:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_wb_256:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovwb %ymm0, %xmm0
|
||||
@ -336,6 +446,13 @@ define <16 x i8> @trunc_wb_256(<16 x i16> %i) #0 {
|
||||
}
|
||||
|
||||
define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
|
||||
; KNL-LABEL: trunc_wb_256_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpmovsxwd %ymm0, %zmm0
|
||||
; KNL-NEXT: vpmovdb %zmm0, %xmm0
|
||||
; KNL-NEXT: vmovaps %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_wb_256_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovwb %ymm0, (%rdi)
|
||||
@ -346,14 +463,20 @@ define void @trunc_wb_256_mem(<16 x i16> %i, <16 x i8>* %res) #0 {
|
||||
}
|
||||
|
||||
define <8 x i8> @trunc_wb_128(<8 x i16> %i) #0 {
|
||||
; SKX-LABEL: trunc_wb_128:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: retq
|
||||
; ALL-LABEL: trunc_wb_128:
|
||||
; ALL: ## BB#0:
|
||||
; ALL-NEXT: retq
|
||||
%x = trunc <8 x i16> %i to <8 x i8>
|
||||
ret <8 x i8> %x
|
||||
}
|
||||
|
||||
define void @trunc_wb_128_mem(<8 x i16> %i, <8 x i8>* %res) #0 {
|
||||
; KNL-LABEL: trunc_wb_128_mem:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
|
||||
; KNL-NEXT: vmovq %xmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: trunc_wb_128_mem:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpmovwb %xmm0, (%rdi)
|
||||
|
@ -753,8 +753,7 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512CD-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: retq
|
||||
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %in, i1 0)
|
||||
@ -976,8 +975,7 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
|
||||
; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0
|
||||
; AVX512CD-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,20,21,24,25,28,29],zero,zero,zero,zero,zero,zero,zero,zero
|
||||
; AVX512CD-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
|
||||
; AVX512CD-NEXT: vpmovdw %zmm0, %ymm0
|
||||
; AVX512CD-NEXT: vpsubw {{.*}}(%rip), %xmm0, %xmm0
|
||||
; AVX512CD-NEXT: retq
|
||||
%out = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %in, i1 -1)
|
||||
|
Loading…
Reference in New Issue
Block a user