
[AVX-512] Add unmasked subvector inserts and extracts to the execution domain tables.

llvm-svn: 309632
Craig Topper 2017-07-31 22:07:29 +00:00
parent 40fe1a4f74
commit 6e16535e63
18 changed files with 461 additions and 801 deletions

---- next file ----

@@ -9356,6 +9356,30 @@ static const uint16_t ReplaceableInstrs[][3] = {
   { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
   { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
   { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm },
+  { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrr, X86::VINSERTI32x4Zrr },
+  { X86::VINSERTF32x4Zrm, X86::VINSERTF32x4Zrm, X86::VINSERTI32x4Zrm },
+  { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrr, X86::VINSERTI32x8Zrr },
+  { X86::VINSERTF32x8Zrm, X86::VINSERTF32x8Zrm, X86::VINSERTI32x8Zrm },
+  { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrr, X86::VINSERTI64x2Zrr },
+  { X86::VINSERTF64x2Zrm, X86::VINSERTF64x2Zrm, X86::VINSERTI64x2Zrm },
+  { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrr, X86::VINSERTI64x4Zrr },
+  { X86::VINSERTF64x4Zrm, X86::VINSERTF64x4Zrm, X86::VINSERTI64x4Zrm },
+  { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rr,X86::VINSERTI32x4Z256rr },
+  { X86::VINSERTF32x4Z256rm,X86::VINSERTF32x4Z256rm,X86::VINSERTI32x4Z256rm },
+  { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rr,X86::VINSERTI64x2Z256rr },
+  { X86::VINSERTF64x2Z256rm,X86::VINSERTF64x2Z256rm,X86::VINSERTI64x2Z256rm },
+  { X86::VEXTRACTF32x4Zrr, X86::VEXTRACTF32x4Zrr, X86::VEXTRACTI32x4Zrr },
+  { X86::VEXTRACTF32x4Zmr, X86::VEXTRACTF32x4Zmr, X86::VEXTRACTI32x4Zmr },
+  { X86::VEXTRACTF32x8Zrr, X86::VEXTRACTF32x8Zrr, X86::VEXTRACTI32x8Zrr },
+  { X86::VEXTRACTF32x8Zmr, X86::VEXTRACTF32x8Zmr, X86::VEXTRACTI32x8Zmr },
+  { X86::VEXTRACTF64x2Zrr, X86::VEXTRACTF64x2Zrr, X86::VEXTRACTI64x2Zrr },
+  { X86::VEXTRACTF64x2Zmr, X86::VEXTRACTF64x2Zmr, X86::VEXTRACTI64x2Zmr },
+  { X86::VEXTRACTF64x4Zrr, X86::VEXTRACTF64x4Zrr, X86::VEXTRACTI64x4Zrr },
+  { X86::VEXTRACTF64x4Zmr, X86::VEXTRACTF64x4Zmr, X86::VEXTRACTI64x4Zmr },
+  { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTI32x4Z256rr },
+  { X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTI32x4Z256mr },
+  { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTI64x2Z256rr },
+  { X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTI64x2Z256mr },
 };
 static const uint16_t ReplaceableInstrsAVX2[][3] = {
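For readers unfamiliar with these tables: each row lists the packed-single, packed-double, and packed-integer encodings of one operation, and the execution-domain fix-up pass may rewrite an instruction to another column of its row to avoid domain-crossing stalls (this is what turns the vextracti*/vinserti* forms into vextractf*/vinsertf* in the test diffs below). A minimal sketch of that lookup, with stand-in opcode values; the helper name lookupDomainVariant is hypothetical, and the real logic lives in X86InstrInfo's getExecutionDomain/setExecutionDomain:

#include <cstdint>

// Stand-in opcode values; in LLVM these are enums like X86::VINSERTF32x4Zrr.
enum : uint16_t { VINSERTF32x4Zrr = 1, VINSERTI32x4Zrr = 2 };

// Each row: { PackedSingle, PackedDouble, PackedInt } variants of one op.
static const uint16_t DomainTable[][3] = {
  { VINSERTF32x4Zrr, VINSERTF32x4Zrr, VINSERTI32x4Zrr },
};

// Return the opcode equivalent to Opcode in execution domain Domain
// (0 = packed single, 1 = packed double, 2 = packed integer), or 0 if
// Opcode appears in no row and therefore cannot be re-domained.
static uint16_t lookupDomainVariant(uint16_t Opcode, unsigned Domain) {
  for (const auto &Row : DomainTable)
    for (unsigned Col = 0; Col != 3; ++Col)
      if (Row[Col] == Opcode)
        return Row[Domain];
  return 0;
}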

---- next file ----

@@ -879,7 +879,7 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
 ; NODQ-LABEL: uitof64:
 ; NODQ: # BB#0:
 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm2
-; NODQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NODQ-NEXT: vextractf64x4 $1, %zmm0, %ymm0
 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm1
 ; NODQ-NEXT: vmovaps %zmm2, %zmm0
 ; NODQ-NEXT: retq
@@ -887,7 +887,7 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
 ; DQ-LABEL: uitof64:
 ; DQ: # BB#0:
 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm2
-; DQ-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; DQ-NEXT: vextractf32x8 $1, %zmm0, %ymm0
 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm1
 ; DQ-NEXT: vmovaps %zmm2, %zmm0
 ; DQ-NEXT: retq

---- next file ----

@@ -5,7 +5,7 @@
 define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
 ; SKX-LABEL: extract_subvector128_v32i16:
 ; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
   %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
@@ -25,7 +25,7 @@ define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounw
 define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
 ; SKX-LABEL: extract_subvector128_v64i8:
 ; SKX: ## BB#0:
-; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
+; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
   %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
@@ -46,7 +46,7 @@ define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwin
 define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
 ; SKX-LABEL: extract_subvector256_v32i16:
 ; SKX: ## BB#0:
-; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
 ; SKX-NEXT: retq
   %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
   ret <16 x i16> %r1
@@ -55,7 +55,7 @@ define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
 define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
 ; SKX-LABEL: extract_subvector256_v64i8:
 ; SKX: ## BB#0:
-; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
 ; SKX-NEXT: retq
   %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
   ret <32 x i8> %r1
@@ -90,7 +90,7 @@ entry:
 define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v4i64_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 entry:
@@ -103,7 +103,7 @@ entry:
 define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v8i32_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 entry:
@@ -116,7 +116,7 @@ entry:
 define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v16i16_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 entry:
@@ -129,7 +129,7 @@ entry:
 define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
 ; SKX-LABEL: extract_subvector256_v32i8_store:
 ; SKX: ## BB#0: ## %entry
-; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi)
+; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 entry:

---- next file ----

@@ -1134,7 +1134,7 @@ define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind {
 ; X32-LABEL: test_mm512_zextpd128_pd512:
 ; X32: # BB#0:
 ; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2
 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
@@ -1143,7 +1143,7 @@ define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind {
 ; X64-LABEL: test_mm512_zextpd128_pd512:
 ; X64: # BB#0:
 ; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2
 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
@@ -1156,14 +1156,14 @@ define <8 x double> @test_mm512_zextpd256_pd512(<4 x double> %a0) nounwind {
 ; X32-LABEL: test_mm512_zextpd256_pd512:
 ; X32: # BB#0:
 ; X32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm512_zextpd256_pd512:
 ; X64: # BB#0:
 ; X64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X64-NEXT: retq
 %res = shufflevector <4 x double> %a0, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -1174,7 +1174,7 @@ define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind {
 ; X32-LABEL: test_mm512_zextps128_ps512:
 ; X32: # BB#0:
 ; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2
 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
@@ -1183,7 +1183,7 @@ define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind {
 ; X64-LABEL: test_mm512_zextps128_ps512:
 ; X64: # BB#0:
 ; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2
 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
 ; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
@@ -1196,14 +1196,14 @@ define <16 x float> @test_mm512_zextps256_ps512(<8 x float> %a0) nounwind {
 ; X32-LABEL: test_mm512_zextps256_ps512:
 ; X32: # BB#0:
 ; X32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm512_zextps256_ps512:
 ; X64: # BB#0:
 ; X64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X64-NEXT: retq
 %res = shufflevector <8 x float> %a0, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -1214,19 +1214,19 @@ define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind {
 ; X32-LABEL: test_mm512_zextsi128_si512:
 ; X32: # BB#0:
 ; X32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm2
-; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; X32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2
+; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm512_zextsi128_si512:
 ; X64: # BB#0:
 ; X64-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm2
-; X64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; X64-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
 ; X64-NEXT: retq
 %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
 ret <8 x i64> %res
@@ -1236,15 +1236,15 @@ define <8 x i64> @test_mm512_zextsi256_si512(<4 x i64> %a0) nounwind {
 ; X32-LABEL: test_mm512_zextsi256_si512:
 ; X32: # BB#0:
 ; X32-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-NEXT: retl
 ;
 ; X64-LABEL: test_mm512_zextsi256_si512:
 ; X64: # BB#0:
 ; X64-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X64-NEXT: retq
 %res = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ret <8 x i64> %res

---- next file ----

@@ -2910,7 +2910,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <
 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
 ; CHECK-LABEL: test_mask_vextracti64x4:
 ; CHECK: ## BB#0:
-; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1
 ; CHECK-NEXT: kmovw %edi, %k0
 ; CHECK-NEXT: kshiftlw $12, %k0, %k1
 ; CHECK-NEXT: kshiftrw $15, %k1, %k1

---- next file ----

@@ -234,26 +234,26 @@ define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind {
 define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
 ; X64-AVX512VL-LABEL: PR29088:
 ; X64-AVX512VL: ## BB#0:
-; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512VL-NEXT: vmovaps (%rdi), %xmm0
 ; X64-AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512VL-NEXT: retq
 ;
 ; X64-AVX512BWVL-LABEL: PR29088:
 ; X64-AVX512BWVL: ## BB#0:
-; X64-AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512BWVL-NEXT: vmovaps (%rdi), %xmm0
 ; X64-AVX512BWVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
 ; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512BWVL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BWVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512BWVL-NEXT: retq
 ;
 ; X64-AVX512DQVL-LABEL: PR29088:
 ; X64-AVX512DQVL: ## BB#0:
-; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512DQVL-NEXT: vmovaps (%rdi), %xmm0
 ; X64-AVX512DQVL-NEXT: vxorps %ymm1, %ymm1, %ymm1
 ; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi)
-; X64-AVX512DQVL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQVL-NEXT: retq
 %ld = load <4 x i32>, <4 x i32>* %p0
 store <8 x float> zeroinitializer, <8 x float>* %p1

---- next file ----

@@ -1858,7 +1858,7 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
 ; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1
 ; KNL_32-NEXT: kshiftrw $8, %k1, %k2
 ; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
-; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
 ; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
 ; KNL_32-NEXT: vmovapd %zmm2, %zmm0
 ; KNL_32-NEXT: movl %ebp, %esp
@@ -1895,7 +1895,7 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <
 ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
 ; SKX_32-NEXT: kshiftrw $8, %k1, %k2
 ; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
-; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vextractf32x8 $1, %zmm0, %ymm0
 ; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
 ; SKX_32-NEXT: vmovapd %zmm2, %zmm0
 ; SKX_32-NEXT: movl %ebp, %esp
@@ -2102,7 +2102,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
 ; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1
 ; KNL_32-NEXT: kshiftrw $8, %k1, %k2
 ; KNL_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1}
-; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
 ; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
 ; KNL_32-NEXT: movl %ebp, %esp
 ; KNL_32-NEXT: popl %ebp
@@ -2138,7 +2138,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou
 ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
 ; SKX_32-NEXT: kshiftrw $8, %k1, %k2
 ; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1}
-; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vextractf32x8 $1, %zmm0, %ymm0
 ; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
 ; SKX_32-NEXT: movl %ebp, %esp
 ; SKX_32-NEXT: popl %ebp

---- next file ----

@@ -995,19 +995,12 @@ define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: one_mask_bit_set3:
-; AVX512F: ## BB#0:
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vmovlps %xmm0, 16(%rdi)
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; SKX-LABEL: one_mask_bit_set3:
-; SKX: ## BB#0:
-; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
-; SKX-NEXT: vmovq %xmm0, 16(%rdi)
-; SKX-NEXT: vzeroupper
-; SKX-NEXT: retq
+; AVX512-LABEL: one_mask_bit_set3:
+; AVX512: ## BB#0:
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovlps %xmm0, 16(%rdi)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
 call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
 ret void
 }

---- next file ----

@@ -8,7 +8,7 @@
 define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8f64_2f64_12u4:
 ; ALL: # BB#0:
-; ALL-NEXT: vmovupd 16(%rdi), %ymm0
+; ALL-NEXT: vmovups 16(%rdi), %ymm0
 ; ALL-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm1
 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT: retq
@@ -16,7 +16,7 @@ define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable n
 ; X32-AVX512F-LABEL: merge_8f64_2f64_12u4:
 ; X32-AVX512F: # BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovupd 16(%eax), %ymm0
+; X32-AVX512F-NEXT: vmovups 16(%eax), %ymm0
 ; X32-AVX512F-NEXT: vinsertf128 $1, 64(%eax), %ymm0, %ymm1
 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-AVX512F-NEXT: retl
@@ -35,8 +35,8 @@ define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable n
 define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8f64_2f64_23z5:
 ; ALL: # BB#0:
-; ALL-NEXT: vmovupd 32(%rdi), %ymm0
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovups 32(%rdi), %ymm0
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; ALL-NEXT: vinsertf128 $1, 80(%rdi), %ymm1, %ymm1
 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT: retq
@@ -44,8 +44,8 @@ define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable n
 ; X32-AVX512F-LABEL: merge_8f64_2f64_23z5:
 ; X32-AVX512F: # BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovupd 32(%eax), %ymm0
-; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vmovups 32(%eax), %ymm0
+; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX512F-NEXT: vinsertf128 $1, 80(%eax), %ymm1, %ymm1
 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-AVX512F-NEXT: retl
@@ -64,14 +64,14 @@ define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable n
 define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8f64_4f64_z2:
 ; ALL: # BB#0:
-; ALL-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; ALL-NEXT: vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
 ; ALL-NEXT: retq
 ;
 ; X32-AVX512F-LABEL: merge_8f64_4f64_z2:
 ; X32-AVX512F: # BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0
+; X32-AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
 ; X32-AVX512F-NEXT: vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
 ; X32-AVX512F-NEXT: retl
 %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2
@@ -106,20 +106,20 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin
 define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8f64_f64_12zzuuzz:
 ; ALL: # BB#0:
-; ALL-NEXT: vmovupd 8(%rdi), %xmm0
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vmovups 8(%rdi), %xmm0
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT: retq
 ;
 ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
 ; X32-AVX512F: # BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovupd 8(%eax), %xmm0
-; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0
+; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-AVX512F-NEXT: retl
 %ptr0 = getelementptr inbounds double, double* %ptr, i64 1
@@ -179,15 +179,15 @@ define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noin
 define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8i64_4i64_z3:
 ; ALL: # BB#0:
-; ALL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; ALL-NEXT: vinserti64x4 $1, 96(%rdi), %zmm0, %zmm0
+; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; ALL-NEXT: vinsertf64x4 $1, 96(%rdi), %zmm0, %zmm0
 ; ALL-NEXT: retq
 ;
 ; X32-AVX512F-LABEL: merge_8i64_4i64_z3:
 ; X32-AVX512F: # BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; X32-AVX512F-NEXT: vinserti64x4 $1, 96(%eax), %zmm0, %zmm0
+; X32-AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX512F-NEXT: vinsertf64x4 $1, 96(%eax), %zmm0, %zmm0
 ; X32-AVX512F-NEXT: retl
 %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3
 %val1 = load <4 x i64>, <4 x i64>* %ptr1
@@ -198,21 +198,21 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline
 define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp {
 ; ALL-LABEL: merge_8i64_i64_56zz9uzz:
 ; ALL: # BB#0:
-; ALL-NEXT: vmovdqu 40(%rdi), %xmm0
-; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; ALL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vmovups 40(%rdi), %xmm0
+; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; ALL-NEXT: retq
 ;
 ; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
 ; X32-AVX512F: # BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: vmovdqu 40(%eax), %xmm0
-; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; X32-AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; X32-AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X32-AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0
+; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; X32-AVX512F-NEXT: retl
 %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5
 %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6

---- next file ----

@@ -141,41 +141,14 @@ define void @shuffle_v8i32_to_v4i32_1(<8 x i32>* %L, <4 x i32>* %S) nounwind {
 ; AVX-NEXT: vzeroupper
 ; AVX-NEXT: retq
 ;
-; AVX512F-LABEL: shuffle_v8i32_to_v4i32_1:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vmovaps (%rdi), %ymm0
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX512F-NEXT: vmovaps %xmm0, (%rsi)
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8i32_to_v4i32_1:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX512VL-NEXT: vmovaps %xmm0, (%rsi)
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: retq
-;
-; AVX512BW-LABEL: shuffle_v8i32_to_v4i32_1:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vmovaps (%rdi), %ymm0
-; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX512BW-NEXT: vmovaps %xmm0, (%rsi)
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512BWVL-LABEL: shuffle_v8i32_to_v4i32_1:
-; AVX512BWVL: # BB#0:
-; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512BWVL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; AVX512BWVL-NEXT: vmovaps %xmm0, (%rsi)
-; AVX512BWVL-NEXT: vzeroupper
-; AVX512BWVL-NEXT: retq
+; AVX512-LABEL: shuffle_v8i32_to_v4i32_1:
+; AVX512: # BB#0:
+; AVX512-NEXT: vmovaps (%rdi), %ymm0
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
+; AVX512-NEXT: vmovaps %xmm0, (%rsi)
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
 %vec = load <8 x i32>, <8 x i32>* %L
 %strided.vec = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
 store <4 x i32> %strided.vec, <4 x i32>* %S

---- next file ----

@@ -95,8 +95,8 @@ define void @shuffle_v32i16_to_v16i16_1(<32 x i16>* %L, <16 x i16>* %S) nounwind
 define void @shuffle_v16i32_to_v8i32_1(<16 x i32>* %L, <8 x i32>* %S) nounwind {
 ; AVX512-LABEL: shuffle_v16i32_to_v8i32_1:
 ; AVX512: # BB#0:
-; AVX512-NEXT: vmovdqa32 (%rdi), %zmm0
-; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; AVX512-NEXT: vmovaps (%rdi), %zmm0
+; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1
 ; AVX512-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
 ; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
 ; AVX512-NEXT: vmovdqa %ymm0, (%rsi)

---- next file ----

@@ -806,69 +806,21 @@ define <64 x i8> @test_broadcast_32i8_64i8(<32 x i8> *%p) nounwind {
 ;
 define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) {
-; X32-AVX-LABEL: test_broadcast_2f64_4f64_reuse:
-; X32-AVX: ## BB#0:
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX-NEXT: retl
-;
-; X32-AVX512F-LABEL: test_broadcast_2f64_4f64_reuse:
-; X32-AVX512F: ## BB#0:
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX512F-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_2f64_4f64_reuse:
-; X32-AVX512BW: ## BB#0:
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX512BW-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_2f64_4f64_reuse:
-; X32-AVX512DQ: ## BB#0:
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovapd (%ecx), %xmm0
-; X32-AVX512DQ-NEXT: vmovapd %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512DQ-NEXT: retl
-;
-; X64-AVX-LABEL: test_broadcast_2f64_4f64_reuse:
-; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX-NEXT: retq
-;
-; X64-AVX512F-LABEL: test_broadcast_2f64_4f64_reuse:
-; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX512F-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_2f64_4f64_reuse:
-; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX512BW-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_2f64_4f64_reuse:
-; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovapd (%rdi), %xmm0
-; X64-AVX512DQ-NEXT: vmovapd %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512DQ-NEXT: retq
+; X32-LABEL: test_broadcast_2f64_4f64_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm0
+; X32-NEXT: vmovaps %xmm0, (%eax)
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_2f64_4f64_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vmovaps %xmm0, (%rsi)
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
 %1 = load <2 x double>, <2 x double>* %p0
 store <2 x double> %1, <2 x double>* %p1
 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -876,37 +828,21 @@ define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x doub
 }
 define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) {
-; X32-AVX-LABEL: test_broadcast_2i64_4i64_reuse:
-; X32-AVX: ## BB#0:
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX-NEXT: retl
-;
-; X32-AVX512-LABEL: test_broadcast_2i64_4i64_reuse:
-; X32-AVX512: ## BB#0:
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512-NEXT: retl
-;
-; X64-AVX-LABEL: test_broadcast_2i64_4i64_reuse:
-; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: test_broadcast_2i64_4i64_reuse:
-; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: retq
+; X32-LABEL: test_broadcast_2i64_4i64_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm0
+; X32-NEXT: vmovaps %xmm0, (%eax)
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_2i64_4i64_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vmovaps %xmm0, (%rsi)
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
 %1 = load <2 x i64>, <2 x i64>* %p0
 store <2 x i64> %1, <2 x i64>* %p1
 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -936,37 +872,21 @@ define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float>
 }
 define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) {
-; X32-AVX-LABEL: test_broadcast_4i32_8i32_reuse:
-; X32-AVX: ## BB#0:
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
-; X32-AVX-NEXT: vmovaps %xmm0, (%eax)
-; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX-NEXT: retl
-;
-; X32-AVX512-LABEL: test_broadcast_4i32_8i32_reuse:
-; X32-AVX512: ## BB#0:
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512-NEXT: retl
-;
-; X64-AVX-LABEL: test_broadcast_4i32_8i32_reuse:
-; X64-AVX: ## BB#0:
-; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
-; X64-AVX-NEXT: vmovaps %xmm0, (%rsi)
-; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX-NEXT: retq
-;
-; X64-AVX512-LABEL: test_broadcast_4i32_8i32_reuse:
-; X64-AVX512: ## BB#0:
-; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512-NEXT: retq
+; X32-LABEL: test_broadcast_4i32_8i32_reuse:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vmovaps (%ecx), %xmm0
+; X32-NEXT: vmovaps %xmm0, (%eax)
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: test_broadcast_4i32_8i32_reuse:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vmovaps %xmm0, (%rsi)
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
 %1 = load <4 x i32>, <4 x i32>* %p0
 store <4 x i32> %1, <4 x i32>* %p1
 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
@@ -987,9 +907,9 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p
 ; X32-AVX512F: ## BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0
+; X32-AVX512F-NEXT: vmovaps %xmm0, (%eax)
+; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1005,9 +925,9 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p
 ; X32-AVX512DQ: ## BB#0:
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vmovaps (%ecx), %xmm0
+; X32-AVX512DQ-NEXT: vmovaps %xmm0, (%eax)
+; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512DQ-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1019,9 +939,9 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p
 ;
 ; X64-AVX512F-LABEL: test_broadcast_8i16_16i16_reuse:
 ; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0
+; X64-AVX512F-NEXT: vmovaps %xmm0, (%rsi)
+; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse:
@@ -1033,9 +953,9 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_8i16_16i16_reuse:
 ; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
+; X64-AVX512DQ-NEXT: vmovaps %xmm0, (%rsi)
+; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <8 x i16>, <8 x i16> *%p0
 store <8 x i16> %1, <8 x i16>* %p1
@@ -1057,9 +977,9 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1)
 ; X32-AVX512F: ## BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0
+; X32-AVX512F-NEXT: vmovaps %xmm0, (%eax)
+; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1075,9 +995,9 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1)
 ; X32-AVX512DQ: ## BB#0:
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
-; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax)
-; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vmovaps (%ecx), %xmm0
+; X32-AVX512DQ-NEXT: vmovaps %xmm0, (%eax)
+; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512DQ-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1089,9 +1009,9 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1)
 ;
 ; X64-AVX512F-LABEL: test_broadcast_16i8_32i8_reuse:
 ; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0
+; X64-AVX512F-NEXT: vmovaps %xmm0, (%rsi)
+; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse:
@@ -1103,9 +1023,9 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1)
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_16i8_32i8_reuse:
 ; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
-; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
+; X64-AVX512DQ-NEXT: vmovaps %xmm0, (%rsi)
+; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <16 x i8>, <16 x i8> *%p0
 store <16 x i8> %1, <16 x i8>* %p1
@@ -1132,30 +1052,30 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
 ; X32-AVX512F: ## BB#0:
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0
 ; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512F-NEXT: retl
 ;
 ; X32-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
 ; X32-AVX512BW: ## BB#0:
 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512BW-NEXT: vmovaps (%ecx), %xmm0
 ; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512BW-NEXT: retl
 ;
 ; X32-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
 ; X32-AVX512DQ: ## BB#0:
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
+; X32-AVX512DQ-NEXT: vmovaps (%ecx), %xmm0
 ; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X32-AVX512DQ-NEXT: retl
 ;
 ; X64-AVX-LABEL: test_broadcast_4i32_8i32_chain:
@@ -1168,26 +1088,26 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p
 ;
 ; X64-AVX512F-LABEL: test_broadcast_4i32_8i32_chain:
 ; X64-AVX512F: ## BB#0:
-; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0
 ; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512F-NEXT: retq
 ;
 ; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
 ; X64-AVX512BW: ## BB#0:
-; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512BW-NEXT: vmovaps (%rdi), %xmm0
 ; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512BW-NEXT: retq
 ;
 ; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
 ; X64-AVX512DQ: ## BB#0:
-; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
+; X64-AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
 ; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
 ; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; X64-AVX512DQ-NEXT: retq
 %1 = load <4 x i32>, <4 x i32>* %p0
 store <4 x float> zeroinitializer, <4 x float>* %p1

---- next file ----

@@ -4393,79 +4393,42 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind {
 ; AVX2-NEXT: popq %r14
 ; AVX2-NEXT: retq
 ;
-; AVX512F-LABEL: cvt_4f64_to_4i16:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: pushq %r14
-; AVX512F-NEXT: pushq %rbx
-; AVX512F-NEXT: subq $40, %rsp
-; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movw %ax, %bx
-; AVX512F-NEXT: shll $16, %ebx
-; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movzwl %ax, %r14d
-; AVX512F-NEXT: orl %ebx, %r14d
-; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
-; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movw %ax, %bx
-; AVX512F-NEXT: shll $16, %ebx
-; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512F-NEXT: callq __truncdfhf2
-; AVX512F-NEXT: movzwl %ax, %eax
-; AVX512F-NEXT: orl %ebx, %eax
-; AVX512F-NEXT: shlq $32, %rax
-; AVX512F-NEXT: orq %r14, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm0
-; AVX512F-NEXT: addq $40, %rsp
-; AVX512F-NEXT: popq %rbx
-; AVX512F-NEXT: popq %r14
-; AVX512F-NEXT: retq
-;
-; AVX512VL-LABEL: cvt_4f64_to_4i16:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: pushq %r14
-; AVX512VL-NEXT: pushq %rbx
-; AVX512VL-NEXT: subq $40, %rsp
-; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movw %ax, %bx
-; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movzwl %ax, %r14d
-; AVX512VL-NEXT: orl %ebx, %r14d
-; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
-; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
-; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512VL-NEXT: vzeroupper
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movw %ax, %bx
-; AVX512VL-NEXT: shll $16, %ebx
-; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
-; AVX512VL-NEXT: callq __truncdfhf2
-; AVX512VL-NEXT: movzwl %ax, %eax
-; AVX512VL-NEXT: orl %ebx, %eax
-; AVX512VL-NEXT: shlq $32, %rax
-; AVX512VL-NEXT: orq %r14, %rax
-; AVX512VL-NEXT: vmovq %rax, %xmm0
-; AVX512VL-NEXT: addq $40, %rsp
-; AVX512VL-NEXT: popq %rbx
-; AVX512VL-NEXT: popq %r14
-; AVX512VL-NEXT: retq
+; AVX512-LABEL: cvt_4f64_to_4i16:
+; AVX512: # BB#0:
+; AVX512-NEXT: pushq %r14
+; AVX512-NEXT: pushq %rbx
+; AVX512-NEXT: subq $40, %rsp
+; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq __truncdfhf2
+; AVX512-NEXT: movw %ax, %bx
+; AVX512-NEXT: shll $16, %ebx
+; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
+; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq __truncdfhf2
+; AVX512-NEXT: movzwl %ax, %r14d
+; AVX512-NEXT: orl %ebx, %r14d
+; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
+; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: callq __truncdfhf2
+; AVX512-NEXT: movw %ax, %bx
+; AVX512-NEXT: shll $16, %ebx
+; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
+; AVX512-NEXT: callq __truncdfhf2
+; AVX512-NEXT: movzwl %ax, %eax
+; AVX512-NEXT: orl %ebx, %eax
+; AVX512-NEXT: shlq $32, %rax
+; AVX512-NEXT: orq %r14, %rax
+; AVX512-NEXT: vmovq %rax, %xmm0
+; AVX512-NEXT: addq $40, %rsp
+; AVX512-NEXT: popq %rbx
+; AVX512-NEXT: popq %r14
+; AVX512-NEXT: retq
 %1 = fptrunc <4 x double> %a0 to <4 x half>
 %2 = bitcast <4 x half> %1 to <4 x i16>
 ret <4 x i16> %2
@@ -4603,9 +4566,9 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %r14d ; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d ; AVX512VL-NEXT: orl %ebx, %r14d
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -4762,9 +4725,9 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %r14d ; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d ; AVX512VL-NEXT: orl %ebx, %r14d
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -4926,143 +4889,74 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind {
; AVX2-NEXT: popq %r15 ; AVX2-NEXT: popq %r15
; AVX2-NEXT: retq ; AVX2-NEXT: retq
; ;
; AVX512F-LABEL: cvt_8f64_to_8i16: ; AVX512-LABEL: cvt_8f64_to_8i16:
; AVX512F: # BB#0: ; AVX512: # BB#0:
; AVX512F-NEXT: pushq %r15 ; AVX512-NEXT: pushq %r15
; AVX512F-NEXT: pushq %r14 ; AVX512-NEXT: pushq %r14
; AVX512F-NEXT: pushq %rbx ; AVX512-NEXT: pushq %rbx
; AVX512F-NEXT: subq $96, %rsp ; AVX512-NEXT: subq $96, %rsp
; AVX512F-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill ; AVX512-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movw %ax, %bx ; AVX512-NEXT: movw %ax, %bx
; AVX512F-NEXT: shll $16, %ebx ; AVX512-NEXT: shll $16, %ebx
; AVX512F-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload ; AVX512-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movzwl %ax, %r15d ; AVX512-NEXT: movzwl %ax, %r15d
; AVX512F-NEXT: orl %ebx, %r15d ; AVX512-NEXT: orl %ebx, %r15d
; AVX512F-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload ; AVX512-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movw %ax, %bx ; AVX512-NEXT: movw %ax, %bx
; AVX512F-NEXT: shll $16, %ebx ; AVX512-NEXT: shll $16, %ebx
; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movzwl %ax, %r14d ; AVX512-NEXT: movzwl %ax, %r14d
; AVX512F-NEXT: orl %ebx, %r14d ; AVX512-NEXT: orl %ebx, %r14d
; AVX512F-NEXT: shlq $32, %r14 ; AVX512-NEXT: shlq $32, %r14
; AVX512F-NEXT: orq %r15, %r14 ; AVX512-NEXT: orq %r15, %r14
; AVX512F-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload ; AVX512-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill ; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movw %ax, %bx ; AVX512-NEXT: movw %ax, %bx
; AVX512F-NEXT: shll $16, %ebx ; AVX512-NEXT: shll $16, %ebx
; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movzwl %ax, %r15d ; AVX512-NEXT: movzwl %ax, %r15d
; AVX512F-NEXT: orl %ebx, %r15d ; AVX512-NEXT: orl %ebx, %r15d
; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload ; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movw %ax, %bx ; AVX512-NEXT: movw %ax, %bx
; AVX512F-NEXT: shll $16, %ebx ; AVX512-NEXT: shll $16, %ebx
; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload ; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movzwl %ax, %eax ; AVX512-NEXT: movzwl %ax, %eax
; AVX512F-NEXT: orl %ebx, %eax ; AVX512-NEXT: orl %ebx, %eax
; AVX512F-NEXT: shlq $32, %rax ; AVX512-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %r15, %rax ; AVX512-NEXT: orq %r15, %rax
; AVX512F-NEXT: vmovq %rax, %xmm0 ; AVX512-NEXT: vmovq %rax, %xmm0
; AVX512F-NEXT: vmovq %r14, %xmm1 ; AVX512-NEXT: vmovq %r14, %xmm1
; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] ; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT: addq $96, %rsp ; AVX512-NEXT: addq $96, %rsp
; AVX512F-NEXT: popq %rbx ; AVX512-NEXT: popq %rbx
; AVX512F-NEXT: popq %r14 ; AVX512-NEXT: popq %r14
; AVX512F-NEXT: popq %r15 ; AVX512-NEXT: popq %r15
; AVX512F-NEXT: retq ; AVX512-NEXT: retq
;
; AVX512VL-LABEL: cvt_8f64_to_8i16:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: pushq %r15
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $96, %rsp
; AVX512VL-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
; AVX512VL-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
; AVX512VL-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %r14d
; AVX512VL-NEXT: orl %ebx, %r14d
; AVX512VL-NEXT: shlq $32, %r14
; AVX512VL-NEXT: orq %r15, %r14
; AVX512VL-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %r15d
; AVX512VL-NEXT: orl %ebx, %r15d
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, %bx
; AVX512VL-NEXT: shll $16, %ebx
; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %eax
; AVX512VL-NEXT: orl %ebx, %eax
; AVX512VL-NEXT: shlq $32, %rax
; AVX512VL-NEXT: orq %r15, %rax
; AVX512VL-NEXT: vmovq %rax, %xmm0
; AVX512VL-NEXT: vmovq %r14, %xmm1
; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT: addq $96, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
; AVX512VL-NEXT: popq %r15
; AVX512VL-NEXT: retq
%1 = fptrunc <8 x double> %a0 to <8 x half> %1 = fptrunc <8 x double> %a0 to <8 x half>
%2 = bitcast <8 x half> %1 to <8 x i16> %2 = bitcast <8 x half> %1 to <8 x i16>
ret <8 x i16> %2 ret <8 x i16> %2
@@ -5189,81 +5083,43 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) nounwind {
; AVX2-NEXT: popq %rbp ; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq ; AVX2-NEXT: retq
; ;
; AVX512F-LABEL: store_cvt_4f64_to_4i16: ; AVX512-LABEL: store_cvt_4f64_to_4i16:
; AVX512F: # BB#0: ; AVX512: # BB#0:
; AVX512F-NEXT: pushq %rbp ; AVX512-NEXT: pushq %rbp
; AVX512F-NEXT: pushq %r15 ; AVX512-NEXT: pushq %r15
; AVX512F-NEXT: pushq %r14 ; AVX512-NEXT: pushq %r14
; AVX512F-NEXT: pushq %rbx ; AVX512-NEXT: pushq %rbx
; AVX512F-NEXT: subq $88, %rsp ; AVX512-NEXT: subq $88, %rsp
; AVX512F-NEXT: movq %rdi, %rbx ; AVX512-NEXT: movq %rdi, %rbx
; AVX512F-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill ; AVX512-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %r14d ; AVX512-NEXT: movl %eax, %r14d
; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %r15d ; AVX512-NEXT: movl %eax, %r15d
; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %ebp ; AVX512-NEXT: movl %eax, %ebp
; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movw %ax, 4(%rbx) ; AVX512-NEXT: movw %ax, 4(%rbx)
; AVX512F-NEXT: movw %bp, (%rbx) ; AVX512-NEXT: movw %bp, (%rbx)
; AVX512F-NEXT: movw %r15w, 6(%rbx) ; AVX512-NEXT: movw %r15w, 6(%rbx)
; AVX512F-NEXT: movw %r14w, 2(%rbx) ; AVX512-NEXT: movw %r14w, 2(%rbx)
; AVX512F-NEXT: addq $88, %rsp ; AVX512-NEXT: addq $88, %rsp
; AVX512F-NEXT: popq %rbx ; AVX512-NEXT: popq %rbx
; AVX512F-NEXT: popq %r14 ; AVX512-NEXT: popq %r14
; AVX512F-NEXT: popq %r15 ; AVX512-NEXT: popq %r15
; AVX512F-NEXT: popq %rbp ; AVX512-NEXT: popq %rbp
; AVX512F-NEXT: retq ; AVX512-NEXT: retq
;
; AVX512VL-LABEL: store_cvt_4f64_to_4i16:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: pushq %rbp
; AVX512VL-NEXT: pushq %r15
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $88, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r14d
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r15d
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %ebp
; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, 4(%rbx)
; AVX512VL-NEXT: movw %bp, (%rbx)
; AVX512VL-NEXT: movw %r15w, 6(%rbx)
; AVX512VL-NEXT: movw %r14w, 2(%rbx)
; AVX512VL-NEXT: addq $88, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r14
; AVX512VL-NEXT: popq %r15
; AVX512VL-NEXT: popq %rbp
; AVX512VL-NEXT: retq
%1 = fptrunc <4 x double> %a0 to <4 x half> %1 = fptrunc <4 x double> %a0 to <4 x half>
%2 = bitcast <4 x half> %1 to <4 x i16> %2 = bitcast <4 x half> %1 to <4 x i16>
store <4 x i16> %2, <4 x i16>* %a1 store <4 x i16> %2, <4 x i16>* %a1
@@ -5416,9 +5272,9 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun
; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %ebx ; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx ; AVX512VL-NEXT: orl %ebp, %ebx
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -5592,9 +5448,9 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw
; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movzwl %ax, %ebx ; AVX512VL-NEXT: movzwl %ax, %ebx
; AVX512VL-NEXT: orl %ebp, %ebx ; AVX512VL-NEXT: orl %ebp, %ebx
; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: callq __truncdfhf2
@@ -5761,145 +5617,75 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind {
; AVX2-NEXT: popq %rbp ; AVX2-NEXT: popq %rbp
; AVX2-NEXT: retq ; AVX2-NEXT: retq
; ;
; AVX512F-LABEL: store_cvt_8f64_to_8i16: ; AVX512-LABEL: store_cvt_8f64_to_8i16:
; AVX512F: # BB#0: ; AVX512: # BB#0:
; AVX512F-NEXT: pushq %rbp ; AVX512-NEXT: pushq %rbp
; AVX512F-NEXT: pushq %r15 ; AVX512-NEXT: pushq %r15
; AVX512F-NEXT: pushq %r14 ; AVX512-NEXT: pushq %r14
; AVX512F-NEXT: pushq %r13 ; AVX512-NEXT: pushq %r13
; AVX512F-NEXT: pushq %r12 ; AVX512-NEXT: pushq %r12
; AVX512F-NEXT: pushq %rbx ; AVX512-NEXT: pushq %rbx
; AVX512F-NEXT: subq $200, %rsp ; AVX512-NEXT: subq $200, %rsp
; AVX512F-NEXT: movq %rdi, %rbx ; AVX512-NEXT: movq %rdi, %rbx
; AVX512F-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill ; AVX512-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill ; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload ; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill ; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload ; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill ; AVX512-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %r12d ; AVX512-NEXT: movl %eax, %r12d
; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %r13d ; AVX512-NEXT: movl %eax, %r13d
; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload ; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %ebp ; AVX512-NEXT: movl %eax, %ebp
; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %r14d ; AVX512-NEXT: movl %eax, %r14d
; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload ; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> ; AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT: vzeroupper ; AVX512-NEXT: vzeroupper
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movl %eax, %r15d ; AVX512-NEXT: movl %eax, %r15d
; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload ; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT: callq __truncdfhf2 ; AVX512-NEXT: callq __truncdfhf2
; AVX512F-NEXT: movw %ax, 12(%rbx) ; AVX512-NEXT: movw %ax, 12(%rbx)
; AVX512F-NEXT: movw %r15w, 8(%rbx) ; AVX512-NEXT: movw %r15w, 8(%rbx)
; AVX512F-NEXT: movw %r14w, 4(%rbx) ; AVX512-NEXT: movw %r14w, 4(%rbx)
; AVX512F-NEXT: movw %bp, (%rbx) ; AVX512-NEXT: movw %bp, (%rbx)
; AVX512F-NEXT: movw %r13w, 14(%rbx) ; AVX512-NEXT: movw %r13w, 14(%rbx)
; AVX512F-NEXT: movw %r12w, 10(%rbx) ; AVX512-NEXT: movw %r12w, 10(%rbx)
; AVX512F-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload ; AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload
; AVX512F-NEXT: movw %ax, 6(%rbx) ; AVX512-NEXT: movw %ax, 6(%rbx)
; AVX512F-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload ; AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload
; AVX512F-NEXT: movw %ax, 2(%rbx) ; AVX512-NEXT: movw %ax, 2(%rbx)
; AVX512F-NEXT: addq $200, %rsp ; AVX512-NEXT: addq $200, %rsp
; AVX512F-NEXT: popq %rbx ; AVX512-NEXT: popq %rbx
; AVX512F-NEXT: popq %r12 ; AVX512-NEXT: popq %r12
; AVX512F-NEXT: popq %r13 ; AVX512-NEXT: popq %r13
; AVX512F-NEXT: popq %r14 ; AVX512-NEXT: popq %r14
; AVX512F-NEXT: popq %r15 ; AVX512-NEXT: popq %r15
; AVX512F-NEXT: popq %rbp ; AVX512-NEXT: popq %rbp
; AVX512F-NEXT: retq ; AVX512-NEXT: retq
;
; AVX512VL-LABEL: store_cvt_8f64_to_8i16:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: pushq %rbp
; AVX512VL-NEXT: pushq %r15
; AVX512VL-NEXT: pushq %r14
; AVX512VL-NEXT: pushq %r13
; AVX512VL-NEXT: pushq %r12
; AVX512VL-NEXT: pushq %rbx
; AVX512VL-NEXT: subq $200, %rsp
; AVX512VL-NEXT: movq %rdi, %rbx
; AVX512VL-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r12d
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r13d
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %ebp
; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r14d
; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VL-NEXT: vzeroupper
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movl %eax, %r15d
; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT: callq __truncdfhf2
; AVX512VL-NEXT: movw %ax, 12(%rbx)
; AVX512VL-NEXT: movw %r15w, 8(%rbx)
; AVX512VL-NEXT: movw %r14w, 4(%rbx)
; AVX512VL-NEXT: movw %bp, (%rbx)
; AVX512VL-NEXT: movw %r13w, 14(%rbx)
; AVX512VL-NEXT: movw %r12w, 10(%rbx)
; AVX512VL-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload
; AVX512VL-NEXT: movw %ax, 6(%rbx)
; AVX512VL-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload
; AVX512VL-NEXT: movw %ax, 2(%rbx)
; AVX512VL-NEXT: addq $200, %rsp
; AVX512VL-NEXT: popq %rbx
; AVX512VL-NEXT: popq %r12
; AVX512VL-NEXT: popq %r13
; AVX512VL-NEXT: popq %r14
; AVX512VL-NEXT: popq %r15
; AVX512VL-NEXT: popq %rbp
; AVX512VL-NEXT: retq
%1 = fptrunc <8 x double> %a0 to <8 x half> %1 = fptrunc <8 x double> %a0 to <8 x half>
%2 = bitcast <8 x half> %1 to <8 x i16> %2 = bitcast <8 x half> %1 to <8 x i16>
store <8 x i16> %2, <8 x i16>* %a1 store <8 x i16> %2, <8 x i16>* %a1

@@ -857,20 +857,10 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
} }
define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) { define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0145: ; ALL-LABEL: shuffle_v4i64_0145:
; AVX1: # BB#0: ; ALL: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq ; ALL-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_0145:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_0145:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5> %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
ret <4 x i64> %shuffle ret <4 x i64> %shuffle
} }
@@ -901,20 +891,10 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
} }
define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) { define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4501: ; ALL-LABEL: shuffle_v4i64_4501:
; AVX1: # BB#0: ; ALL: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq ; ALL-NEXT: retq
;
; AVX2-LABEL: shuffle_v4i64_4501:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v4i64_4501:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1> %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
ret <4 x i64> %shuffle ret <4 x i64> %shuffle
} }
@@ -1487,20 +1467,10 @@ define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
} }
define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) { define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0145_bc: ; ALL-LABEL: concat_v4i64_0145_bc:
; AVX1: # BB#0: ; ALL: # BB#0:
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq ; ALL-NEXT: retq
;
; AVX2-LABEL: concat_v4i64_0145_bc:
; AVX2: # BB#0:
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512VL-LABEL: concat_v4i64_0145_bc:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT: retq
%a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1> %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
%a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5> %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
%bc0lo = bitcast <2 x i64> %a0lo to <4 x i32> %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>

@@ -2021,17 +2021,11 @@ define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; AVX1-NEXT: retq ; AVX1-NEXT: retq
; ;
; AVX2-LABEL: shuffle_v8i32_44444444: ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444:
; AVX2: # BB#0: ; AVX2OR512VL: # BB#0:
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 ; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0
; AVX2-NEXT: retq ; AVX2OR512VL-NEXT: retq
;
; AVX512VL-LABEL: shuffle_v8i32_44444444:
; AVX512VL: # BB#0:
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512VL-NEXT: vpbroadcastd %xmm0, %ymm0
; AVX512VL-NEXT: retq
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x i32> %shuffle ret <8 x i32> %shuffle
} }

@@ -26,8 +26,8 @@ define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08
define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) { define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc: ; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc:
; ALL: # BB#0: ; ALL: # BB#0:
; ALL-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0
; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq ; ALL-NEXT: retq
%tmp0 = bitcast <16 x i32> %a to <16 x float> %tmp0 = bitcast <16 x i32> %a to <16 x float>
%tmp1 = bitcast <16 x i32> %b to <16 x float> %tmp1 = bitcast <16 x i32> %b to <16 x float>
@@ -158,8 +158,8 @@ define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0
define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) { define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) {
; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: ; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04:
; ALL: # BB#0: ; ALL: # BB#0:
; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm0 ; ALL-NEXT: vextractf32x4 $1, %zmm0, %xmm0
; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 ; ALL-NEXT: vbroadcastss %xmm0, %zmm0
; ALL-NEXT: retq ; ALL-NEXT: retq
%shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32><i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <16 x i32> %shuffle ret <16 x i32> %shuffle
@@ -283,7 +283,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a
define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) { define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) {
; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15: ; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15:
; ALL: # BB#0: ; ALL: # BB#0:
; ALL-NEXT: vextracti32x8 $1, %zmm0, %ymm1 ; ALL-NEXT: vextractf32x8 $1, %zmm0, %ymm1
; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] ; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; ALL-NEXT: retq ; ALL-NEXT: retq
@@ -692,8 +692,8 @@ define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03
; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03: ; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
; ALL: # BB#0: ; ALL: # BB#0:
; ALL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; ALL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; ALL-NEXT: vinserti32x8 $1, %ymm0, %zmm0, %zmm0 ; ALL-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT: retq ; ALL-NEXT: retq
%res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i32> %res ret <16 x i32> %res

@@ -51,14 +51,14 @@ define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) {
define <8 x double> @shuffle_v8f64_44444444_bc(<8 x i64> %a, <8 x i64> %b) { define <8 x double> @shuffle_v8f64_44444444_bc(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8f64_44444444_bc: ; AVX512F-LABEL: shuffle_v8f64_44444444_bc:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 ; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8f64_44444444_bc: ; AVX512F-32-LABEL: shuffle_v8f64_44444444_bc:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm0
; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 ; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%tmp0 = bitcast <8 x i64> %a to <8 x double> %tmp0 = bitcast <8 x i64> %a to <8 x double>
%tmp1 = bitcast <8 x i64> %b to <8 x double> %tmp1 = bitcast <8 x i64> %b to <8 x double>
@@ -1012,14 +1012,14 @@ define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_44444444: ; AVX512F-LABEL: shuffle_v8i64_44444444:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 ; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_44444444: ; AVX512F-32-LABEL: shuffle_v8i64_44444444:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm0
; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 ; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -1028,14 +1028,14 @@ define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_66666666(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_66666666(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_66666666: ; AVX512F-LABEL: shuffle_v8i64_66666666:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm0 ; AVX512F-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 ; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_66666666: ; AVX512F-32-LABEL: shuffle_v8i64_66666666:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vextracti32x4 $3, %zmm0, %xmm0 ; AVX512F-32-NEXT: vextractf32x4 $3, %zmm0, %xmm0
; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 ; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -2457,12 +2457,12 @@ define <8 x double> @shuffle_v8f64_01230123(<8 x double> %a, <8 x double> %b) {
define <8 x i64> @shuffle_v8i64_012389AB(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_012389AB(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_012389AB: ; AVX512F-LABEL: shuffle_v8i64_012389AB:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_012389AB: ; AVX512F-32-LABEL: shuffle_v8i64_012389AB:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -2471,12 +2471,12 @@ define <8 x i64> @shuffle_v8i64_012389AB(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_89AB0123(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_89AB0123(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_89AB0123: ; AVX512F-LABEL: shuffle_v8i64_89AB0123:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_89AB0123: ; AVX512F-32-LABEL: shuffle_v8i64_89AB0123:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -2485,12 +2485,12 @@ define <8 x i64> @shuffle_v8i64_89AB0123(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01230123(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01230123(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_01230123: ; AVX512F-LABEL: shuffle_v8i64_01230123:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_01230123: ; AVX512F-32-LABEL: shuffle_v8i64_01230123:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -2555,12 +2555,12 @@ define <8 x double> @shuffle_v8f64_01234589(<8 x double> %a, <8 x double> %b) {
define <8 x i64> @shuffle_v8i64_89234567(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_89234567(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_89234567: ; AVX512F-LABEL: shuffle_v8i64_89234567:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_89234567: ; AVX512F-32-LABEL: shuffle_v8i64_89234567:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -2569,12 +2569,12 @@ define <8 x i64> @shuffle_v8i64_89234567(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01894567(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01894567(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_01894567: ; AVX512F-LABEL: shuffle_v8i64_01894567:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_01894567: ; AVX512F-32-LABEL: shuffle_v8i64_01894567:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -2583,12 +2583,12 @@ define <8 x i64> @shuffle_v8i64_01894567(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01238967(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01238967(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_01238967: ; AVX512F-LABEL: shuffle_v8i64_01238967:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_01238967: ; AVX512F-32-LABEL: shuffle_v8i64_01238967:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 6, i32 7>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -2597,12 +2597,12 @@ define <8 x i64> @shuffle_v8i64_01238967(<8 x i64> %a, <8 x i64> %b) {
define <8 x i64> @shuffle_v8i64_01234589(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01234589(<8 x i64> %a, <8 x i64> %b) {
; AVX512F-LABEL: shuffle_v8i64_01234589: ; AVX512F-LABEL: shuffle_v8i64_01234589:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: vinserti32x4 $3, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v8i64_01234589: ; AVX512F-32-LABEL: shuffle_v8i64_01234589:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: vinserti32x4 $3, %xmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9> %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle
@@ -2628,15 +2628,15 @@ define <8 x i64> @shuffle_v2i64_v8i64_01010101(<2 x i64> %a) {
; AVX512F-LABEL: shuffle_v2i64_v8i64_01010101: ; AVX512F-LABEL: shuffle_v2i64_v8i64_01010101:
; AVX512F: # BB#0: ; AVX512F: # BB#0:
; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-NEXT: retq ; AVX512F-NEXT: retq
; ;
; AVX512F-32-LABEL: shuffle_v2i64_v8i64_01010101: ; AVX512F-32-LABEL: shuffle_v2i64_v8i64_01010101:
; AVX512F-32: # BB#0: ; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def> ; AVX512F-32-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 ; AVX512F-32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; AVX512F-32-NEXT: retl ; AVX512F-32-NEXT: retl
%shuffle = shufflevector <2 x i64> %a, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> %shuffle = shufflevector <2 x i64> %a, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <8 x i64> %shuffle ret <8 x i64> %shuffle

@@ -479,7 +479,7 @@ define <16 x float> @expand13(<8 x float> %a ) {
; ;
; KNL64-LABEL: expand13: ; KNL64-LABEL: expand13:
; KNL64: # BB#0: ; KNL64: # BB#0:
; KNL64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; KNL64-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL64-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; KNL64-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; KNL64-NEXT: retq ; KNL64-NEXT: retq
; ;
@@ -491,7 +491,7 @@ define <16 x float> @expand13(<8 x float> %a ) {
; ;
; KNL32-LABEL: expand13: ; KNL32-LABEL: expand13:
; KNL32: # BB#0: ; KNL32: # BB#0:
; KNL32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 ; KNL32-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; KNL32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; KNL32-NEXT: retl ; KNL32-NEXT: retl
%res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>