diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index ac1c0c9357d..09f63ecfe71 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -9356,6 +9356,30 @@ static const uint16_t ReplaceableInstrs[][3] = {
   { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
   { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
   { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm },
+  { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrr, X86::VINSERTI32x4Zrr },
+  { X86::VINSERTF32x4Zrm, X86::VINSERTF32x4Zrm, X86::VINSERTI32x4Zrm },
+  { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrr, X86::VINSERTI32x8Zrr },
+  { X86::VINSERTF32x8Zrm, X86::VINSERTF32x8Zrm, X86::VINSERTI32x8Zrm },
+  { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrr, X86::VINSERTI64x2Zrr },
+  { X86::VINSERTF64x2Zrm, X86::VINSERTF64x2Zrm, X86::VINSERTI64x2Zrm },
+  { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrr, X86::VINSERTI64x4Zrr },
+  { X86::VINSERTF64x4Zrm, X86::VINSERTF64x4Zrm, X86::VINSERTI64x4Zrm },
+  { X86::VINSERTF32x4Z256rr,X86::VINSERTF32x4Z256rr,X86::VINSERTI32x4Z256rr },
+  { X86::VINSERTF32x4Z256rm,X86::VINSERTF32x4Z256rm,X86::VINSERTI32x4Z256rm },
+  { X86::VINSERTF64x2Z256rr,X86::VINSERTF64x2Z256rr,X86::VINSERTI64x2Z256rr },
+  { X86::VINSERTF64x2Z256rm,X86::VINSERTF64x2Z256rm,X86::VINSERTI64x2Z256rm },
+  { X86::VEXTRACTF32x4Zrr, X86::VEXTRACTF32x4Zrr, X86::VEXTRACTI32x4Zrr },
+  { X86::VEXTRACTF32x4Zmr, X86::VEXTRACTF32x4Zmr, X86::VEXTRACTI32x4Zmr },
+  { X86::VEXTRACTF32x8Zrr, X86::VEXTRACTF32x8Zrr, X86::VEXTRACTI32x8Zrr },
+  { X86::VEXTRACTF32x8Zmr, X86::VEXTRACTF32x8Zmr, X86::VEXTRACTI32x8Zmr },
+  { X86::VEXTRACTF64x2Zrr, X86::VEXTRACTF64x2Zrr, X86::VEXTRACTI64x2Zrr },
+  { X86::VEXTRACTF64x2Zmr, X86::VEXTRACTF64x2Zmr, X86::VEXTRACTI64x2Zmr },
+  { X86::VEXTRACTF64x4Zrr, X86::VEXTRACTF64x4Zrr, X86::VEXTRACTI64x4Zrr },
+  { X86::VEXTRACTF64x4Zmr, X86::VEXTRACTF64x4Zmr, X86::VEXTRACTI64x4Zmr },
+  { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTI32x4Z256rr },
+  { X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTF32x4Z256mr,X86::VEXTRACTI32x4Z256mr },
+  { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTI64x2Z256rr },
+  { X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTF64x2Z256mr,X86::VEXTRACTI64x2Z256mr },
 };
 
 static const uint16_t ReplaceableInstrsAVX2[][3] = {
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 21c719bea4d..3a76c267194 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -879,7 +879,7 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
 ; NODQ-LABEL: uitof64:
 ; NODQ: # BB#0:
 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm2
-; NODQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
+; NODQ-NEXT: vextractf64x4 $1, %zmm0, %ymm0
 ; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm1
 ; NODQ-NEXT: vmovaps %zmm2, %zmm0
 ; NODQ-NEXT: retq
@@ -887,7 +887,7 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
 ; DQ-LABEL: uitof64:
 ; DQ: # BB#0:
 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm2
-; DQ-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; DQ-NEXT: vextractf32x8 $1, %zmm0, %ymm0
 ; DQ-NEXT: vcvtudq2pd %ymm0, %zmm1
 ; DQ-NEXT: vmovaps %zmm2, %zmm0
 ; DQ-NEXT: retq
diff --git a/test/CodeGen/X86/avx512-extract-subvector.ll b/test/CodeGen/X86/avx512-extract-subvector.ll
index 2d0a81046b4..3cc87cf513b 100644
--- a/test/CodeGen/X86/avx512-extract-subvector.ll
+++ b/test/CodeGen/X86/avx512-extract-subvector.ll
@@ -5,7 +5,7 @@
 define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
 ; SKX-LABEL:
extract_subvector128_v32i16: ; SKX: ## BB#0: -; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> @@ -25,7 +25,7 @@ define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounw define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind { ; SKX-LABEL: extract_subvector128_v64i8: ; SKX: ## BB#0: -; SKX-NEXT: vextracti32x4 $2, %zmm0, %xmm0 +; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0 ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> @@ -46,7 +46,7 @@ define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwin define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind { ; SKX-LABEL: extract_subvector256_v32i16: ; SKX: ## BB#0: -; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ; SKX-NEXT: retq %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> ret <16 x i16> %r1 @@ -55,7 +55,7 @@ define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind { define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind { ; SKX-LABEL: extract_subvector256_v64i8: ; SKX: ## BB#0: -; SKX-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ; SKX-NEXT: retq %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> ret <32 x i8> %r1 @@ -90,7 +90,7 @@ entry: define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector256_v4i64_store: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi) +; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: @@ -103,7 +103,7 @@ entry: define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector256_v8i32_store: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi) +; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: @@ -116,7 +116,7 @@ entry: define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector256_v16i16_store: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi) +; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: @@ -129,7 +129,7 @@ entry: define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp { ; SKX-LABEL: extract_subvector256_v32i8_store: ; SKX: ## BB#0: ## %entry -; SKX-NEXT: vextracti128 $1, %ymm0, (%rdi) +; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi) ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq entry: diff --git a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll index ed5483c4cdc..fd6028f6b51 100644 --- a/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll @@ -1134,7 +1134,7 @@ define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind { ; X32-LABEL: test_mm512_zextpd128_pd512: ; X32: # BB#0: ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 @@ -1143,7 +1143,7 @@ define <8 x double> 
@test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind { ; X64-LABEL: test_mm512_zextpd128_pd512: ; X64: # BB#0: ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 @@ -1156,14 +1156,14 @@ define <8 x double> @test_mm512_zextpd256_pd512(<4 x double> %a0) nounwind { ; X32-LABEL: test_mm512_zextpd256_pd512: ; X32: # BB#0: ; X32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_zextpd256_pd512: ; X64: # BB#0: ; X64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X64-NEXT: retq %res = shufflevector <4 x double> %a0, <4 x double> zeroinitializer, <8 x i32> @@ -1174,7 +1174,7 @@ define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind { ; X32-LABEL: test_mm512_zextps128_ps512: ; X32: # BB#0: ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2 ; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 @@ -1183,7 +1183,7 @@ define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind { ; X64-LABEL: test_mm512_zextps128_ps512: ; X64: # BB#0: ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2 ; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; X64-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 @@ -1196,14 +1196,14 @@ define <16 x float> @test_mm512_zextps256_ps512(<8 x float> %a0) nounwind { ; X32-LABEL: test_mm512_zextps256_ps512: ; X32: # BB#0: ; X32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; X32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_zextps256_ps512: ; X64: # BB#0: ; X64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; X64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X64-NEXT: retq %res = shufflevector <8 x float> %a0, <8 x float> zeroinitializer, <16 x i32> @@ -1214,19 +1214,19 @@ define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind { ; X32-LABEL: test_mm512_zextsi128_si512: ; X32: # BB#0: ; X32-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm2 -; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; X32-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2 +; X32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X32-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_zextsi128_si512: ; X64: # BB#0: ; X64-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-NEXT: vinserti128 $1, %xmm1, %ymm1, %ymm2 -; X64-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; X64-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm2 +; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X64-NEXT: 
vinsertf64x4 $1, %ymm2, %zmm0, %zmm0 ; X64-NEXT: retq %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> ret <8 x i64> %res @@ -1236,15 +1236,15 @@ define <8 x i64> @test_mm512_zextsi256_si512(<4 x i64> %a0) nounwind { ; X32-LABEL: test_mm512_zextsi256_si512: ; X32: # BB#0: ; X32-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; X32-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm512_zextsi256_si512: ; X64: # BB#0: ; X64-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X64-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X64-NEXT: retq %res = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <8 x i32> ret <8 x i64> %res diff --git a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index 86902ac926a..65bf148f550 100644 --- a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -2910,7 +2910,7 @@ declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, < define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) { ; CHECK-LABEL: test_mask_vextracti64x4: ; CHECK: ## BB#0: -; CHECK-NEXT: vextracti64x4 $1, %zmm1, %ymm1 +; CHECK-NEXT: vextractf64x4 $1, %zmm1, %ymm1 ; CHECK-NEXT: kmovw %edi, %k0 ; CHECK-NEXT: kshiftlw $12, %k0, %k1 ; CHECK-NEXT: kshiftrw $15, %k1, %k1 diff --git a/test/CodeGen/X86/avx512-vbroadcasti128.ll b/test/CodeGen/X86/avx512-vbroadcasti128.ll index ed19324df99..d6a77fd49ea 100644 --- a/test/CodeGen/X86/avx512-vbroadcasti128.ll +++ b/test/CodeGen/X86/avx512-vbroadcasti128.ll @@ -234,26 +234,26 @@ define <64 x i8> @test_broadcast_16i8_64i8(<16 x i8> *%p) nounwind { define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) { ; X64-AVX512VL-LABEL: PR29088: ; X64-AVX512VL: ## BB#0: -; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512VL-NEXT: vmovaps (%rdi), %xmm0 ; X64-AVX512VL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi) -; X64-AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512VL-NEXT: retq ; ; X64-AVX512BWVL-LABEL: PR29088: ; X64-AVX512BWVL: ## BB#0: -; X64-AVX512BWVL-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512BWVL-NEXT: vmovaps (%rdi), %xmm0 ; X64-AVX512BWVL-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi) -; X64-AVX512BWVL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512BWVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512BWVL-NEXT: retq ; ; X64-AVX512DQVL-LABEL: PR29088: ; X64-AVX512DQVL: ## BB#0: -; X64-AVX512DQVL-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512DQVL-NEXT: vmovaps (%rdi), %xmm0 ; X64-AVX512DQVL-NEXT: vxorps %ymm1, %ymm1, %ymm1 ; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi) -; X64-AVX512DQVL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512DQVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512DQVL-NEXT: retq %ld = load <4 x i32>, <4 x i32>* %p0 store <8 x float> zeroinitializer, <8 x float>* %p1 diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll index 10e980bfb75..bcde7ac5545 100644 --- a/test/CodeGen/X86/masked_gather_scatter.ll +++ b/test/CodeGen/X86/masked_gather_scatter.ll @@ -1858,7 +1858,7 @@ define <16 x double> @test_gather_16f64(<16 x 
double*> %ptrs, <16 x i1> %mask, < ; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 ; KNL_32-NEXT: kshiftrw $8, %k1, %k2 ; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} -; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ; KNL_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} ; KNL_32-NEXT: vmovapd %zmm2, %zmm0 ; KNL_32-NEXT: movl %ebp, %esp @@ -1895,7 +1895,7 @@ define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, < ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 ; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1} -; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 +; SKX_32-NEXT: vextractf32x8 $1, %zmm0, %ymm0 ; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2} ; SKX_32-NEXT: vmovapd %zmm2, %zmm0 ; SKX_32-NEXT: movl %ebp, %esp @@ -2102,7 +2102,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou ; KNL_32-NEXT: vmovapd 8(%ebp), %zmm1 ; KNL_32-NEXT: kshiftrw $8, %k1, %k2 ; KNL_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} -; KNL_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; KNL_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0 ; KNL_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} ; KNL_32-NEXT: movl %ebp, %esp ; KNL_32-NEXT: popl %ebp @@ -2138,7 +2138,7 @@ define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x dou ; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1 ; SKX_32-NEXT: kshiftrw $8, %k1, %k2 ; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1} -; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0 +; SKX_32-NEXT: vextractf32x8 $1, %zmm0, %ymm0 ; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2} ; SKX_32-NEXT: movl %ebp, %esp ; SKX_32-NEXT: popl %ebp diff --git a/test/CodeGen/X86/masked_memop.ll b/test/CodeGen/X86/masked_memop.ll index 8bcc5a4a6e1..e3d09611dec 100644 --- a/test/CodeGen/X86/masked_memop.ll +++ b/test/CodeGen/X86/masked_memop.ll @@ -995,19 +995,12 @@ define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512F-LABEL: one_mask_bit_set3: -; AVX512F: ## BB#0: -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vmovlps %xmm0, 16(%rdi) -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; SKX-LABEL: one_mask_bit_set3: -; SKX: ## BB#0: -; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0 -; SKX-NEXT: vmovq %xmm0, 16(%rdi) -; SKX-NEXT: vzeroupper -; SKX-NEXT: retq +; AVX512-LABEL: one_mask_bit_set3: +; AVX512: ## BB#0: +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovlps %xmm0, 16(%rdi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1>) ret void } diff --git a/test/CodeGen/X86/merge-consecutive-loads-512.ll b/test/CodeGen/X86/merge-consecutive-loads-512.ll index 417479d325e..087580fe3a7 100644 --- a/test/CodeGen/X86/merge-consecutive-loads-512.ll +++ b/test/CodeGen/X86/merge-consecutive-loads-512.ll @@ -8,7 +8,7 @@ define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_8f64_2f64_12u4: ; ALL: # BB#0: -; ALL-NEXT: vmovupd 16(%rdi), %ymm0 +; ALL-NEXT: vmovups 16(%rdi), %ymm0 ; ALL-NEXT: vinsertf128 $1, 64(%rdi), %ymm0, %ymm1 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; ALL-NEXT: retq @@ -16,7 +16,7 @@ define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable n ; X32-AVX512F-LABEL: merge_8f64_2f64_12u4: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vmovupd 16(%eax), %ymm0 +; X32-AVX512F-NEXT: vmovups 16(%eax), 
%ymm0 ; X32-AVX512F-NEXT: vinsertf128 $1, 64(%eax), %ymm0, %ymm1 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-AVX512F-NEXT: retl @@ -35,8 +35,8 @@ define <8 x double> @merge_8f64_2f64_12u4(<2 x double>* %ptr) nounwind uwtable n define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_8f64_2f64_23z5: ; ALL: # BB#0: -; ALL-NEXT: vmovupd 32(%rdi), %ymm0 -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vmovups 32(%rdi), %ymm0 +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; ALL-NEXT: vinsertf128 $1, 80(%rdi), %ymm1, %ymm1 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; ALL-NEXT: retq @@ -44,8 +44,8 @@ define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable n ; X32-AVX512F-LABEL: merge_8f64_2f64_23z5: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vmovupd 32(%eax), %ymm0 -; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-AVX512F-NEXT: vmovups 32(%eax), %ymm0 +; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-AVX512F-NEXT: vinsertf128 $1, 80(%eax), %ymm1, %ymm1 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-AVX512F-NEXT: retl @@ -64,14 +64,14 @@ define <8 x double> @merge_8f64_2f64_23z5(<2 x double>* %ptr) nounwind uwtable n define <8 x double> @merge_8f64_4f64_z2(<4 x double>* %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_8f64_4f64_z2: ; ALL: # BB#0: -; ALL-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; ALL-NEXT: vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0 ; ALL-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8f64_4f64_z2: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vxorpd %xmm0, %xmm0, %xmm0 +; X32-AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 ; X32-AVX512F-NEXT: vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0 ; X32-AVX512F-NEXT: retl %ptr1 = getelementptr inbounds <4 x double>, <4 x double>* %ptr, i64 2 @@ -106,20 +106,20 @@ define <8 x double> @merge_8f64_f64_23uuuuu9(double* %ptr) nounwind uwtable noin define <8 x double> @merge_8f64_f64_12zzuuzz(double* %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_8f64_f64_12zzuuzz: ; ALL: # BB#0: -; ALL-NEXT: vmovupd 8(%rdi), %xmm0 -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vmovups 8(%rdi), %xmm0 +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; ALL-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; ALL-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8f64_f64_12zzuuzz: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vmovupd 8(%eax), %xmm0 -; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-AVX512F-NEXT: vmovups 8(%eax), %xmm0 +; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; X32-AVX512F-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-AVX512F-NEXT: retl %ptr0 = getelementptr inbounds double, double* %ptr, i64 1 @@ -179,15 +179,15 @@ define <8 x double> @merge_8f64_f64_1u3u5zu8(double* %ptr) nounwind uwtable noin define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_8i64_4i64_z3: ; ALL: # BB#0: -; ALL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; ALL-NEXT: vinserti64x4 $1, 96(%rdi), %zmm0, %zmm0 +; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; ALL-NEXT: vinsertf64x4 $1, 96(%rdi), 
%zmm0, %zmm0 ; ALL-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8i64_4i64_z3: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; X32-AVX512F-NEXT: vinserti64x4 $1, 96(%eax), %zmm0, %zmm0 +; X32-AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X32-AVX512F-NEXT: vinsertf64x4 $1, 96(%eax), %zmm0, %zmm0 ; X32-AVX512F-NEXT: retl %ptr1 = getelementptr inbounds <4 x i64>, <4 x i64>* %ptr, i64 3 %val1 = load <4 x i64>, <4 x i64>* %ptr1 @@ -198,21 +198,21 @@ define <8 x i64> @merge_8i64_4i64_z3(<4 x i64>* %ptr) nounwind uwtable noinline define <8 x i64> @merge_8i64_i64_56zz9uzz(i64* %ptr) nounwind uwtable noinline ssp { ; ALL-LABEL: merge_8i64_i64_56zz9uzz: ; ALL: # BB#0: -; ALL-NEXT: vmovdqu 40(%rdi), %xmm0 -; ALL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; ALL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; ALL-NEXT: vmovups 40(%rdi), %xmm0 +; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; ALL-NEXT: retq ; ; X32-AVX512F-LABEL: merge_8i64_i64_56zz9uzz: ; X32-AVX512F: # BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: vmovdqu 40(%eax), %xmm0 -; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X32-AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; X32-AVX512F-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero -; X32-AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; X32-AVX512F-NEXT: vmovups 40(%eax), %xmm0 +; X32-AVX512F-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; X32-AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; X32-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; X32-AVX512F-NEXT: retl %ptr0 = getelementptr inbounds i64, i64* %ptr, i64 5 %ptr1 = getelementptr inbounds i64, i64* %ptr, i64 6 diff --git a/test/CodeGen/X86/shuffle-strided-with-offset-256.ll b/test/CodeGen/X86/shuffle-strided-with-offset-256.ll index 0f7523daa35..f0eab80b0cf 100644 --- a/test/CodeGen/X86/shuffle-strided-with-offset-256.ll +++ b/test/CodeGen/X86/shuffle-strided-with-offset-256.ll @@ -141,41 +141,14 @@ define void @shuffle_v8i32_to_v4i32_1(<8 x i32>* %L, <4 x i32>* %S) nounwind { ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; -; AVX512F-LABEL: shuffle_v8i32_to_v4i32_1: -; AVX512F: # BB#0: -; AVX512F-NEXT: vmovaps (%rdi), %ymm0 -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; AVX512F-NEXT: vmovaps %xmm0, (%rsi) -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v8i32_to_v4i32_1: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; AVX512VL-NEXT: vmovaps %xmm0, (%rsi) -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: shuffle_v8i32_to_v4i32_1: -; AVX512BW: # BB#0: -; AVX512BW-NEXT: vmovaps (%rdi), %ymm0 -; AVX512BW-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; AVX512BW-NEXT: vmovaps %xmm0, (%rsi) -; AVX512BW-NEXT: vzeroupper -; AVX512BW-NEXT: retq -; -; AVX512BWVL-LABEL: shuffle_v8i32_to_v4i32_1: -; AVX512BWVL: # BB#0: -; AVX512BWVL-NEXT: vmovdqa (%rdi), %ymm0 -; AVX512BWVL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512BWVL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; 
AVX512BWVL-NEXT: vmovaps %xmm0, (%rsi) -; AVX512BWVL-NEXT: vzeroupper -; AVX512BWVL-NEXT: retq +; AVX512-LABEL: shuffle_v8i32_to_v4i32_1: +; AVX512: # BB#0: +; AVX512-NEXT: vmovaps (%rdi), %ymm0 +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; AVX512-NEXT: vmovaps %xmm0, (%rsi) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %vec = load <8 x i32>, <8 x i32>* %L %strided.vec = shufflevector <8 x i32> %vec, <8 x i32> undef, <4 x i32> store <4 x i32> %strided.vec, <4 x i32>* %S diff --git a/test/CodeGen/X86/shuffle-strided-with-offset-512.ll b/test/CodeGen/X86/shuffle-strided-with-offset-512.ll index b9f8d5f5085..df788b4dea4 100644 --- a/test/CodeGen/X86/shuffle-strided-with-offset-512.ll +++ b/test/CodeGen/X86/shuffle-strided-with-offset-512.ll @@ -95,8 +95,8 @@ define void @shuffle_v32i16_to_v16i16_1(<32 x i16>* %L, <16 x i16>* %S) nounwind define void @shuffle_v16i32_to_v8i32_1(<16 x i32>* %L, <8 x i32>* %S) nounwind { ; AVX512-LABEL: shuffle_v16i32_to_v8i32_1: ; AVX512: # BB#0: -; AVX512-NEXT: vmovdqa32 (%rdi), %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vmovaps (%rdi), %zmm0 +; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] ; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX512-NEXT: vmovdqa %ymm0, (%rsi) diff --git a/test/CodeGen/X86/subvector-broadcast.ll b/test/CodeGen/X86/subvector-broadcast.ll index ade54dc9fef..fd5031f8bd1 100644 --- a/test/CodeGen/X86/subvector-broadcast.ll +++ b/test/CodeGen/X86/subvector-broadcast.ll @@ -806,69 +806,21 @@ define <64 x i8> @test_broadcast_32i8_64i8(<32 x i8> *%p) nounwind { ; define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x double>* %p1) { -; X32-AVX-LABEL: test_broadcast_2f64_4f64_reuse: -; X32-AVX: ## BB#0: -; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX-NEXT: vmovaps (%ecx), %xmm0 -; X32-AVX-NEXT: vmovaps %xmm0, (%eax) -; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX-NEXT: retl +; X32-LABEL: test_broadcast_2f64_4f64_reuse: +; X32: ## BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: vmovaps (%ecx), %xmm0 +; X32-NEXT: vmovaps %xmm0, (%eax) +; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X32-NEXT: retl ; -; X32-AVX512F-LABEL: test_broadcast_2f64_4f64_reuse: -; X32-AVX512F: ## BB#0: -; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0 -; X32-AVX512F-NEXT: vmovaps %xmm0, (%eax) -; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX512F-NEXT: retl -; -; X32-AVX512BW-LABEL: test_broadcast_2f64_4f64_reuse: -; X32-AVX512BW: ## BB#0: -; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vmovaps (%ecx), %xmm0 -; X32-AVX512BW-NEXT: vmovaps %xmm0, (%eax) -; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX512BW-NEXT: retl -; -; X32-AVX512DQ-LABEL: test_broadcast_2f64_4f64_reuse: -; X32-AVX512DQ: ## BB#0: -; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovapd (%ecx), %xmm0 -; X32-AVX512DQ-NEXT: vmovapd %xmm0, (%eax) -; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX512DQ-NEXT: retl -; -; X64-AVX-LABEL: test_broadcast_2f64_4f64_reuse: -; X64-AVX: ## BB#0: -; X64-AVX-NEXT: 
vmovaps (%rdi), %xmm0 -; X64-AVX-NEXT: vmovaps %xmm0, (%rsi) -; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX-NEXT: retq -; -; X64-AVX512F-LABEL: test_broadcast_2f64_4f64_reuse: -; X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0 -; X64-AVX512F-NEXT: vmovaps %xmm0, (%rsi) -; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX512F-NEXT: retq -; -; X64-AVX512BW-LABEL: test_broadcast_2f64_4f64_reuse: -; X64-AVX512BW: ## BB#0: -; X64-AVX512BW-NEXT: vmovaps (%rdi), %xmm0 -; X64-AVX512BW-NEXT: vmovaps %xmm0, (%rsi) -; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX512BW-NEXT: retq -; -; X64-AVX512DQ-LABEL: test_broadcast_2f64_4f64_reuse: -; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovapd (%rdi), %xmm0 -; X64-AVX512DQ-NEXT: vmovapd %xmm0, (%rsi) -; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX512DQ-NEXT: retq +; X64-LABEL: test_broadcast_2f64_4f64_reuse: +; X64: ## BB#0: +; X64-NEXT: vmovaps (%rdi), %xmm0 +; X64-NEXT: vmovaps %xmm0, (%rsi) +; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X64-NEXT: retq %1 = load <2 x double>, <2 x double>* %p0 store <2 x double> %1, <2 x double>* %p1 %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> @@ -876,37 +828,21 @@ define <4 x double> @test_broadcast_2f64_4f64_reuse(<2 x double>* %p0, <2 x doub } define <4 x i64> @test_broadcast_2i64_4i64_reuse(<2 x i64>* %p0, <2 x i64>* %p1) { -; X32-AVX-LABEL: test_broadcast_2i64_4i64_reuse: -; X32-AVX: ## BB#0: -; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX-NEXT: vmovaps (%ecx), %xmm0 -; X32-AVX-NEXT: vmovaps %xmm0, (%eax) -; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX-NEXT: retl +; X32-LABEL: test_broadcast_2i64_4i64_reuse: +; X32: ## BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: vmovaps (%ecx), %xmm0 +; X32-NEXT: vmovaps %xmm0, (%eax) +; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X32-NEXT: retl ; -; X32-AVX512-LABEL: test_broadcast_2i64_4i64_reuse: -; X32-AVX512: ## BB#0: -; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0 -; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax) -; X32-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX512-NEXT: retl -; -; X64-AVX-LABEL: test_broadcast_2i64_4i64_reuse: -; X64-AVX: ## BB#0: -; X64-AVX-NEXT: vmovaps (%rdi), %xmm0 -; X64-AVX-NEXT: vmovaps %xmm0, (%rsi) -; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: test_broadcast_2i64_4i64_reuse: -; X64-AVX512: ## BB#0: -; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi) -; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX512-NEXT: retq +; X64-LABEL: test_broadcast_2i64_4i64_reuse: +; X64: ## BB#0: +; X64-NEXT: vmovaps (%rdi), %xmm0 +; X64-NEXT: vmovaps %xmm0, (%rsi) +; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X64-NEXT: retq %1 = load <2 x i64>, <2 x i64>* %p0 store <2 x i64> %1, <2 x i64>* %p1 %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> @@ -936,37 +872,21 @@ define <8 x float> @test_broadcast_4f32_8f32_reuse(<4 x float>* %p0, <4 x float> } define <8 x i32> @test_broadcast_4i32_8i32_reuse(<4 x i32>* %p0, <4 x i32>* %p1) { -; X32-AVX-LABEL: test_broadcast_4i32_8i32_reuse: -; X32-AVX: ## BB#0: -; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX-NEXT: vmovaps 
(%ecx), %xmm0 -; X32-AVX-NEXT: vmovaps %xmm0, (%eax) -; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX-NEXT: retl +; X32-LABEL: test_broadcast_4i32_8i32_reuse: +; X32: ## BB#0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: vmovaps (%ecx), %xmm0 +; X32-NEXT: vmovaps %xmm0, (%eax) +; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X32-NEXT: retl ; -; X32-AVX512-LABEL: test_broadcast_4i32_8i32_reuse: -; X32-AVX512: ## BB#0: -; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512-NEXT: vmovdqa (%ecx), %xmm0 -; X32-AVX512-NEXT: vmovdqa %xmm0, (%eax) -; X32-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X32-AVX512-NEXT: retl -; -; X64-AVX-LABEL: test_broadcast_4i32_8i32_reuse: -; X64-AVX: ## BB#0: -; X64-AVX-NEXT: vmovaps (%rdi), %xmm0 -; X64-AVX-NEXT: vmovaps %xmm0, (%rsi) -; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX-NEXT: retq -; -; X64-AVX512-LABEL: test_broadcast_4i32_8i32_reuse: -; X64-AVX512: ## BB#0: -; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm0 -; X64-AVX512-NEXT: vmovdqa %xmm0, (%rsi) -; X64-AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; X64-AVX512-NEXT: retq +; X64-LABEL: test_broadcast_4i32_8i32_reuse: +; X64: ## BB#0: +; X64-NEXT: vmovaps (%rdi), %xmm0 +; X64-NEXT: vmovaps %xmm0, (%rsi) +; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; X64-NEXT: retq %1 = load <4 x i32>, <4 x i32>* %p0 store <4 x i32> %1, <4 x i32>* %p1 %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> @@ -987,9 +907,9 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0 -; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax) -; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0 +; X32-AVX512F-NEXT: vmovaps %xmm0, (%eax) +; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512F-NEXT: retl ; ; X32-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse: @@ -1005,9 +925,9 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0 -; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax) -; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X32-AVX512DQ-NEXT: vmovaps (%ecx), %xmm0 +; X32-AVX512DQ-NEXT: vmovaps %xmm0, (%eax) +; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512DQ-NEXT: retl ; ; X64-AVX-LABEL: test_broadcast_8i16_16i16_reuse: @@ -1019,9 +939,9 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p ; ; X64-AVX512F-LABEL: test_broadcast_8i16_16i16_reuse: ; X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0 -; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi) -; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0 +; X64-AVX512F-NEXT: vmovaps %xmm0, (%rsi) +; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_8i16_16i16_reuse: @@ -1033,9 +953,9 @@ define <16 x i16> @test_broadcast_8i16_16i16_reuse(<8 x i16> *%p0, <8 x i16> *%p ; ; X64-AVX512DQ-LABEL: test_broadcast_8i16_16i16_reuse: ; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 -; X64-AVX512DQ-NEXT: vmovdqa %xmm0, 
(%rsi) -; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 +; X64-AVX512DQ-NEXT: vmovaps %xmm0, (%rsi) +; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512DQ-NEXT: retq %1 = load <8 x i16>, <8 x i16> *%p0 store <8 x i16> %1, <8 x i16>* %p1 @@ -1057,9 +977,9 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0 -; X32-AVX512F-NEXT: vmovdqa %xmm0, (%eax) -; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0 +; X32-AVX512F-NEXT: vmovaps %xmm0, (%eax) +; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512F-NEXT: retl ; ; X32-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse: @@ -1075,9 +995,9 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0 -; X32-AVX512DQ-NEXT: vmovdqa %xmm0, (%eax) -; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X32-AVX512DQ-NEXT: vmovaps (%ecx), %xmm0 +; X32-AVX512DQ-NEXT: vmovaps %xmm0, (%eax) +; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512DQ-NEXT: retl ; ; X64-AVX-LABEL: test_broadcast_16i8_32i8_reuse: @@ -1089,9 +1009,9 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) ; ; X64-AVX512F-LABEL: test_broadcast_16i8_32i8_reuse: ; X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0 -; X64-AVX512F-NEXT: vmovdqa %xmm0, (%rsi) -; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0 +; X64-AVX512F-NEXT: vmovaps %xmm0, (%rsi) +; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_16i8_32i8_reuse: @@ -1103,9 +1023,9 @@ define <32 x i8> @test_broadcast_16i8_32i8_reuse(<16 x i8> *%p0, <16 x i8> *%p1) ; ; X64-AVX512DQ-LABEL: test_broadcast_16i8_32i8_reuse: ; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 -; X64-AVX512DQ-NEXT: vmovdqa %xmm0, (%rsi) -; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 +; X64-AVX512DQ-NEXT: vmovaps %xmm0, (%rsi) +; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512DQ-NEXT: retq %1 = load <16 x i8>, <16 x i8> *%p0 store <16 x i8> %1, <16 x i8>* %p1 @@ -1132,30 +1052,30 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p ; X32-AVX512F: ## BB#0: ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0 ; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax) -; X32-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512F-NEXT: retl ; ; X32-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain: ; X32-AVX512BW: ## BB#0: ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512BW-NEXT: vmovaps (%ecx), %xmm0 ; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax) -; X32-AVX512BW-NEXT: vinserti128 $1, %xmm0, 
%ymm0, %ymm0 +; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512BW-NEXT: retl ; ; X32-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain: ; X32-AVX512DQ: ## BB#0: ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0 +; X32-AVX512DQ-NEXT: vmovaps (%ecx), %xmm0 ; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax) -; X32-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X32-AVX512DQ-NEXT: retl ; ; X64-AVX-LABEL: test_broadcast_4i32_8i32_chain: @@ -1168,26 +1088,26 @@ define <8 x i32> @test_broadcast_4i32_8i32_chain(<4 x i32>* %p0, <4 x float>* %p ; ; X64-AVX512F-LABEL: test_broadcast_4i32_8i32_chain: ; X64-AVX512F: ## BB#0: -; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0 ; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi) -; X64-AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512F-NEXT: retq ; ; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain: ; X64-AVX512BW: ## BB#0: -; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512BW-NEXT: vmovaps (%rdi), %xmm0 ; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi) -; X64-AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512BW-NEXT: retq ; ; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain: ; X64-AVX512DQ: ## BB#0: -; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 +; X64-AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 ; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi) -; X64-AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; X64-AVX512DQ-NEXT: retq %1 = load <4 x i32>, <4 x i32>* %p0 store <4 x float> zeroinitializer, <4 x float>* %p1 diff --git a/test/CodeGen/X86/vector-half-conversions.ll b/test/CodeGen/X86/vector-half-conversions.ll index a2a7363d789..2f5c55d74fb 100644 --- a/test/CodeGen/X86/vector-half-conversions.ll +++ b/test/CodeGen/X86/vector-half-conversions.ll @@ -4393,79 +4393,42 @@ define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind { ; AVX2-NEXT: popq %r14 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: cvt_4f64_to_4i16: -; AVX512F: # BB#0: -; AVX512F-NEXT: pushq %r14 -; AVX512F-NEXT: pushq %rbx -; AVX512F-NEXT: subq $40, %rsp -; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, %bx -; AVX512F-NEXT: shll $16, %ebx -; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload -; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movzwl %ax, %r14d -; AVX512F-NEXT: orl %ebx, %r14d -; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, %bx -; AVX512F-NEXT: shll $16, %ebx -; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movzwl %ax, %eax -; AVX512F-NEXT: orl %ebx, %eax -; AVX512F-NEXT: shlq $32, %rax -; 
AVX512F-NEXT: orq %r14, %rax -; AVX512F-NEXT: vmovq %rax, %xmm0 -; AVX512F-NEXT: addq $40, %rsp -; AVX512F-NEXT: popq %rbx -; AVX512F-NEXT: popq %r14 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: cvt_4f64_to_4i16: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: pushq %r14 -; AVX512VL-NEXT: pushq %rbx -; AVX512VL-NEXT: subq $40, %rsp -; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, %bx -; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload -; AVX512VL-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movzwl %ax, %r14d -; AVX512VL-NEXT: orl %ebx, %r14d -; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, %bx -; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movzwl %ax, %eax -; AVX512VL-NEXT: orl %ebx, %eax -; AVX512VL-NEXT: shlq $32, %rax -; AVX512VL-NEXT: orq %r14, %rax -; AVX512VL-NEXT: vmovq %rax, %xmm0 -; AVX512VL-NEXT: addq $40, %rsp -; AVX512VL-NEXT: popq %rbx -; AVX512VL-NEXT: popq %r14 -; AVX512VL-NEXT: retq +; AVX512-LABEL: cvt_4f64_to_4i16: +; AVX512: # BB#0: +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: subq $40, %rsp +; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, %bx +; AVX512-NEXT: shll $16, %ebx +; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload +; AVX512-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movzwl %ax, %r14d +; AVX512-NEXT: orl %ebx, %r14d +; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, %bx +; AVX512-NEXT: shll $16, %ebx +; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movzwl %ax, %eax +; AVX512-NEXT: orl %ebx, %eax +; AVX512-NEXT: shlq $32, %rax +; AVX512-NEXT: orq %r14, %rax +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: addq $40, %rsp +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: retq %1 = fptrunc <4 x double> %a0 to <4 x half> %2 = bitcast <4 x half> %1 to <4 x i16> ret <4 x i16> %2 @@ -4603,9 +4566,9 @@ define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind { ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movzwl %ax, %r14d ; AVX512VL-NEXT: orl %ebx, %r14d -; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: callq __truncdfhf2 @@ -4762,9 +4725,9 @@ define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x 
double> %a0) nounwind { ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movzwl %ax, %r14d ; AVX512VL-NEXT: orl %ebx, %r14d -; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: callq __truncdfhf2 @@ -4926,143 +4889,74 @@ define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind { ; AVX2-NEXT: popq %r15 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: cvt_8f64_to_8i16: -; AVX512F: # BB#0: -; AVX512F-NEXT: pushq %r15 -; AVX512F-NEXT: pushq %r14 -; AVX512F-NEXT: pushq %rbx -; AVX512F-NEXT: subq $96, %rsp -; AVX512F-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, %bx -; AVX512F-NEXT: shll $16, %ebx -; AVX512F-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload -; AVX512F-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movzwl %ax, %r15d -; AVX512F-NEXT: orl %ebx, %r15d -; AVX512F-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, %bx -; AVX512F-NEXT: shll $16, %ebx -; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movzwl %ax, %r14d -; AVX512F-NEXT: orl %ebx, %r14d -; AVX512F-NEXT: shlq $32, %r14 -; AVX512F-NEXT: orq %r15, %r14 -; AVX512F-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload -; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, %bx -; AVX512F-NEXT: shll $16, %ebx -; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload -; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movzwl %ax, %r15d -; AVX512F-NEXT: orl %ebx, %r15d -; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, %bx -; AVX512F-NEXT: shll $16, %ebx -; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movzwl %ax, %eax -; AVX512F-NEXT: orl %ebx, %eax -; AVX512F-NEXT: shlq $32, %rax -; AVX512F-NEXT: orq %r15, %rax -; AVX512F-NEXT: vmovq %rax, %xmm0 -; AVX512F-NEXT: vmovq %r14, %xmm1 -; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512F-NEXT: addq $96, %rsp -; AVX512F-NEXT: popq %rbx -; AVX512F-NEXT: popq %r14 -; AVX512F-NEXT: popq %r15 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: cvt_8f64_to_8i16: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: pushq %r15 -; AVX512VL-NEXT: pushq %r14 -; AVX512VL-NEXT: pushq %rbx -; AVX512VL-NEXT: subq $96, %rsp -; AVX512VL-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = 
xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, %bx -; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload -; AVX512VL-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movzwl %ax, %r15d -; AVX512VL-NEXT: orl %ebx, %r15d -; AVX512VL-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, %bx -; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movzwl %ax, %r14d -; AVX512VL-NEXT: orl %ebx, %r14d -; AVX512VL-NEXT: shlq $32, %r14 -; AVX512VL-NEXT: orq %r15, %r14 -; AVX512VL-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload -; AVX512VL-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, %bx -; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload -; AVX512VL-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movzwl %ax, %r15d -; AVX512VL-NEXT: orl %ebx, %r15d -; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, %bx -; AVX512VL-NEXT: shll $16, %ebx -; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movzwl %ax, %eax -; AVX512VL-NEXT: orl %ebx, %eax -; AVX512VL-NEXT: shlq $32, %rax -; AVX512VL-NEXT: orq %r15, %rax -; AVX512VL-NEXT: vmovq %rax, %xmm0 -; AVX512VL-NEXT: vmovq %r14, %xmm1 -; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; AVX512VL-NEXT: addq $96, %rsp -; AVX512VL-NEXT: popq %rbx -; AVX512VL-NEXT: popq %r14 -; AVX512VL-NEXT: popq %r15 -; AVX512VL-NEXT: retq +; AVX512-LABEL: cvt_8f64_to_8i16: +; AVX512: # BB#0: +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: subq $96, %rsp +; AVX512-NEXT: vmovupd %zmm0, (%rsp) # 64-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, %bx +; AVX512-NEXT: shll $16, %ebx +; AVX512-NEXT: vmovups (%rsp), %zmm0 # 64-byte Reload +; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movzwl %ax, %r15d +; AVX512-NEXT: orl %ebx, %r15d +; AVX512-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, %bx +; AVX512-NEXT: shll $16, %ebx +; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movzwl %ax, %r14d +; AVX512-NEXT: orl %ebx, %r14d +; AVX512-NEXT: shlq 
$32, %r14 +; AVX512-NEXT: orq %r15, %r14 +; AVX512-NEXT: vmovupd (%rsp), %zmm0 # 64-byte Reload +; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; AVX512-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, %bx +; AVX512-NEXT: shll $16, %ebx +; AVX512-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload +; AVX512-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movzwl %ax, %r15d +; AVX512-NEXT: orl %ebx, %r15d +; AVX512-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, %bx +; AVX512-NEXT: shll $16, %ebx +; AVX512-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movzwl %ax, %eax +; AVX512-NEXT: orl %ebx, %eax +; AVX512-NEXT: shlq $32, %rax +; AVX512-NEXT: orq %r15, %rax +; AVX512-NEXT: vmovq %rax, %xmm0 +; AVX512-NEXT: vmovq %r14, %xmm1 +; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512-NEXT: addq $96, %rsp +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: retq %1 = fptrunc <8 x double> %a0 to <8 x half> %2 = bitcast <8 x half> %1 to <8 x i16> ret <8 x i16> %2 @@ -5189,81 +5083,43 @@ define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) nounwind { ; AVX2-NEXT: popq %rbp ; AVX2-NEXT: retq ; -; AVX512F-LABEL: store_cvt_4f64_to_4i16: -; AVX512F: # BB#0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: pushq %r15 -; AVX512F-NEXT: pushq %r14 -; AVX512F-NEXT: pushq %rbx -; AVX512F-NEXT: subq $88, %rsp -; AVX512F-NEXT: movq %rdi, %rbx -; AVX512F-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, %r14d -; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, %r15d -; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, %ebp -; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, 4(%rbx) -; AVX512F-NEXT: movw %bp, (%rbx) -; AVX512F-NEXT: movw %r15w, 6(%rbx) -; AVX512F-NEXT: movw %r14w, 2(%rbx) -; AVX512F-NEXT: addq $88, %rsp -; AVX512F-NEXT: popq %rbx -; AVX512F-NEXT: popq %r14 -; AVX512F-NEXT: popq %r15 -; AVX512F-NEXT: popq %rbp -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: store_cvt_4f64_to_4i16: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: pushq %rbp -; AVX512VL-NEXT: pushq %r15 -; AVX512VL-NEXT: pushq %r14 -; AVX512VL-NEXT: pushq %rbx -; AVX512VL-NEXT: subq $88, %rsp -; AVX512VL-NEXT: movq %rdi, %rbx -; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %r14d -; AVX512VL-NEXT: vmovups 
{{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %r15d -; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX512VL-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %ebp -; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, 4(%rbx) -; AVX512VL-NEXT: movw %bp, (%rbx) -; AVX512VL-NEXT: movw %r15w, 6(%rbx) -; AVX512VL-NEXT: movw %r14w, 2(%rbx) -; AVX512VL-NEXT: addq $88, %rsp -; AVX512VL-NEXT: popq %rbx -; AVX512VL-NEXT: popq %r14 -; AVX512VL-NEXT: popq %r15 -; AVX512VL-NEXT: popq %rbp -; AVX512VL-NEXT: retq +; AVX512-LABEL: store_cvt_4f64_to_4i16: +; AVX512: # BB#0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: subq $88, %rsp +; AVX512-NEXT: movq %rdi, %rbx +; AVX512-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movl %eax, %r14d +; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movl %eax, %r15d +; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movl %eax, %ebp +; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, 4(%rbx) +; AVX512-NEXT: movw %bp, (%rbx) +; AVX512-NEXT: movw %r15w, 6(%rbx) +; AVX512-NEXT: movw %r14w, 2(%rbx) +; AVX512-NEXT: addq $88, %rsp +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq %1 = fptrunc <4 x double> %a0 to <4 x half> %2 = bitcast <4 x half> %1 to <4 x i16> store <4 x i16> %2, <4 x i16>* %a1 @@ -5416,9 +5272,9 @@ define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) noun ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movzwl %ax, %ebx ; AVX512VL-NEXT: orl %ebp, %ebx -; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: callq __truncdfhf2 @@ -5592,9 +5448,9 @@ define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounw ; AVX512VL-NEXT: callq __truncdfhf2 ; AVX512VL-NEXT: movzwl %ax, %ebx ; AVX512VL-NEXT: orl %ebp, %ebx -; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload +; AVX512VL-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload ; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; AVX512VL-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill ; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] ; 
AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: callq __truncdfhf2 @@ -5761,145 +5617,75 @@ define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind { ; AVX2-NEXT: popq %rbp ; AVX2-NEXT: retq ; -; AVX512F-LABEL: store_cvt_8f64_to_8i16: -; AVX512F: # BB#0: -; AVX512F-NEXT: pushq %rbp -; AVX512F-NEXT: pushq %r15 -; AVX512F-NEXT: pushq %r14 -; AVX512F-NEXT: pushq %r13 -; AVX512F-NEXT: pushq %r12 -; AVX512F-NEXT: pushq %rbx -; AVX512F-NEXT: subq $200, %rsp -; AVX512F-NEXT: movq %rdi, %rbx -; AVX512F-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload -; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; AVX512F-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, %r12d -; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, %r13d -; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload -; AVX512F-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, %ebp -; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, %r14d -; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512F-NEXT: vzeroupper -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movl %eax, %r15d -; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; AVX512F-NEXT: callq __truncdfhf2 -; AVX512F-NEXT: movw %ax, 12(%rbx) -; AVX512F-NEXT: movw %r15w, 8(%rbx) -; AVX512F-NEXT: movw %r14w, 4(%rbx) -; AVX512F-NEXT: movw %bp, (%rbx) -; AVX512F-NEXT: movw %r13w, 14(%rbx) -; AVX512F-NEXT: movw %r12w, 10(%rbx) -; AVX512F-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload -; AVX512F-NEXT: movw %ax, 6(%rbx) -; AVX512F-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload -; AVX512F-NEXT: movw %ax, 2(%rbx) -; AVX512F-NEXT: addq $200, %rsp -; AVX512F-NEXT: popq %rbx -; AVX512F-NEXT: popq %r12 -; AVX512F-NEXT: popq %r13 -; AVX512F-NEXT: popq %r14 -; AVX512F-NEXT: popq %r15 -; AVX512F-NEXT: popq %rbp -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: store_cvt_8f64_to_8i16: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: pushq %rbp -; AVX512VL-NEXT: pushq %r15 -; AVX512VL-NEXT: pushq %r14 -; AVX512VL-NEXT: pushq %r13 -; AVX512VL-NEXT: pushq %r12 -; AVX512VL-NEXT: pushq %rbx -; AVX512VL-NEXT: subq $200, %rsp -; AVX512VL-NEXT: movq %rdi, %rbx -; AVX512VL-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 
-; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill -; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload -; AVX512VL-NEXT: vextractf64x4 $1, %zmm0, %ymm0 -; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %r12d -; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %r13d -; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload -; AVX512VL-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %ebp -; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %r14d -; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; AVX512VL-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512VL-NEXT: vzeroupper -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movl %eax, %r15d -; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; AVX512VL-NEXT: callq __truncdfhf2 -; AVX512VL-NEXT: movw %ax, 12(%rbx) -; AVX512VL-NEXT: movw %r15w, 8(%rbx) -; AVX512VL-NEXT: movw %r14w, 4(%rbx) -; AVX512VL-NEXT: movw %bp, (%rbx) -; AVX512VL-NEXT: movw %r13w, 14(%rbx) -; AVX512VL-NEXT: movw %r12w, 10(%rbx) -; AVX512VL-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload -; AVX512VL-NEXT: movw %ax, 6(%rbx) -; AVX512VL-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload -; AVX512VL-NEXT: movw %ax, 2(%rbx) -; AVX512VL-NEXT: addq $200, %rsp -; AVX512VL-NEXT: popq %rbx -; AVX512VL-NEXT: popq %r12 -; AVX512VL-NEXT: popq %r13 -; AVX512VL-NEXT: popq %r14 -; AVX512VL-NEXT: popq %r15 -; AVX512VL-NEXT: popq %rbp -; AVX512VL-NEXT: retq +; AVX512-LABEL: store_cvt_8f64_to_8i16: +; AVX512: # BB#0: +; AVX512-NEXT: pushq %rbp +; AVX512-NEXT: pushq %r15 +; AVX512-NEXT: pushq %r14 +; AVX512-NEXT: pushq %r13 +; AVX512-NEXT: pushq %r12 +; AVX512-NEXT: pushq %rbx +; AVX512-NEXT: subq $200, %rsp +; AVX512-NEXT: movq %rdi, %rbx +; AVX512-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill +; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill +; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload +; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm0 +; AVX512-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; 
AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movl %eax, %r12d +; AVX512-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill +; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movl %eax, %r13d +; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload +; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movl %eax, %ebp +; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movl %eax, %r14d +; AVX512-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload +; AVX512-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movl %eax, %r15d +; AVX512-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; AVX512-NEXT: callq __truncdfhf2 +; AVX512-NEXT: movw %ax, 12(%rbx) +; AVX512-NEXT: movw %r15w, 8(%rbx) +; AVX512-NEXT: movw %r14w, 4(%rbx) +; AVX512-NEXT: movw %bp, (%rbx) +; AVX512-NEXT: movw %r13w, 14(%rbx) +; AVX512-NEXT: movw %r12w, 10(%rbx) +; AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload +; AVX512-NEXT: movw %ax, 6(%rbx) +; AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload +; AVX512-NEXT: movw %ax, 2(%rbx) +; AVX512-NEXT: addq $200, %rsp +; AVX512-NEXT: popq %rbx +; AVX512-NEXT: popq %r12 +; AVX512-NEXT: popq %r13 +; AVX512-NEXT: popq %r14 +; AVX512-NEXT: popq %r15 +; AVX512-NEXT: popq %rbp +; AVX512-NEXT: retq %1 = fptrunc <8 x double> %a0 to <8 x half> %2 = bitcast <8 x half> %1 to <8 x i16> store <8 x i16> %2, <8 x i16>* %a1 diff --git a/test/CodeGen/X86/vector-shuffle-256-v4.ll b/test/CodeGen/X86/vector-shuffle-256-v4.ll index cb05fc74424..4eb64c1ed88 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v4.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v4.ll @@ -857,20 +857,10 @@ define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) { } define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) { -; AVX1-LABEL: shuffle_v4i64_0145: -; AVX1: # BB#0: -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4i64_0145: -; AVX2: # BB#0: -; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4i64_0145: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4i64_0145: +; ALL: # BB#0: +; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle } @@ -901,20 +891,10 @@ define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) { } define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) { -; AVX1-LABEL: shuffle_v4i64_4501: -; AVX1: # BB#0: -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: shuffle_v4i64_4501: -; AVX2: # BB#0: -; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v4i64_4501: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: shuffle_v4i64_4501: +; ALL: # BB#0: +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; ALL-NEXT: retq %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> ret <4 x i64> %shuffle } @@ -1487,20 +1467,10 @@ define <4 x i64> 
@concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) { } define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) { -; AVX1-LABEL: concat_v4i64_0145_bc: -; AVX1: # BB#0: -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: concat_v4i64_0145_bc: -; AVX2: # BB#0: -; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: concat_v4i64_0145_bc: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: retq +; ALL-LABEL: concat_v4i64_0145_bc: +; ALL: # BB#0: +; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; ALL-NEXT: retq %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32> diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll index 42e5f40be45..aaf9890a024 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -2021,17 +2021,11 @@ define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) { ; AVX1-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3] ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v8i32_44444444: -; AVX2: # BB#0: -; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 -; AVX2-NEXT: retq -; -; AVX512VL-LABEL: shuffle_v8i32_44444444: -; AVX512VL: # BB#0: -; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX512VL-NEXT: vpbroadcastd %xmm0, %ymm0 -; AVX512VL-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v8i32_44444444: +; AVX2OR512VL: # BB#0: +; AVX2OR512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %ymm0 +; AVX2OR512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } diff --git a/test/CodeGen/X86/vector-shuffle-512-v16.ll b/test/CodeGen/X86/vector-shuffle-512-v16.ll index 174a487160c..3fb13282e67 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v16.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v16.ll @@ -26,8 +26,8 @@ define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08 define <16 x float> @shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16f32_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_08_bc: ; ALL: # BB#0: -; ALL-NEXT: vextracti32x4 $2, %zmm0, %xmm0 -; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 +; ALL-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; ALL-NEXT: vbroadcastss %xmm0, %zmm0 ; ALL-NEXT: retq %tmp0 = bitcast <16 x i32> %a to <16 x float> %tmp1 = bitcast <16 x i32> %b to <16 x float> @@ -158,8 +158,8 @@ define <16 x i32> @shuffle_v16i32_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_0 define <16 x i32> @shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04(<16 x i32> %a, <16 x i32> %b) { ; ALL-LABEL: shuffle_v16i32_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04_04: ; ALL: # BB#0: -; ALL-NEXT: vextracti32x4 $1, %zmm0, %xmm0 -; ALL-NEXT: vpbroadcastd %xmm0, %zmm0 +; ALL-NEXT: vextractf32x4 $1, %zmm0, %xmm0 +; ALL-NEXT: vbroadcastss %xmm0, %zmm0 ; ALL-NEXT: retq %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> ret <16 x i32> %shuffle @@ -283,7 +283,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) { ; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15: ; ALL: # BB#0: -; ALL-NEXT: vextracti32x8 $1, %zmm0, %ymm1 +; ALL-NEXT: vextractf32x8 $1, %zmm0, %ymm1 ; 
ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7] ; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; ALL-NEXT: retq @@ -692,8 +692,8 @@ define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03 ; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03: ; ALL: # BB#0: ; ALL-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; ALL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; ALL-NEXT: vinserti32x8 $1, %ymm0, %zmm0, %zmm0 +; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; ALL-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0 ; ALL-NEXT: retq %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> ret <16 x i32> %res diff --git a/test/CodeGen/X86/vector-shuffle-512-v8.ll b/test/CodeGen/X86/vector-shuffle-512-v8.ll index 542f30dc0d2..10d378c7ddf 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v8.ll @@ -51,14 +51,14 @@ define <8 x double> @shuffle_v8f64_44444444(<8 x double> %a, <8 x double> %b) { define <8 x double> @shuffle_v8f64_44444444_bc(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8f64_44444444_bc: ; AVX512F: # BB#0: -; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8f64_44444444_bc: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0 ; AVX512F-32-NEXT: retl %tmp0 = bitcast <8 x i64> %a to <8 x double> %tmp1 = bitcast <8 x i64> %b to <8 x double> @@ -1012,14 +1012,14 @@ define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_44444444: ; AVX512F: # BB#0: -; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_44444444: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vextracti32x4 $2, %zmm0, %xmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-32-NEXT: vextractf32x4 $2, %zmm0, %xmm0 +; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -1028,14 +1028,14 @@ define <8 x i64> @shuffle_v8i64_44444444(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_66666666(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_66666666: ; AVX512F: # BB#0: -; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512F-NEXT: vbroadcastsd %xmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_66666666: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vextracti32x4 $3, %zmm0, %xmm0 -; AVX512F-32-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512F-32-NEXT: vextractf32x4 $3, %zmm0, %xmm0 +; AVX512F-32-NEXT: vbroadcastsd %xmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2457,12 +2457,12 @@ define <8 x double> @shuffle_v8f64_01230123(<8 x double> %a, <8 x double> %b) { define <8 x i64> @shuffle_v8i64_012389AB(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_012389AB: ; 
AVX512F: # BB#0: -; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_012389AB: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2471,12 +2471,12 @@ define <8 x i64> @shuffle_v8i64_012389AB(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_89AB0123(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_89AB0123: ; AVX512F: # BB#0: -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_89AB0123: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2485,12 +2485,12 @@ define <8 x i64> @shuffle_v8i64_89AB0123(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01230123(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_01230123: ; AVX512F: # BB#0: -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_01230123: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2555,12 +2555,12 @@ define <8 x double> @shuffle_v8f64_01234589(<8 x double> %a, <8 x double> %b) { define <8 x i64> @shuffle_v8i64_89234567(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_89234567: ; AVX512F: # BB#0: -; AVX512F-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_89234567: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2569,12 +2569,12 @@ define <8 x i64> @shuffle_v8i64_89234567(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01894567(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_01894567: ; AVX512F: # BB#0: -; AVX512F-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_01894567: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2583,12 +2583,12 @@ define <8 x i64> @shuffle_v8i64_01894567(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01238967(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_01238967: ; AVX512F: # BB#0: -; AVX512F-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vinsertf32x4 $2, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_01238967: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vinserti32x4 $2, %xmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vinsertf32x4 $2, 
%xmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2597,12 +2597,12 @@ define <8 x i64> @shuffle_v8i64_01238967(<8 x i64> %a, <8 x i64> %b) { define <8 x i64> @shuffle_v8i64_01234589(<8 x i64> %a, <8 x i64> %b) { ; AVX512F-LABEL: shuffle_v8i64_01234589: ; AVX512F: # BB#0: -; AVX512F-NEXT: vinserti32x4 $3, %xmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v8i64_01234589: ; AVX512F-32: # BB#0: -; AVX512F-32-NEXT: vinserti32x4 $3, %xmm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vinsertf32x4 $3, %xmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> ret <8 x i64> %shuffle @@ -2628,15 +2628,15 @@ define <8 x i64> @shuffle_v2i64_v8i64_01010101(<2 x i64> %a) { ; AVX512F-LABEL: shuffle_v2i64_v8i64_01010101: ; AVX512F: # BB#0: ; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512F-32-LABEL: shuffle_v2i64_v8i64_01010101: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0 +; AVX512F-32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl %shuffle = shufflevector <2 x i64> %a, <2 x i64> undef, <8 x i32> ret <8 x i64> %shuffle diff --git a/test/CodeGen/X86/vector-shuffle-avx512.ll b/test/CodeGen/X86/vector-shuffle-avx512.ll index 753367c95ab..e309ee62494 100644 --- a/test/CodeGen/X86/vector-shuffle-avx512.ll +++ b/test/CodeGen/X86/vector-shuffle-avx512.ll @@ -479,7 +479,7 @@ define <16 x float> @expand13(<8 x float> %a ) { ; ; KNL64-LABEL: expand13: ; KNL64: # BB#0: -; KNL64-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; KNL64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; KNL64-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; KNL64-NEXT: retq ; @@ -491,7 +491,7 @@ define <16 x float> @expand13(<8 x float> %a ) { ; ; KNL32-LABEL: expand13: ; KNL32: # BB#0: -; KNL32-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; KNL32-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; KNL32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 ; KNL32-NEXT: retl %res = shufflevector <8 x float> zeroinitializer, <8 x float> %a, <16 x i32>