diff --git a/test/CodeGen/X86/vector-zext.ll b/test/CodeGen/X86/vector-zext.ll index 7985043ff86..96d5125c918 100644 --- a/test/CodeGen/X86/vector-zext.ll +++ b/test/CodeGen/X86/vector-zext.ll @@ -4,6 +4,7 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { ; SSE2-LABEL: zext_16i8_to_8i16: @@ -71,6 +72,11 @@ define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) { ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: zext_16i8_to_16i16: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512-NEXT: retq entry: %B = zext <16 x i8> %A to <16 x i16> ret <16 x i16> %B @@ -151,6 +157,12 @@ define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: zext_16i8_to_8i32: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512-NEXT: retq entry: %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> %C = zext <8 x i8> %B to <8 x i32> @@ -229,6 +241,12 @@ define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq +; +; AVX512-LABEL: zext_16i8_to_4i64: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512-NEXT: retq entry: %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> %C = zext <4 x i8> %B to <4 x i64> @@ -300,6 +318,11 @@ define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: zext_8i16_to_8i32: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: retq entry: %B = zext <8 x i16> %A to <8 x i32> ret <8 x i32>%B @@ -381,6 +404,13 @@ define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp ; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 ; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15] ; AVX2-NEXT: retq +; +; AVX512-LABEL: zext_8i16_to_4i64: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512-NEXT: vpxor %ymm1, %ymm1, %ymm1 +; AVX512-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7],ymm0[8],ymm1[9,10,11],ymm0[12],ymm1[13,14,15] +; AVX512-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> %C = zext <4 x i16> %B to <4 x i64> @@ -452,6 +482,11 @@ define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: zext_4i32_to_4i64: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512-NEXT: retq entry: %B = zext <4 x i32> %A to <4 x i64> ret <4 x i64>%B @@ -562,6 +597,11 @@ define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) { ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_zext_4i8_to_4i64: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: retq entry: %X = load <4 x i8>, <4 x i8>* %ptr %Y = zext <4 x i8> %X to <4 x i64> @@ -637,6 +677,11 @@ define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) { ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_zext_8i8_to_8i32: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero +; AVX512-NEXT: retq entry: %X = load <8 x i8>, <8 x i8>* %ptr %Y = zext <8 x i8> %X to <8 x i32> @@ -699,6 +744,11 @@ define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) { ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_zext_8i8_to_8i64: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxbq (%rdi), %zmm0 +; AVX512-NEXT: retq entry: %X = load <8 x i8>, <8 x i8>* %ptr %Y = zext <8 x i8> %X to <8 x i64> @@ -741,6 +791,11 @@ define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) { ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_zext_16i8_to_16i16: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; AVX512-NEXT: retq entry: %X = load <16 x i8>, <16 x i8>* %ptr %Y = zext <16 x i8> %X to <16 x i16> @@ -847,6 +902,11 @@ define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) { ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_zext_4i16_to_4i64: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; AVX512-NEXT: retq entry: %X = load <4 x i16>, <4 x i16>* %ptr %Y = zext <4 x i16> %X to <4 x i64> @@ -889,6 +949,11 @@ define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) { ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_zext_8i16_to_8i32: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; AVX512-NEXT: retq entry: %X = load <8 x i16>, <8 x i16>* %ptr %Y = zext <8 x i16> %X to <8 x i32> @@ -961,6 +1026,11 @@ define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) { ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: load_zext_4i32_to_4i64: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; AVX512-NEXT: retq entry: %X = load <4 x i32>, <4 x i32>* %ptr %Y = zext <4 x i32> %X to <4 x i64> @@ -1011,6 +1081,12 @@ define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: zext_8i8_to_8i32: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: retq entry: %t = zext <8 x i8> %z to <8 x i32> ret <8 x i32> %t @@ -1053,6 +1129,11 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: shuf_zext_8i16_to_8i32: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> %Z = bitcast <16 x i16> %B to <8 x i32> @@ -1097,6 +1178,11 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ; AVX2: # BB#0: # %entry ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: shuf_zext_4i32_to_4i64: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512-NEXT: retq entry: %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> %Z = bitcast <8 x i32> %B to <4 x i64> @@ -1150,6 +1236,12 @@ define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) { ; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] ; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: shuf_zext_8i8_to_8i32: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; AVX512-NEXT: retq entry: %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> %Z = bitcast <32 x i8> %B to <8 x i32> @@ -1232,6 +1324,12 @@ define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero +; AVX512-NEXT: retq entry: %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> %Z = bitcast <32 x i8> %B to <4 x i64> @@ -1316,6 +1414,12 @@ define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] ; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] +; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero +; AVX512-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> %Z = bitcast <16 x i16> %B to <4 x i64> @@ -1384,6 +1488,12 @@ define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable ; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: retq entry: %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> %Z = bitcast <16 x i16> %B to <8 x i32> @@ -1431,6 +1541,12 @@ define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtabl ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX512-NEXT: retq entry: %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> %Z = bitcast <16 x i16> %B to <8 x i32> @@ -1493,6 +1609,12 @@ define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable ; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] ; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX2-NEXT: retq +; +; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1: +; AVX512: # BB#0: # %entry +; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] +; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512-NEXT: retq entry: %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> %Z = bitcast <8 x i32> %B to <4 x i64>