From ea061f0a6917d44886f01c3c950cbb75e5b60be0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 11 May 2016 15:13:29 +0000 Subject: [PATCH] [X86][AVX512] Regenerate intrinsics test llvm-svn: 269193 --- test/CodeGen/X86/avx512-intrinsics.ll | 38 ++--- test/CodeGen/X86/avx512bw-intrinsics.ll | 178 ++++++++++++++++-------- 2 files changed, 133 insertions(+), 83 deletions(-) diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 568f2ab687e..7f93e97ad5c 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -6855,9 +6855,9 @@ define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i3 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxbd %xmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxbd %xmm0, %zmm0 +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; CHECK-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -6875,9 +6875,9 @@ define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxbq %xmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxbq %xmm0, %zmm0 +; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -6895,9 +6895,9 @@ define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxdq %ymm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxdq %ymm0, %zmm0 +; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; CHECK-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -6915,9 +6915,9 @@ define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxwd %ymm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxwd %ymm0, %zmm0 +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; CHECK-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero ; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -6935,9 +6935,9 @@ define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1} -; CHECK-NEXT: vpmovzxwq %xmm0, %zmm2 {%k1} {z} -; CHECK-NEXT: vpmovzxwq %xmm0, %zmm0 +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; CHECK-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero ; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1 ; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq @@ -7249,7 +7249,6 @@ define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, < ; CHECK-NEXT: vaddpd %zmm4, %zmm3, %zmm1 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4) %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4) %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8) @@ -7273,7 +7272,6 @@ define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, ; CHECK-NEXT: vaddpd %zmm5, %zmm3, %zmm1 ; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4) %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4) %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8) @@ -7298,7 +7296,6 @@ define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x fl ; CHECK-NEXT: vaddps %xmm5, %xmm3, %xmm1 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq - %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4) %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8) @@ -7323,7 +7320,6 @@ define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x f ; CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: retq - %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4) %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8) %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 4) @@ -7347,7 +7343,6 @@ define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, < ; CHECK-NEXT: vaddps %zmm5, %zmm3, %zmm1 ; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4) %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8) @@ -7371,7 +7366,6 @@ define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, ; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 ; CHECK-NEXT: vaddps %zmm4, %zmm0, %zmm0 ; CHECK-NEXT: retq - %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4) %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 8) %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4) @@ -7396,7 +7390,6 @@ define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x ; CHECK-NEXT: vaddpd %xmm0, %xmm3, %xmm0 ; CHECK-NEXT: vaddpd %xmm4, %xmm0, %xmm0 ; CHECK-NEXT: retq - %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 -1, i32 4) @@ -7421,7 +7414,6 @@ define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x ; CHECK-NEXT: vaddpd %xmm5, %xmm3, %xmm1 ; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: retq - %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4) %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8) %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8) diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll index 21e18f5af53..c5b707e9f0a 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -3219,11 +3219,11 @@ define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 ; AVX512F-32-NEXT: vmovdqu16 (%ecx), %zmm1 {%k1} {z} ; AVX512F-32-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl - %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1) - %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask) - %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask) - %res2 = add <32 x i16> %res, %res1 - ret <32 x i16> %res2 + %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1) + %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 } declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64) @@ -3250,11 +3250,11 @@ define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x ; AVX512F-32-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z} ; AVX512F-32-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ; AVX512F-32-NEXT: retl - %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1) - %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask) - %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask) - %res2 = add <64 x i8> %res, %res1 - ret <64 x i8> %res2 + %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1) + %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 } declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32) @@ -3263,9 +3263,9 @@ define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i1 ; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: ; AVX512BW: ## BB#0: ; AVX512BW-NEXT: kmovd %edi, %k1 -; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} -; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z} -; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm0 +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq @@ -3273,9 +3273,9 @@ define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i1 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512: ; AVX512F-32: # BB#0: ; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 -; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1} -; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z} -; AVX512F-32-NEXT: vpmovzxbw %ymm0, %zmm0 +; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; AVX512F-32-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero ; AVX512F-32-NEXT: vpaddw %zmm2, %zmm1, %zmm1 ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl @@ -3413,10 +3413,10 @@ define <32 x i16>@test_int_x86_avx512_mask_movu_w_512(<32 x i16> %x0, <32 x i16> ; AVX512F-32-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: vpaddw %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <32 x i16> @llvm.x86.avx512.mask.movu.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) - %res1 = call <32 x i16> @llvm.x86.avx512.mask.movu.w.512(<32 x i16> %x0, <32 x i16> zeroinitializer, i32 %x2) - %res2 = add <32 x i16> %res, %res1 - ret <32 x i16> %res2 + %res = call <32 x i16> @llvm.x86.avx512.mask.movu.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.movu.w.512(<32 x i16> %x0, <32 x i16> zeroinitializer, i32 %x2) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 } declare <64 x i8> @llvm.x86.avx512.mask.movu.b.512(<64 x i8>, <64 x i8>, i64) @@ -3439,24 +3439,43 @@ define <64 x i8>@test_int_x86_avx512_mask_movu_b_512(<64 x i8> %x0, <64 x i8> %x ; AVX512F-32-NEXT: vmovdqu8 %zmm0, %zmm0 {%k1} {z} ; AVX512F-32-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: retl - %res = call <64 x i8> @llvm.x86.avx512.mask.movu.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) - %res1 = call <64 x i8> @llvm.x86.avx512.mask.movu.b.512(<64 x i8> %x0, <64 x i8> zeroinitializer, i64 %x2) - %res2 = add <64 x i8> %res, %res1 - ret <64 x i8> %res2 + %res = call <64 x i8> @llvm.x86.avx512.mask.movu.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.movu.b.512(<64 x i8> %x0, <64 x i8> zeroinitializer, i64 %x2) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 } declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64) define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_b_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x26,0xc1] -; CHECK-NEXT: kmovq %k0, %rcx ## encoding: [0xc4,0xe1,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestmb %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1] -; CHECK-NEXT: kmovq %k0, %rax ## encoding: [0xc4,0xe1,0xfb,0x93,0xc0] -; CHECK-NEXT: addq %rcx, %rax ## encoding: [0x48,0x01,0xc8] -; CHECK-NEXT: retq ## encoding: [0xc3] +; AVX512BW-LABEL: test_int_x86_avx512_ptestm_b_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovq %rdi, %k1 +; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: kmovq %k0, %rcx +; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: addq %rcx, %rax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $20, %esp +; AVX512F-32-NEXT: .Ltmp10: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $20, %esp +; AVX512F-32-NEXT: retl %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) %res2 = add i64 %res, %res1 @@ -3466,15 +3485,25 @@ define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32) define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestm_w_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] -; CHECK-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vptestmw %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8] -; CHECK-NEXT: retq ## encoding: [0xc3] +; AVX512BW-LABEL: test_int_x86_avx512_ptestm_w_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: kmovd %k0, %ecx +; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: addl %ecx, %eax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: retl %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) %res2 = add i32 %res, %res1 @@ -3484,15 +3513,34 @@ define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2) define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovq %rdi, %k1 -; CHECK-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovq %k0, %rcx -; CHECK-NEXT: vptestnmb %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovq %k0, %rax -; CHECK-NEXT: addq %rcx, %rax -; CHECK-NEXT: retq +; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_b_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovq %rdi, %k1 +; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: kmovq %k0, %rcx +; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovq %k0, %rax +; AVX512BW-NEXT: addq %rcx, %rax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: subl $20, %esp +; AVX512F-32-NEXT: .Ltmp11: +; AVX512F-32-NEXT: .cfi_def_cfa_offset 24 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0 +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1 +; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovq %k0, (%esp) +; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: movl (%esp), %eax +; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax +; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx +; AVX512F-32-NEXT: addl $20, %esp +; AVX512F-32-NEXT: retl %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1) %res2 = add i64 %res, %res1 @@ -3502,15 +3550,25 @@ define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 % declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2) define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) { -; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_512: -; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 -; CHECK-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovd %k0, %ecx -; CHECK-NEXT: vptestnmw %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovd %k0, %eax -; CHECK-NEXT: addl %ecx, %eax -; CHECK-NEXT: retq +; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_w_512: +; AVX512BW: ## BB#0: +; AVX512BW-NEXT: kmovd %edi, %k1 +; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1} +; AVX512BW-NEXT: kmovd %k0, %ecx +; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 +; AVX512BW-NEXT: kmovd %k0, %eax +; AVX512BW-NEXT: addl %ecx, %eax +; AVX512BW-NEXT: retq +; +; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512: +; AVX512F-32: # BB#0: +; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1 +; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1} +; AVX512F-32-NEXT: kmovd %k0, %ecx +; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 +; AVX512F-32-NEXT: kmovd %k0, %eax +; AVX512F-32-NEXT: addl %ecx, %eax +; AVX512F-32-NEXT: retl %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1) %res2 = add i32 %res, %res1