1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

[X86][SSE] Regenerate slow-pmulld.ll test checks

This commit is contained in:
Simon Pilgrim 2021-06-09 12:23:13 +01:00
parent b6a5326556
commit 3c3fd22471

View File

@ -21,61 +21,61 @@ define <4 x i32> @test_mul_v4i32_v4i8(<4 x i8> %A) {
; CHECK32-LABEL: test_mul_v4i32_v4i8:
; CHECK32: # %bb.0:
; CHECK32-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK32-NEXT: pmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK32-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: test_mul_v4i32_v4i8:
; CHECK64: # %bb.0:
; CHECK64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK64-NEXT: pmaddwd {{.*}}(%rip), %xmm0
; CHECK64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK64-NEXT: retq
;
; SSE4-32-LABEL: test_mul_v4i32_v4i8:
; SSE4-32: # %bb.0:
; SSE4-32-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE4-32-NEXT: pmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE4-32-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE4-32-NEXT: retl
;
; SSE4-64-LABEL: test_mul_v4i32_v4i8:
; SSE4-64: # %bb.0:
; SSE4-64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE4-64-NEXT: pmaddwd {{.*}}(%rip), %xmm0
; SSE4-64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-64-NEXT: retq
;
; AVX2-32-LABEL: test_mul_v4i32_v4i8:
; AVX2-32: # %bb.0:
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX2-32-NEXT: retl
;
; AVX2-64-LABEL: test_mul_v4i32_v4i8:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-64-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-64-NEXT: retq
;
; AVX512DQ-32-LABEL: test_mul_v4i32_v4i8:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512DQ-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: test_mul_v4i32_v4i8:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512DQ-64-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQ-64-NEXT: retq
;
; AVX512BW-32-LABEL: test_mul_v4i32_v4i8:
; AVX512BW-32: # %bb.0:
; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512BW-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512BW-32-NEXT: retl
;
; AVX512BW-64-LABEL: test_mul_v4i32_v4i8:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512BW-64-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512BW-64-NEXT: retq
;
; KNL-32-LABEL: test_mul_v4i32_v4i8:
@ -168,37 +168,37 @@ define <8 x i32> @test_mul_v8i32_v8i8(<8 x i8> %A) {
; AVX2-32-LABEL: test_mul_v8i32_v8i8:
; AVX2-32: # %bb.0:
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX2-32-NEXT: retl
;
; AVX2-64-LABEL: test_mul_v8i32_v8i8:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-64-NEXT: vpmaddwd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-64-NEXT: retq
;
; AVX512DQ-32-LABEL: test_mul_v8i32_v8i8:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: test_mul_v8i32_v8i8:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-64-NEXT: vpmaddwd {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512DQ-64-NEXT: retq
;
; AVX512BW-32-LABEL: test_mul_v8i32_v8i8:
; AVX512BW-32: # %bb.0:
; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512BW-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX512BW-32-NEXT: retl
;
; AVX512BW-64-LABEL: test_mul_v8i32_v8i8:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512BW-64-NEXT: vpmaddwd {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512BW-64-NEXT: retq
;
; KNL-32-LABEL: test_mul_v8i32_v8i8:
@ -359,37 +359,37 @@ define <16 x i32> @test_mul_v16i32_v16i8(<16 x i8> %A) {
; AVX512DQ-32-LABEL: test_mul_v16i32_v16i8:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-32-NEXT: vpmulld {{\.LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; AVX512DQ-32-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: test_mul_v16i32_v16i8:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-64-NEXT: vpmulld {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-64-NEXT: retq
;
; AVX512BW-32-LABEL: test_mul_v16i32_v16i8:
; AVX512BW-32: # %bb.0:
; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512BW-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
; AVX512BW-32-NEXT: retl
;
; AVX512BW-64-LABEL: test_mul_v16i32_v16i8:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512BW-64-NEXT: vpmaddwd {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
;
; KNL-32-LABEL: test_mul_v16i32_v16i8:
; KNL-32: # %bb.0:
; KNL-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-32-NEXT: vpmulld {{\.LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; KNL-32-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; KNL-32-NEXT: retl
;
; KNL-64-LABEL: test_mul_v16i32_v16i8:
; KNL-64: # %bb.0:
; KNL-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-64-NEXT: vpmulld {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-64-NEXT: retq
%z = zext <16 x i8> %A to <16 x i32>
%m = mul nuw nsw <16 x i32> %z, <i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778>
@ -418,13 +418,13 @@ define <4 x i32> @test_mul_v4i32_v4i16(<4 x i16> %A) {
; SSE4-32-LABEL: test_mul_v4i32_v4i16:
; SSE4-32: # %bb.0:
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE4-32-NEXT: pmulld {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE4-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE4-32-NEXT: retl
;
; SSE4-64-LABEL: test_mul_v4i32_v4i16:
; SSE4-64: # %bb.0:
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE4-64-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE4-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-64-NEXT: retq
;
; AVX-32-LABEL: test_mul_v4i32_v4i16:
@ -666,13 +666,13 @@ define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) {
; AVX512-32-LABEL: test_mul_v16i32_v16i16:
; AVX512-32: # %bb.0:
; AVX512-32-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-32-NEXT: vpmulld {{\.LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; AVX512-32-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; AVX512-32-NEXT: retl
;
; AVX512-64-LABEL: test_mul_v16i32_v16i16:
; AVX512-64: # %bb.0:
; AVX512-64-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-64-NEXT: vpmulld {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-64-NEXT: retq
%z = zext <16 x i16> %A to <16 x i32>
%m = mul nuw nsw <16 x i32> %z, <i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778>
@ -687,61 +687,61 @@ define <4 x i32> @test_mul_v4i32_v4i8_minsize(<4 x i8> %A) minsize {
; CHECK32-LABEL: test_mul_v4i32_v4i8_minsize:
; CHECK32: # %bb.0:
; CHECK32-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK32-NEXT: pmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK32-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: test_mul_v4i32_v4i8_minsize:
; CHECK64: # %bb.0:
; CHECK64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; CHECK64-NEXT: pmaddwd {{.*}}(%rip), %xmm0
; CHECK64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK64-NEXT: retq
;
; SSE4-32-LABEL: test_mul_v4i32_v4i8_minsize:
; SSE4-32: # %bb.0:
; SSE4-32-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE4-32-NEXT: pmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE4-32-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE4-32-NEXT: retl
;
; SSE4-64-LABEL: test_mul_v4i32_v4i8_minsize:
; SSE4-64: # %bb.0:
; SSE4-64-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE4-64-NEXT: pmaddwd {{.*}}(%rip), %xmm0
; SSE4-64-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-64-NEXT: retq
;
; AVX2-32-LABEL: test_mul_v4i32_v4i8_minsize:
; AVX2-32: # %bb.0:
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX2-32-NEXT: retl
;
; AVX2-64-LABEL: test_mul_v4i32_v4i8_minsize:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX2-64-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-64-NEXT: retq
;
; AVX512DQ-32-LABEL: test_mul_v4i32_v4i8_minsize:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512DQ-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: test_mul_v4i32_v4i8_minsize:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512DQ-64-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512DQ-64-NEXT: retq
;
; AVX512BW-32-LABEL: test_mul_v4i32_v4i8_minsize:
; AVX512BW-32: # %bb.0:
; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512BW-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; AVX512BW-32-NEXT: retl
;
; AVX512BW-64-LABEL: test_mul_v4i32_v4i8_minsize:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX512BW-64-NEXT: vpmaddwd {{.*}}(%rip), %xmm0, %xmm0
; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512BW-64-NEXT: retq
;
; KNL-32-LABEL: test_mul_v4i32_v4i8_minsize:
@ -826,37 +826,37 @@ define <8 x i32> @test_mul_v8i32_v8i8_minsize(<8 x i8> %A) minsize {
; AVX2-32-LABEL: test_mul_v8i32_v8i8_minsize:
; AVX2-32: # %bb.0:
; AVX2-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX2-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX2-32-NEXT: retl
;
; AVX2-64-LABEL: test_mul_v8i32_v8i8_minsize:
; AVX2-64: # %bb.0:
; AVX2-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-64-NEXT: vpmaddwd {{.*}}(%rip), %ymm0, %ymm0
; AVX2-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-64-NEXT: retq
;
; AVX512DQ-32-LABEL: test_mul_v8i32_v8i8_minsize:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX512DQ-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: test_mul_v8i32_v8i8_minsize:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512DQ-64-NEXT: vpmaddwd {{.*}}(%rip), %ymm0, %ymm0
; AVX512DQ-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512DQ-64-NEXT: retq
;
; AVX512BW-32-LABEL: test_mul_v8i32_v8i8_minsize:
; AVX512BW-32: # %bb.0:
; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512BW-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; AVX512BW-32-NEXT: retl
;
; AVX512BW-64-LABEL: test_mul_v8i32_v8i8_minsize:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512BW-64-NEXT: vpmaddwd {{.*}}(%rip), %ymm0, %ymm0
; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX512BW-64-NEXT: retq
;
; KNL-32-LABEL: test_mul_v8i32_v8i8_minsize:
@ -997,37 +997,37 @@ define <16 x i32> @test_mul_v16i32_v16i8_minsize(<16 x i8> %A) minsize {
; AVX512DQ-32-LABEL: test_mul_v16i32_v16i8_minsize:
; AVX512DQ-32: # %bb.0:
; AVX512DQ-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-32-NEXT: vpmulld {{\.LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; AVX512DQ-32-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; AVX512DQ-32-NEXT: retl
;
; AVX512DQ-64-LABEL: test_mul_v16i32_v16i8_minsize:
; AVX512DQ-64: # %bb.0:
; AVX512DQ-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512DQ-64-NEXT: vpmulld {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512DQ-64-NEXT: retq
;
; AVX512BW-32-LABEL: test_mul_v16i32_v16i8_minsize:
; AVX512BW-32: # %bb.0:
; AVX512BW-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512BW-32-NEXT: vpmaddwd {{\.LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
; AVX512BW-32-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
; AVX512BW-32-NEXT: retl
;
; AVX512BW-64-LABEL: test_mul_v16i32_v16i8_minsize:
; AVX512BW-64: # %bb.0:
; AVX512BW-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; AVX512BW-64-NEXT: vpmaddwd {{.*}}(%rip), %zmm0, %zmm0
; AVX512BW-64-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
; AVX512BW-64-NEXT: retq
;
; KNL-32-LABEL: test_mul_v16i32_v16i8_minsize:
; KNL-32: # %bb.0:
; KNL-32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-32-NEXT: vpmulld {{\.LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; KNL-32-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; KNL-32-NEXT: retl
;
; KNL-64-LABEL: test_mul_v16i32_v16i8_minsize:
; KNL-64: # %bb.0:
; KNL-64-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-64-NEXT: vpmulld {{.*}}(%rip){1to16}, %zmm0, %zmm0
; KNL-64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; KNL-64-NEXT: retq
%z = zext <16 x i8> %A to <16 x i32>
%m = mul nuw nsw <16 x i32> %z, <i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778>
@ -1038,25 +1038,25 @@ define <4 x i32> @test_mul_v4i32_v4i16_minsize(<4 x i16> %A) minsize {
; CHECK32-LABEL: test_mul_v4i32_v4i16_minsize:
; CHECK32: # %bb.0:
; CHECK32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK32-NEXT: pmulld {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; CHECK32-NEXT: retl
;
; CHECK64-LABEL: test_mul_v4i32_v4i16_minsize:
; CHECK64: # %bb.0:
; CHECK64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK64-NEXT: pmulld {{.*}}(%rip), %xmm0
; CHECK64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK64-NEXT: retq
;
; SSE4-32-LABEL: test_mul_v4i32_v4i16_minsize:
; SSE4-32: # %bb.0:
; SSE4-32-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE4-32-NEXT: pmulld {{\.LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE4-32-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE4-32-NEXT: retl
;
; SSE4-64-LABEL: test_mul_v4i32_v4i16_minsize:
; SSE4-64: # %bb.0:
; SSE4-64-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; SSE4-64-NEXT: pmulld {{.*}}(%rip), %xmm0
; SSE4-64-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE4-64-NEXT: retq
;
; AVX-32-LABEL: test_mul_v4i32_v4i16_minsize:
@ -1260,13 +1260,13 @@ define <16 x i32> @test_mul_v16i32_v16i16_minsize(<16 x i16> %A) minsize {
; AVX512-32-LABEL: test_mul_v16i32_v16i16_minsize:
; AVX512-32: # %bb.0:
; AVX512-32-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-32-NEXT: vpmulld {{\.LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; AVX512-32-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; AVX512-32-NEXT: retl
;
; AVX512-64-LABEL: test_mul_v16i32_v16i16_minsize:
; AVX512-64: # %bb.0:
; AVX512-64-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512-64-NEXT: vpmulld {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-64-NEXT: retq
%z = zext <16 x i16> %A to <16 x i32>
%m = mul nuw nsw <16 x i32> %z, <i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778, i32 18778>