1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-21 20:12:56 +02:00

[AVX-512] Don't change which instructions we use for unmasked subvector broadcasts when AVX512DQ is enabled.

There's no functional difference between the AVX512DQ instructions if we're not masking.

This change unifies test checks and removes extra isel entries. Similar was done for subvector insert and extracts recently.

llvm-svn: 311308
This commit is contained in:
Craig Topper 2017-08-21 05:29:02 +00:00
parent 52046dd2c7
commit 14199c7609
4 changed files with 99 additions and 253 deletions

View File

@ -1115,6 +1115,19 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
// This should be used for the AVX512DQ broadcast instructions. It disables
// the unmasked patterns so that we only use the DQ instructions when masking
// is requested.
multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
(null_frag),
(_Dst.VT (X86SubVBroadcast
(_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
AVX5128IBase, EVEX;
}
let Predicates = [HasAVX512] in {
// 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
@ -1159,6 +1172,10 @@ defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
@ -1169,9 +1186,15 @@ def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4f64 VR256X:$src), 1)>;
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8f32 VR256X:$src), 1)>;
def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v4i64 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v16i16 VR256X:$src), 1)>;
@ -1179,6 +1202,10 @@ def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v32i8 VR256X:$src), 1)>;
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
@ -1193,6 +1220,10 @@ defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
v8f32x_info, v4f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
(VBROADCASTI32X4Z256rm addr:$src)>;
def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
@ -1200,9 +1231,15 @@ def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2f64 VR128X:$src), 1)>;
def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
(VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v4f32 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2i64 VR128X:$src), 1)>;
def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v4i32 VR128X:$src), 1)>;
@ -1215,82 +1252,27 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
}
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
v4i64x_info, v2i64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
v4f64x_info, v2f64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2f64 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2i64 VR128X:$src), 1)>;
}
let Predicates = [HasVLX, NoDQI] in {
def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4Z256rm addr:$src)>;
def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4Z256rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
(VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2f64 VR128X:$src), 1)>;
def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
(VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
(v2i64 VR128X:$src), 1)>;
}
let Predicates = [HasAVX512, NoDQI] in {
def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
(VBROADCASTF32X4rm addr:$src)>;
def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
(VBROADCASTI32X4rm addr:$src)>;
def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
(VBROADCASTF64X4rm addr:$src)>;
def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
(VBROADCASTI64X4rm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
(VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8f32 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
}
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
v8i64_info, v2i64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8",
defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
v16i32_info, v8i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
v8f64_info, v2f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
v16f32_info, v8f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
(VINSERTF32x8Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8f32 VR256X:$src), 1)>;
def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
(VINSERTI32x8Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
(v8i32 VR256X:$src), 1)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,

View File

@ -84,23 +84,11 @@ define <32 x i8> @test_broadcast_16i8_32i8(<16 x i8> *%p) nounwind {
;
define <8 x double> @test_broadcast_2f64_8f64(<2 x double> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_2f64_8f64:
; X64-AVX512VL: ## BB#0:
; X64-AVX512VL-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512VL-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_2f64_8f64:
; X64-AVX512BWVL: ## BB#0:
; X64-AVX512BWVL-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512BWVL-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: test_broadcast_2f64_8f64:
; X64-AVX512DQVL: ## BB#0:
; X64-AVX512DQVL-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
; X64-AVX512DQVL-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512DQVL-NEXT: retq
; X64-AVX512-LABEL: test_broadcast_2f64_8f64:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512-NEXT: vaddpd {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double> *%p
%2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
%3 = fadd <8 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>
@ -108,23 +96,11 @@ define <8 x double> @test_broadcast_2f64_8f64(<2 x double> *%p) nounwind {
}
define <8 x i64> @test_broadcast_2i64_8i64(<2 x i64> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_2i64_8i64:
; X64-AVX512VL: ## BB#0:
; X64-AVX512VL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512VL-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_2i64_8i64:
; X64-AVX512BWVL: ## BB#0:
; X64-AVX512BWVL-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512BWVL-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: test_broadcast_2i64_8i64:
; X64-AVX512DQVL: ## BB#0:
; X64-AVX512DQVL-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
; X64-AVX512DQVL-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512DQVL-NEXT: retq
; X64-AVX512-LABEL: test_broadcast_2i64_8i64:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512-NEXT: vpaddq {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512-NEXT: retq
%1 = load <2 x i64>, <2 x i64> *%p
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
%3 = add <8 x i64> %2, <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>

View File

@ -28,23 +28,11 @@ define <8 x i64> @test_broadcast_4i64_8i64(<4 x i64> *%p) nounwind {
}
define <16 x float> @test_broadcast_8f32_16f32(<8 x float> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_8f32_16f32:
; X64-AVX512VL: ## BB#0:
; X64-AVX512VL-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512VL-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_8f32_16f32:
; X64-AVX512BWVL: ## BB#0:
; X64-AVX512BWVL-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512BWVL-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: test_broadcast_8f32_16f32:
; X64-AVX512DQVL: ## BB#0:
; X64-AVX512DQVL-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X64-AVX512DQVL-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512DQVL-NEXT: retq
; X64-AVX512-LABEL: test_broadcast_8f32_16f32:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512-NEXT: vaddps {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float> *%p
%2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = fadd <16 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
@ -52,23 +40,11 @@ define <16 x float> @test_broadcast_8f32_16f32(<8 x float> *%p) nounwind {
}
define <16 x i32> @test_broadcast_8i32_16i32(<8 x i32> *%p) nounwind {
; X64-AVX512VL-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512VL: ## BB#0:
; X64-AVX512VL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512BWVL: ## BB#0:
; X64-AVX512BWVL-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512BWVL-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512DQVL: ## BB#0:
; X64-AVX512DQVL-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X64-AVX512DQVL-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512DQVL-NEXT: retq
; X64-AVX512-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512: ## BB#0:
; X64-AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512-NEXT: vpaddd {{.*}}(%rip), %zmm0, %zmm0
; X64-AVX512-NEXT: retq
%1 = load <8 x i32>, <8 x i32> *%p
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%3 = add <16 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>

View File

@ -38,23 +38,11 @@ define <8 x double> @test_broadcast_2f64_8f64(<2 x double> *%p) nounwind {
; X32-AVX-NEXT: vmovdqa %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
; X32-AVX512F-LABEL: test_broadcast_2f64_8f64:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_2f64_8f64:
; X32-AVX512BW: # BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_2f64_8f64:
; X32-AVX512DQ: # BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
; X32-AVX512DQ-NEXT: retl
; X32-AVX512-LABEL: test_broadcast_2f64_8f64:
; X32-AVX512: # BB#0:
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_2f64_8f64:
; X64-AVX: # BB#0:
@ -62,20 +50,10 @@ define <8 x double> @test_broadcast_2f64_8f64(<2 x double> *%p) nounwind {
; X64-AVX-NEXT: vmovdqa %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512F-LABEL: test_broadcast_2f64_8f64:
; X64-AVX512F: # BB#0:
; X64-AVX512F-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_2f64_8f64:
; X64-AVX512BW: # BB#0:
; X64-AVX512BW-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_2f64_8f64:
; X64-AVX512DQ: # BB#0:
; X64-AVX512DQ-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
; X64-AVX512DQ-NEXT: retq
; X64-AVX512-LABEL: test_broadcast_2f64_8f64:
; X64-AVX512: # BB#0:
; X64-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512-NEXT: retq
%1 = load <2 x double>, <2 x double> *%p
%2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <8 x double> %2
@ -152,23 +130,11 @@ define <8 x i64> @test_broadcast_2i64_8i64(<2 x i64> *%p) nounwind {
; X32-AVX2-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX2-NEXT: retl
;
; X32-AVX512F-LABEL: test_broadcast_2i64_8i64:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_2i64_8i64:
; X32-AVX512BW: # BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_2i64_8i64:
; X32-AVX512DQ: # BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
; X32-AVX512DQ-NEXT: retl
; X32-AVX512-LABEL: test_broadcast_2i64_8i64:
; X32-AVX512: # BB#0:
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512-NEXT: retl
;
; X64-AVX1-LABEL: test_broadcast_2i64_8i64:
; X64-AVX1: # BB#0:
@ -182,20 +148,10 @@ define <8 x i64> @test_broadcast_2i64_8i64(<2 x i64> *%p) nounwind {
; X64-AVX2-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX2-NEXT: retq
;
; X64-AVX512F-LABEL: test_broadcast_2i64_8i64:
; X64-AVX512F: # BB#0:
; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_2i64_8i64:
; X64-AVX512BW: # BB#0:
; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_2i64_8i64:
; X64-AVX512DQ: # BB#0:
; X64-AVX512DQ-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
; X64-AVX512DQ-NEXT: retq
; X64-AVX512-LABEL: test_broadcast_2i64_8i64:
; X64-AVX512: # BB#0:
; X64-AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512-NEXT: retq
%1 = load <2 x i64>, <2 x i64> *%p
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
ret <8 x i64> %2
@ -283,23 +239,11 @@ define <16 x float> @test_broadcast_8f32_16f32(<8 x float> *%p) nounwind {
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
; X32-AVX512F-LABEL: test_broadcast_8f32_16f32:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_8f32_16f32:
; X32-AVX512BW: # BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_8f32_16f32:
; X32-AVX512DQ: # BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X32-AVX512DQ-NEXT: retl
; X32-AVX512-LABEL: test_broadcast_8f32_16f32:
; X32-AVX512: # BB#0:
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_8f32_16f32:
; X64-AVX: # BB#0:
@ -307,20 +251,10 @@ define <16 x float> @test_broadcast_8f32_16f32(<8 x float> *%p) nounwind {
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512F-LABEL: test_broadcast_8f32_16f32:
; X64-AVX512F: # BB#0:
; X64-AVX512F-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_8f32_16f32:
; X64-AVX512BW: # BB#0:
; X64-AVX512BW-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_8f32_16f32:
; X64-AVX512DQ: # BB#0:
; X64-AVX512DQ-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X64-AVX512DQ-NEXT: retq
; X64-AVX512-LABEL: test_broadcast_8f32_16f32:
; X64-AVX512: # BB#0:
; X64-AVX512-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512-NEXT: retq
%1 = load <8 x float>, <8 x float> *%p
%2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <16 x float> %2
@ -403,23 +337,11 @@ define <16 x i32> @test_broadcast_8i32_16i32(<8 x i32> *%p) nounwind {
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
; X32-AVX512F-LABEL: test_broadcast_8i32_16i32:
; X32-AVX512F: # BB#0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_8i32_16i32:
; X32-AVX512BW: # BB#0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_8i32_16i32:
; X32-AVX512DQ: # BB#0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X32-AVX512DQ-NEXT: retl
; X32-AVX512-LABEL: test_broadcast_8i32_16i32:
; X32-AVX512: # BB#0:
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_8i32_16i32:
; X64-AVX: # BB#0:
@ -427,20 +349,10 @@ define <16 x i32> @test_broadcast_8i32_16i32(<8 x i32> *%p) nounwind {
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512F-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512F: # BB#0:
; X64-AVX512F-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512BW: # BB#0:
; X64-AVX512BW-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512DQ: # BB#0:
; X64-AVX512DQ-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
; X64-AVX512DQ-NEXT: retq
; X64-AVX512-LABEL: test_broadcast_8i32_16i32:
; X64-AVX512: # BB#0:
; X64-AVX512-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
; X64-AVX512-NEXT: retq
%1 = load <8 x i32>, <8 x i32> *%p
%2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <16 x i32> %2