diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 74df28056e0..c24fb0638f4 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -2690,6 +2690,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_vbroadcast_sd_pd_512 : GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + + def int_x86_avx512_pbroadcastd_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastd512">, + Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_i32_512 : + Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>; + + def int_x86_avx512_pbroadcastq_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512">, + Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_i64_512 : + Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>; } // Vector sign and zero extend diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 1ac563010fd..5e854da1a0c 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -425,6 +425,11 @@ def : Pat<(v8i64 (X86VBroadcast (i64 GR64:$src))), def : Pat<(v8i64 (X86VBroadcastm VK8WM:$mask, (i64 GR64:$src))), (VPBROADCASTQrZkrr VK8WM:$mask, GR64:$src)>; +def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_i32_512 (i32 GR32:$src))), + (VPBROADCASTDrZrr GR32:$src)>; +def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_i64_512 (i64 GR64:$src))), + (VPBROADCASTQrZrr GR64:$src)>; + multiclass avx512_int_broadcast_rm opc, string OpcodeStr, X86MemOperand x86memop, PatFrag ld_frag, RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, @@ -461,6 +466,11 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), + (VPBROADCASTDZrr VR128X:$src)>; +def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), + (VPBROADCASTQZrr VR128X:$src)>; + def : Pat<(v16f32 (X86VBroadcast (v4f32 VR128X:$src))), (VBROADCASTSSZrr VR128X:$src)>; def : Pat<(v8f64 (X86VBroadcast (v2f64 VR128X:$src))), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 29b508302d0..8a512487069 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -234,6 +234,34 @@ define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0) { } declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly +define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) { + ; CHECK: vpbroadcastd + %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly + +define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) { + ; CHECK: vpbroadcastd + %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32 %a0) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly + +define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) { + ; CHECK: vpbroadcastq + %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly + +define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) { + ; CHECK: vpbroadcastq + %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64 %a0) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly + define <16 x i32> @test_x86_pmaxu_d(<16 x i32> %a0, <16 x i32> %a1) { ; CHECK: vpmaxud %res = call <16 x i32> @llvm.x86.avx512.pmaxu.d(<16 x i32> %a0, <16 x i32> %a1) ; <<16 x i32>> [#uses=1]