From 02e4f56c18205ab95e04e489e138b8f8f7d5865d Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Mon, 4 Nov 2013 19:14:56 +0000 Subject: [PATCH] Add support for AVX512 masked vector blend intrinsics. llvm-svn: 194006 --- include/llvm/IR/IntrinsicsX86.td | 24 ++++++++++-- lib/Target/X86/X86InstrAVX512.td | 54 +++++++++++++++++++-------- test/CodeGen/X86/avx512-intrinsics.ll | 32 ++++++++++++++++ 3 files changed, 92 insertions(+), 18 deletions(-) diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 4eee0ac8803..1fe1c91d9f8 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -3023,12 +3023,30 @@ let TargetPrefix = "x86" in { Intrinsic<[llvm_v8i64_ty], [llvm_v8i1_ty, llvm_v8i64_ty], []>; } -// Misc. -let TargetPrefix = "x86" in { + +// Vector blend +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mskblend_ps_512 : GCCBuiltin<"__builtin_ia32_mskblendps512">, Intrinsic<[llvm_v16f32_ty], - [llvm_i16_ty, llvm_v16f32_ty, llvm_v16f32_ty], + [llvm_v16i1_ty, llvm_v16f32_ty, llvm_v16f32_ty], [IntrNoMem]>; + def int_x86_avx512_mskblend_pd_512 : GCCBuiltin<"__builtin_ia32_mskblendpd512">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8i1_ty, llvm_v8f64_ty, llvm_v8f64_ty], + [IntrNoMem]>; + + def int_x86_avx512_mskblend_d_512 : GCCBuiltin<"__builtin_ia32_mskblendd512">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i1_ty, llvm_v16i32_ty, llvm_v16i32_ty], + [IntrNoMem]>; + def int_x86_avx512_mskblend_q_512 : GCCBuiltin<"__builtin_ia32_mskblendq512">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i1_ty, llvm_v8i64_ty, llvm_v8i64_ty], + [IntrNoMem]>; +} + +// Misc. +let TargetPrefix = "x86" in { def int_x86_avx512_cmpeq_pi_512 : GCCBuiltin<"__builtin_ia32_cmpeqpi512">, Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 6ce5c38dd2a..8935f90ac27 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -606,7 +606,7 @@ defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask // -multiclass avx512_blendmask opc, string OpcodeStr, +multiclass avx512_blendmask opc, string OpcodeStr, Intrinsic Int, RegisterClass KRC, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, SDNode OpNode, ValueType vt> { @@ -616,31 +616,55 @@ multiclass avx512_blendmask opc, string OpcodeStr, "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), [(set RC:$dst, (OpNode KRC:$mask, (vt RC:$src2), (vt RC:$src1)))]>, EVEX_4V, EVEX_K; - - def rm : AVX5128I, - EVEX_4V, EVEX_K; + "\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"), + [(set RC:$dst, (Int KRC:$mask, (vt RC:$src2), + (vt RC:$src1)))]>, EVEX_4V, EVEX_K; + + let mayLoad = 1 in { + def rm : AVX5128I, + EVEX_4V, EVEX_K; + + def rm_Int : AVX5128I, + EVEX_4V, EVEX_K; + } } let ExeDomain = SSEPackedSingle in -defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", VK16WM, VR512, f512mem, +defm VBLENDMPSZ : avx512_blendmask<0x65, "vblendmps", + int_x86_avx512_mskblend_ps_512, + VK16WM, VR512, f512mem, memopv16f32, vselect, v16f32>, EVEX_CD8<32, CD8VF>, EVEX_V512; let ExeDomain = SSEPackedDouble in -defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", VK8WM, VR512, f512mem, +defm VBLENDMPDZ : avx512_blendmask<0x65, "vblendmpd", + int_x86_avx512_mskblend_pd_512, + VK8WM, VR512, f512mem, memopv8f64, vselect, v8f64>, VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; -defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", VK16WM, VR512, f512mem, - memopv8i64, vselect, v16i32>, - EVEX_CD8<32, CD8VF>, EVEX_V512; +defm VPBLENDMDZ : avx512_blendmask<0x64, "vpblendmd", + int_x86_avx512_mskblend_d_512, + VK16WM, VR512, f512mem, + memopv16i32, vselect, v16i32>, + EVEX_CD8<32, CD8VF>, EVEX_V512; -defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", VK8WM, VR512, f512mem, - memopv8i64, vselect, v8i64>, VEX_W, - EVEX_CD8<64, CD8VF>, EVEX_V512; +defm VPBLENDMQZ : avx512_blendmask<0x64, "vpblendmq", + int_x86_avx512_mskblend_q_512, + VK8WM, VR512, f512mem, + memopv8i64, vselect, v8i64>, + VEX_W, EVEX_CD8<64, CD8VF>, EVEX_V512; let Predicates = [HasAVX512] in { def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1), diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 0570b21e78e..5bdabf23499 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -340,3 +340,35 @@ define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { ret <8 x i64> %res } declare <8 x i64> @llvm.x86.avx512.conflict.q.mask.512(<8 x i64>, <8 x i1>,<8 x i64>) nounwind readonly + +define <16 x float> @test_x86_mskblend_ps_512(i16 %a0, <16 x float> %a1, <16 x float> %a2) { + ; CHECK: vblendmps + %m0 = bitcast i16 %a0 to <16 x i1> + %res = call <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %m0, <16 x float> %a1, <16 x float> %a2) ; <<16 x float>> [#uses=1] + ret <16 x float> %res +} +declare <16 x float> @llvm.x86.avx512.mskblend.ps.512(<16 x i1> %a0, <16 x float> %a1, <16 x float> %a2) nounwind readonly + +define <8 x double> @test_x86_mskblend_pd_512(i8 %a0, <8 x double> %a1, <8 x double> %a2) { + ; CHECK: vblendmpd + %m0 = bitcast i8 %a0 to <8 x i1> + %res = call <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %m0, <8 x double> %a1, <8 x double> %a2) ; <<8 x double>> [#uses=1] + ret <8 x double> %res +} +declare <8 x double> @llvm.x86.avx512.mskblend.pd.512(<8 x i1> %a0, <8 x double> %a1, <8 x double> %a2) nounwind readonly + +define <16 x i32> @test_x86_mskblend_d_512(i16 %a0, <16 x i32> %a1, <16 x i32> %a2) { + ; CHECK: vpblendmd + %m0 = bitcast i16 %a0 to <16 x i1> + %res = call <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %m0, <16 x i32> %a1, <16 x i32> %a2) ; <<16 x i32>> [#uses=1] + ret <16 x i32> %res +} +declare <16 x i32> @llvm.x86.avx512.mskblend.d.512(<16 x i1> %a0, <16 x i32> %a1, <16 x i32> %a2) nounwind readonly + +define <8 x i64> @test_x86_mskblend_q_512(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) { + ; CHECK: vpblendmq + %m0 = bitcast i8 %a0 to <8 x i1> + %res = call <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %m0, <8 x i64> %a1, <8 x i64> %a2) ; <<8 x i64>> [#uses=1] + ret <8 x i64> %res +} +declare <8 x i64> @llvm.x86.avx512.mskblend.q.512(<8 x i1> %a0, <8 x i64> %a1, <8 x i64> %a2) nounwind readonly