From b1ef950217f810d458fa5ec80d370c7fbfa4bc44 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 7 Nov 2011 03:20:35 +0000
Subject: [PATCH] Add AVX2 VPMOVMASK instructions and intrinsics.

llvm-svn: 143904
---
 include/llvm/IntrinsicsX86.td           | 27 +++++++++++
 lib/Target/X86/X86InstrSSE.td           | 38 +++++++++++++++
 test/CodeGen/X86/avx2-intrinsics-x86.ll | 64 +++++++++++++++++++++++++
 3 files changed, 129 insertions(+)

diff --git a/include/llvm/IntrinsicsX86.td b/include/llvm/IntrinsicsX86.td
index 345c2803f4b..42209b8291d 100644
--- a/include/llvm/IntrinsicsX86.td
+++ b/include/llvm/IntrinsicsX86.td
@@ -1744,6 +1744,33 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
               Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty,
                          llvm_i8_ty], [IntrNoMem]>;
 }
+
+// Conditional load ops
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_avx2_maskload_d : GCCBuiltin<"__builtin_ia32_maskloadd">,
+        Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_v4i32_ty], [IntrReadMem]>;
+  def int_x86_avx2_maskload_q : GCCBuiltin<"__builtin_ia32_maskloadq">,
+        Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty], [IntrReadMem]>;
+  def int_x86_avx2_maskload_d_256 : GCCBuiltin<"__builtin_ia32_maskloadd256">,
+        Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty, llvm_v8i32_ty], [IntrReadMem]>;
+  def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
+        Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty], [IntrReadMem]>;
+}
+
+// Conditional store ops
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_avx2_maskstore_d : GCCBuiltin<"__builtin_ia32_maskstored">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_v4i32_ty], []>;
+  def int_x86_avx2_maskstore_q : GCCBuiltin<"__builtin_ia32_maskstoreq">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_v2i64_ty], []>;
+  def int_x86_avx2_maskstore_d_256 :
+        GCCBuiltin<"__builtin_ia32_maskstored256">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_v8i32_ty], []>;
+  def int_x86_avx2_maskstore_q_256 :
+        GCCBuiltin<"__builtin_ia32_maskstoreq256">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty], []>;
+}
+
 // Misc.
 let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
   def int_x86_avx2_pmovmskb : GCCBuiltin<"__builtin_ia32_pmovmskb256">,

diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index fc36884bf1f..b46e5d10e04 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -7563,3 +7563,41 @@ let neverHasSideEffects = 1, mayStore = 1 in
 def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
           (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
           "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
+
+//===----------------------------------------------------------------------===//
+// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
+//
+multiclass avx2_pmovmask<string OpcodeStr,
+                         Intrinsic IntLd128, Intrinsic IntLd256,
+                         Intrinsic IntSt128, Intrinsic IntSt256,
+                         PatFrag pf128, PatFrag pf256> {
+  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
+             (ins VR128:$src1, i128mem:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
+  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
+             (ins VR256:$src1, i256mem:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V;
+  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
+             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
+  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
+             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
+             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V;
+}
+
+defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
+                                int_x86_avx2_maskload_d,
+                                int_x86_avx2_maskload_d_256,
+                                int_x86_avx2_maskstore_d,
+                                int_x86_avx2_maskstore_d_256,
+                                memopv4i32, memopv8i32>;
+defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
+                                int_x86_avx2_maskload_q,
+                                int_x86_avx2_maskload_q_256,
+                                int_x86_avx2_maskstore_q,
+                                int_x86_avx2_maskstore_q_256,
+                                memopv2i64, memopv4i64>, VEX_W;
diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 4ab0884b5b7..da03c386273 100644
--- a/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -902,3 +902,67 @@ define <4 x i64> @test_x86_avx2_vinserti128(<4 x i64> %a0, <2 x i64> %a1) {
   ret <4 x i64> %res
 }
 declare <4 x i64> @llvm.x86.avx2.vinserti128(<4 x i64>, <2 x i64>, i8) nounwind readnone
+
+
+define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
+  ; CHECK: vpmaskmovq
+  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
+  ret <2 x i64> %res
+}
+declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly
+
+
+define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
+  ; CHECK: vpmaskmovq
+  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
+  ret <4 x i64> %res
+}
+declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly
+
+
+define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: vpmaskmovd
+  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
+  ret <4 x i32> %res
+}
+declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly
+
+
+define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
+  ; CHECK: vpmaskmovd
+  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
+  ret <8 x i32> %res
+}
+declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
+
+
+define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
+  ; CHECK: vpmaskmovq
+  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
+  ; CHECK: vpmaskmovq
+  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
+  ; CHECK: vpmaskmovd
+  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind
+
+
+define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
+  ; CHECK: vpmaskmovd
+  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
+  ret void
+}
+declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
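--
Editor's note (not part of the patch): a minimal IR sketch showing how the new 256-bit dword intrinsics are intended to compose. VPMASKMOVD uses the sign bit of each mask element as the per-lane condition: masked-off lanes read as zero on load and are left untouched on store. The function name @copy_masked_epi32 is hypothetical and for illustration only; the intrinsic signatures match the declarations added in the test above.

define void @copy_masked_epi32(i8* %src, i8* %dst, <8 x i32> %mask) {
  ; Load only the lanes whose mask element has its sign bit set; other lanes of %v are zero.
  %v = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %src, <8 x i32> %mask)
  ; Store back only those same lanes; unmasked lanes of %dst are not written.
  call void @llvm.x86.avx2.maskstore.d.256(i8* %dst, <8 x i32> %mask, <8 x i32> %v)
  ret void
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind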