1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 10:42:39 +01:00

[VE] Support intrinsic to insert/extract_subreg of v512i1

Support insert/extract_subreg intrinsic instructions for v512i1
registers and add regression tests.

Reviewed By: simoll

Differential Revision: https://reviews.llvm.org/D94298
This commit is contained in:
Kazushi (Jam) Marukawa 2021-01-08 20:29:42 +09:00
parent da4d1cab18
commit 680d78da27
4 changed files with 96 additions and 0 deletions

View File

@ -11,6 +11,24 @@ let TargetPrefix = "ve" in {
// Intrinsic exposed to the front end as __builtin_ve_vl_pack_f32a.
// Signature: takes one pointer operand and returns an i64.
// Marked IntrReadMem, i.e. it is declared as reading memory.
def int_ve_vl_pack_f32a : GCCBuiltin<"__builtin_ve_vl_pack_f32a">,
Intrinsic<[llvm_i64_ty], [llvm_ptr_ty],
[IntrReadMem]>;
// Intrinsic exposed to the front end as __builtin_ve_vl_extract_vm512u.
// Signature: takes a v512i1 value and returns a v256i1 value.
// Marked IntrNoMem, i.e. it is declared as not accessing memory.
// NOTE(review): the "u" suffix presumably means the upper half of the
// 512-bit mask -- confirm against the VE intrinsics documentation.
def int_ve_vl_extract_vm512u :
GCCBuiltin<"__builtin_ve_vl_extract_vm512u">,
Intrinsic<[LLVMType<v256i1>], [LLVMType<v512i1>], [IntrNoMem]>;
// Intrinsic exposed to the front end as __builtin_ve_vl_extract_vm512l.
// Signature: takes a v512i1 value and returns a v256i1 value.
// Marked IntrNoMem, i.e. it is declared as not accessing memory.
// NOTE(review): the "l" suffix presumably means the lower half of the
// 512-bit mask -- confirm against the VE intrinsics documentation.
def int_ve_vl_extract_vm512l :
GCCBuiltin<"__builtin_ve_vl_extract_vm512l">,
Intrinsic<[LLVMType<v256i1>], [LLVMType<v512i1>], [IntrNoMem]>;
// Intrinsic exposed to the front end as __builtin_ve_vl_insert_vm512u.
// Signature: takes a v512i1 value and a v256i1 value, in that order, and
// returns a v512i1 value.
// Marked IntrNoMem, i.e. it is declared as not accessing memory.
// NOTE(review): the "u" suffix presumably means the upper half of the
// 512-bit mask is replaced -- confirm against the VE intrinsics documentation.
def int_ve_vl_insert_vm512u :
GCCBuiltin<"__builtin_ve_vl_insert_vm512u">,
Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v256i1>],
[IntrNoMem]>;
// Intrinsic exposed to the front end as __builtin_ve_vl_insert_vm512l.
// Signature: takes a v512i1 value and a v256i1 value, in that order, and
// returns a v512i1 value.
// Marked IntrNoMem, i.e. it is declared as not accessing memory.
// NOTE(review): the "l" suffix presumably means the lower half of the
// 512-bit mask is replaced -- confirm against the VE intrinsics documentation.
def int_ve_vl_insert_vm512l :
GCCBuiltin<"__builtin_ve_vl_insert_vm512l">,
Intrinsic<[LLVMType<v512i1>], [LLVMType<v512i1>, LLVMType<v256i1>],
[IntrNoMem]>;
}
// Define intrinsics automatically generated

View File

@ -17,6 +17,19 @@ def : Pat<(i64 (int_ve_vl_pack_f32a ADDRrii:$addr)),
!add(32, 64)), 0,
(HI32 (i64 0x0000000100000001))))>;
// The extract/insert patterns.
//
// Each vm512 extract/insert intrinsic is selected to a subregister
// operation on the v512i1 operand, so no dedicated instruction is named in
// these patterns:
//   - the "u" variants use the sub_vm_even subregister index;
//   - the "l" variants use the sub_vm_odd subregister index.
// Extract: v512i1 $vm -> v256i1 taken from the chosen subregister.
def : Pat<(v256i1 (int_ve_vl_extract_vm512u v512i1:$vm)),
(EXTRACT_SUBREG v512i1:$vm, sub_vm_even)>;
def : Pat<(v256i1 (int_ve_vl_extract_vm512l v512i1:$vm)),
(EXTRACT_SUBREG v512i1:$vm, sub_vm_odd)>;
// Insert: v256i1 $vmy is placed into the chosen subregister of v512i1 $vmx,
// producing a v512i1 result.
def : Pat<(v512i1 (int_ve_vl_insert_vm512u v512i1:$vmx, v256i1:$vmy)),
(INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_even)>;
def : Pat<(v512i1 (int_ve_vl_insert_vm512l v512i1:$vmx, v256i1:$vmy)),
(INSERT_SUBREG v512i1:$vmx, v256i1:$vmy, sub_vm_odd)>;
// LSV patterns.
//
// Selects int_ve_vl_lsv_vvss (operands: v256f64 $pt, i32 $sy, i64 $sz) to
// LSVrr_v. Note the operand order changes: the instruction receives
// ($sy, $sz, $pt), with $sy first passed through i2l.
// NOTE(review): i2l presumably widens the i32 index to i64 -- its
// definition is not visible in this hunk; confirm.
def : Pat<(int_ve_vl_lsv_vvss v256f64:$pt, i32:$sy, i64:$sz),
(LSVrr_v (i2l i32:$sy), i64:$sz, v256f64:$pt)>;

View File

@ -0,0 +1,33 @@
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
;;; Test extract intrinsic instructions
;;;
;;; Note:
;;; We test extract_vm512u and extract_vm512l pseudo instructions.
; Function Attrs: nounwind readnone
;;; Calls @llvm.ve.vl.extract.vm512u on the <512 x i1> argument and returns
;;; the <256 x i1> result. The FileCheck directives in the body require the
;;; generated code after the entry label to be exactly
;;; `andm %vm1, %vm0, %vm2` followed by `b.l.t (, %s10)`.
define fastcc <256 x i1> @extract_vm512u(<512 x i1> %0) {
; CHECK-LABEL: extract_vm512u:
; CHECK: # %bb.0:
; CHECK-NEXT: andm %vm1, %vm0, %vm2
; CHECK-NEXT: b.l.t (, %s10)
%2 = tail call <256 x i1> @llvm.ve.vl.extract.vm512u(<512 x i1> %0)
ret <256 x i1> %2
}
; Function Attrs: nounwind readnone
;;; Declaration of the intrinsic under test: <512 x i1> in, <256 x i1> out.
declare <256 x i1> @llvm.ve.vl.extract.vm512u(<512 x i1>)
; Function Attrs: nounwind readnone
;;; Calls @llvm.ve.vl.extract.vm512l on the <512 x i1> argument and returns
;;; the <256 x i1> result. The FileCheck directives in the body require the
;;; generated code after the entry label to be exactly
;;; `andm %vm0, %vm0, %vm2`, then `andm %vm1, %vm0, %vm3`, then
;;; `b.l.t (, %s10)`. That is one more andm than the vm512u test expects.
define fastcc <256 x i1> @extract_vm512l(<512 x i1> %0) {
; CHECK-LABEL: extract_vm512l:
; CHECK: # %bb.0:
; CHECK-NEXT: andm %vm0, %vm0, %vm2
; CHECK-NEXT: andm %vm1, %vm0, %vm3
; CHECK-NEXT: b.l.t (, %s10)
%2 = tail call <256 x i1> @llvm.ve.vl.extract.vm512l(<512 x i1> %0)
ret <256 x i1> %2
}
; Function Attrs: nounwind readnone
;;; Declaration of the intrinsic under test: <512 x i1> in, <256 x i1> out.
declare <256 x i1> @llvm.ve.vl.extract.vm512l(<512 x i1>)

View File

@ -0,0 +1,32 @@
; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
;;; Test insert intrinsic instructions
;;;
;;; Note:
;;; We test insert_vm512u and insert_vm512l pseudo instructions.
; Function Attrs: nounwind readnone
;;; Calls @llvm.ve.vl.insert.vm512u with the <512 x i1> argument %0 and the
;;; <256 x i1> argument %1, and returns the <512 x i1> result. The FileCheck
;;; directives in the body require the generated code after the entry label
;;; to be exactly `andm %vm2, %vm0, %vm4` followed by `b.l.t (, %s10)`.
define fastcc <512 x i1> @insert_vm512u(<512 x i1> %0, <256 x i1> %1) {
; CHECK-LABEL: insert_vm512u:
; CHECK: # %bb.0:
; CHECK-NEXT: andm %vm2, %vm0, %vm4
; CHECK-NEXT: b.l.t (, %s10)
%3 = tail call <512 x i1> @llvm.ve.vl.insert.vm512u(<512 x i1> %0, <256 x i1> %1)
ret <512 x i1> %3
}
; Function Attrs: nounwind readnone
;;; Declaration of the intrinsic under test:
;;; (<512 x i1>, <256 x i1>) in, <512 x i1> out.
declare <512 x i1> @llvm.ve.vl.insert.vm512u(<512 x i1>, <256 x i1>)
; Function Attrs: nounwind readnone
;;; Calls @llvm.ve.vl.insert.vm512l with the <512 x i1> argument %0 and the
;;; <256 x i1> argument %1, and returns the <512 x i1> result. The FileCheck
;;; directives in the body require the generated code after the entry label
;;; to be exactly `andm %vm3, %vm0, %vm4` followed by `b.l.t (, %s10)`.
define fastcc <512 x i1> @insert_vm512l(<512 x i1> %0, <256 x i1> %1) {
; CHECK-LABEL: insert_vm512l:
; CHECK: # %bb.0:
; CHECK-NEXT: andm %vm3, %vm0, %vm4
; CHECK-NEXT: b.l.t (, %s10)
%3 = tail call <512 x i1> @llvm.ve.vl.insert.vm512l(<512 x i1> %0, <256 x i1> %1)
ret <512 x i1> %3
}
; Function Attrs: nounwind readnone
;;; Declaration of the intrinsic under test:
;;; (<512 x i1>, <256 x i1>) in, <512 x i1> out.
declare <512 x i1> @llvm.ve.vl.insert.vm512l(<512 x i1>, <256 x i1>)