1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-22 02:33:06 +01:00

[AMDGPU] Expose __builtin_amdgcn_perm for v_perm_b32

Differential Revision: https://reviews.llvm.org/D102022
This commit is contained in:
Stanislav Mekhanoshin 2021-05-06 13:29:48 -07:00
parent 4adadbc511
commit ab08d36d20
5 changed files with 62 additions and 1 deletions

View File

@ -1716,6 +1716,12 @@ def int_amdgcn_ds_bpermute :
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
// llvm.amdgcn.perm <src0> <src1> <selector>
def int_amdgcn_perm :
GCCBuiltin<"__builtin_amdgcn_perm">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//

View File

@ -313,7 +313,7 @@ def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
SDTCisInt<4>]>,
[]>;
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
// SI+ export
def AMDGPUExportOp : SDTypeProfile<0, 8, [
@ -463,3 +463,7 @@ def AMDGPUfdot2 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp)
def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc),
[(int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
(AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)]>;
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
(AMDGPUperm_impl node:$src0, node:$src1, node:$src2)]>;

View File

@ -3949,6 +3949,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_cvt_pk_u8_f32:
case Intrinsic::amdgcn_alignbit:
case Intrinsic::amdgcn_alignbyte:
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_fdot2:
case Intrinsic::amdgcn_sdot2:
case Intrinsic::amdgcn_udot2:

View File

@ -6695,6 +6695,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_alignbit:
return DAG.getNode(ISD::FSHR, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_perm:
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
case Intrinsic::amdgcn_reloc_constant: {
Module *M = const_cast<Module *>(MF.getFunction().getParent());
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();

View File

@ -0,0 +1,47 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, i32 addrspace(1)* %out) #1 {
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
store i32 %val, i32 addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
store i32 %val, i32 addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
store i32 %val, i32 addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_perm_b32_s_s_c:
; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}}
define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
store i32 %val, i32 addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}v_perm_b32_v_s_i:
; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1
define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) #0
store i32 %val, i32 addrspace(1)* %out
ret void
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }