From ab08d36d207a19670b91eaa105e22a48cd82436f Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Thu, 6 May 2021 13:29:48 -0700
Subject: [PATCH] [AMDGPU] Expose __builtin_amdgcn_perm for v_perm_b32

Differential Revision: https://reviews.llvm.org/D102022
---
Reviewer notes (text in this section is ignored by `git am`):
  * IntrinsicsAMDGPU.td: declares int_amdgcn_perm (i32 <- i32, i32, i32) and
    ties it to the builtin name __builtin_amdgcn_perm.
  * AMDGPUInstrInfo.td: the SDNode for AMDGPUISD::PERM is renamed to
    AMDGPUperm_impl; AMDGPUperm becomes a PatFrags that matches either the
    intrinsic or the SDNode, mirroring the AMDGPUdiv_fmas definition that
    precedes it.
  * AMDGPURegisterBankInfo.cpp: adds Intrinsic::amdgcn_perm to an existing
    case list in getInstrMapping().
  * SIISelLowering.cpp: LowerINTRINSIC_WO_CHAIN() turns the intrinsic into an
    AMDGPUISD::PERM node built from operands 1..3.
  * llvm.amdgcn.perm.ll: new test, run both with and without -global-isel.

 include/llvm/IR/IntrinsicsAMDGPU.td          |  6 +++
 lib/Target/AMDGPU/AMDGPUInstrInfo.td         |  6 ++-
 lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp |  1 +
 lib/Target/AMDGPU/SIISelLowering.cpp         |  3 ++
 test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll      | 47 ++++++++++++++++++++
 5 files changed, 62 insertions(+), 1 deletion(-)
 create mode 100644 test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll

diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td
index 7b62b9de79b..46a7aeb39c9 100644
--- a/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1716,6 +1716,12 @@ def int_amdgcn_ds_bpermute :
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
      [IntrNoMem, IntrConvergent, IntrWillReturn]>;
 
+// llvm.amdgcn.perm
+def int_amdgcn_perm :
+  GCCBuiltin<"__builtin_amdgcn_perm">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+     [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
+
 //===----------------------------------------------------------------------===//
 // GFX10 Intrinsics
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index c0cb1781abe..d63bd2e9eb2 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -313,7 +313,7 @@ def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
                                     SDTCisInt<4>]>,
                              []>;
 
-def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
+def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
 
 // SI+ export
 def AMDGPUExportOp : SDTypeProfile<0, 8, [
@@ -463,3 +463,7 @@ def AMDGPUfdot2 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp)
 def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc),
   [(int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
    (AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)]>;
+
+def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
+  [(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
+   (AMDGPUperm_impl node:$src0, node:$src1, node:$src2)]>;
diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 482aef524f6..2126e7a2726 100644
--- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3949,6 +3949,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_cvt_pk_u8_f32:
     case Intrinsic::amdgcn_alignbit:
     case Intrinsic::amdgcn_alignbyte:
+    case Intrinsic::amdgcn_perm:
     case Intrinsic::amdgcn_fdot2:
     case Intrinsic::amdgcn_sdot2:
     case Intrinsic::amdgcn_udot2:
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index e6eae914d54..5eca427052b 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6695,6 +6695,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_alignbit:
     return DAG.getNode(ISD::FSHR, DL, VT,
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+  case Intrinsic::amdgcn_perm:
+    return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
+                       Op.getOperand(2), Op.getOperand(3));
   case Intrinsic::amdgcn_reloc_constant: {
     Module *M = const_cast<Module *>(MF.getFunction().getParent());
     const MDNode *Metadata = cast<MetadataAsValue>(Op.getOperand(1))->getMD();
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
new file mode 100644
index 00000000000..4d9ba39b8e0
--- /dev/null
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
@@ -0,0 +1,47 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
+
+; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
+define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
+define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
+define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_s_s_c:
+; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}}
+define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: {{^}}v_perm_b32_v_s_i:
+; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1
+define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
+  %val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) #0
+  store i32 %val, i32 addrspace(1)* %out
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }