mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 02:33:06 +01:00
[AMDGPU] Expose __builtin_amdgcn_perm for v_perm_b32
Differential Revision: https://reviews.llvm.org/D102022
This commit is contained in:
parent
4adadbc511
commit
ab08d36d20
@@ -1716,6 +1716,12 @@ def int_amdgcn_ds_bpermute :
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
|
||||
|
||||
// llvm.amdgcn.perm <src0> <src1> <selector>
// Intrinsic for the v_perm_b32 instruction: three i32 operands (two sources
// and a selector) in, one i32 result out. The GCCBuiltin record exposes it to
// frontends as __builtin_amdgcn_perm. It is declared IntrNoMem,
// IntrSpeculatable and IntrWillReturn.
|
||||
def int_amdgcn_perm :
|
||||
GCCBuiltin<"__builtin_amdgcn_perm">,
|
||||
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
|
||||
[IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// GFX10 Intrinsics
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@@ -313,7 +313,7 @@ def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
|
||||
SDTCisInt<4>]>,
|
||||
[]>;
|
||||
|
||||
def AMDGPUperm : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
|
||||
// SelectionDAG node for AMDGPUISD::PERM, typed as an integer ternary op with
// no node properties. The plain AMDGPUperm name is defined separately as a
// PatFrags that matches either this node or the int_amdgcn_perm intrinsic.
def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;
|
||||
|
||||
// SI+ export
|
||||
def AMDGPUExportOp : SDTypeProfile<0, 8, [
|
||||
@@ -463,3 +463,7 @@ def AMDGPUfdot2 : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp)
|
||||
// Pattern fragment that matches a div_fmas operation in either of two forms,
// both taking the same four operands ($src0, $src1, $src2, $vcc): the
// int_amdgcn_div_fmas intrinsic or the AMDGPUdiv_fmas_impl node.
def AMDGPUdiv_fmas : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc),
|
||||
[(int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
|
||||
(AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)]>;
|
||||
|
||||
// Pattern fragment that matches a perm operation in either of two forms, both
// taking the same three operands ($src0, $src1, $src2): the int_amdgcn_perm
// intrinsic or the AMDGPUperm_impl node.
def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2),
|
||||
[(int_amdgcn_perm node:$src0, node:$src1, node:$src2),
|
||||
(AMDGPUperm_impl node:$src0, node:$src1, node:$src2)]>;
|
||||
|
@@ -3949,6 +3949,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
|
||||
case Intrinsic::amdgcn_cvt_pk_u8_f32:
|
||||
case Intrinsic::amdgcn_alignbit:
|
||||
case Intrinsic::amdgcn_alignbyte:
|
||||
case Intrinsic::amdgcn_perm:
|
||||
case Intrinsic::amdgcn_fdot2:
|
||||
case Intrinsic::amdgcn_sdot2:
|
||||
case Intrinsic::amdgcn_udot2:
|
||||
|
@@ -6695,6 +6695,9 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
case Intrinsic::amdgcn_alignbit:
|
||||
return DAG.getNode(ISD::FSHR, DL, VT,
|
||||
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
|
||||
case Intrinsic::amdgcn_perm:
|
||||
return DAG.getNode(AMDGPUISD::PERM, DL, MVT::i32, Op.getOperand(1),
|
||||
Op.getOperand(2), Op.getOperand(3));
|
||||
case Intrinsic::amdgcn_reloc_constant: {
|
||||
Module *M = const_cast<Module *>(MF.getFunction().getParent());
|
||||
const MDNode *Metadata = cast<MDNodeSDNode>(Op.getOperand(1))->getMD();
|
||||
|
47
test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
Normal file
47
test/CodeGen/AMDGPU/llvm.amdgcn.perm.ll
Normal file
@@ -0,0 +1,47 @@
|
||||
; Checks the code generated for calls to llvm.amdgcn.perm on tonga. The same
; check prefix is used for both llc invocations below, one without and one
; with -global-isel, so both must produce matching output.
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -global-isel -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
declare i32 @llvm.amdgcn.perm(i32, i32, i32) #0
|
||||
|
||||
; All three intrinsic operands are ordinary (non-inreg) arguments; the check
; expects them to be used directly as v0, v1 and v2.
; GCN-LABEL: {{^}}v_perm_b32_v_v_v:
|
||||
; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, v2
|
||||
define amdgpu_ps void @v_perm_b32_v_v_v(i32 %src1, i32 %src2, i32 %src3, i32 addrspace(1)* %out) #1 {
|
||||
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 %src3) #0
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Both sources are ordinary arguments (expected as v0, v1) and the selector is
; the constant 12345; the check accepts the selector in either a v or an s
; register.
; GCN-LABEL: {{^}}v_perm_b32_v_v_c:
|
||||
; GCN: v_perm_b32 v{{[0-9]+}}, v0, v1, {{[vs][0-9]+}}
|
||||
define amdgpu_ps void @v_perm_b32_v_v_c(i32 %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
|
||||
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; The first source is an inreg argument and is expected as s0; the second is
; an ordinary argument expected as v0. The constant selector 12345 is expected
; in a v register.
; GCN-LABEL: {{^}}v_perm_b32_s_v_c:
|
||||
; GCN: v_perm_b32 v{{[0-9]+}}, s0, v0, v{{[0-9]+}}
|
||||
define amdgpu_ps void @v_perm_b32_s_v_c(i32 inreg %src1, i32 %src2, i32 addrspace(1)* %out) #1 {
|
||||
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Both sources are inreg arguments, but only the first is expected as an s
; register (s0); the second source and the constant selector 12345 are both
; expected in v registers.
; NOTE(review): presumably a limit on scalar operands per instruction on this
; target forces the second source into a v register - confirm.
; GCN-LABEL: {{^}}v_perm_b32_s_s_c:
|
||||
; GCN: v_perm_b32 v{{[0-9]+}}, s0, v{{[0-9]+}}, v{{[0-9]+}}
|
||||
define amdgpu_ps void @v_perm_b32_s_s_c(i32 inreg %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
|
||||
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 12345) #0
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; The first source is an ordinary argument (expected as v0) and the second is
; inreg (expected as s0). The selector is the constant 1 and is expected to
; appear as the literal operand 1 rather than in a register.
; GCN-LABEL: {{^}}v_perm_b32_v_s_i:
|
||||
; GCN: v_perm_b32 v{{[0-9]+}}, v0, s0, 1
|
||||
define amdgpu_ps void @v_perm_b32_v_s_i(i32 %src1, i32 inreg %src2, i32 addrspace(1)* %out) #1 {
|
||||
%val = call i32 @llvm.amdgcn.perm(i32 %src1, i32 %src2, i32 1) #0
|
||||
store i32 %val, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
||||
; Attribute group #0 is applied to the llvm.amdgcn.perm declaration and to
; each call of it; group #1 is applied to the test functions themselves.
attributes #0 = { nounwind readnone }
|
||||
attributes #1 = { nounwind }
|
Loading…
Reference in New Issue
Block a user