From 5127b285b98050d566149ac1f44c2e9a4f055dc6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 4 Jan 2020 19:49:17 -0500 Subject: [PATCH] AMDGPU: Remove custom node for exports I'm mildly worried about potentially reordering exp/exp_done with IntrWriteMem on the intrinsic. Requires hacking out the illegal type on SI, so manually select that case during lowering. --- include/llvm/IR/IntrinsicsAMDGPU.td | 4 +- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 2 - lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 - lib/Target/AMDGPU/AMDGPUInstrInfo.td | 6 --- lib/Target/AMDGPU/SIISelLowering.cpp | 47 ++++++------------------ lib/Target/AMDGPU/SIInstrInfo.td | 15 +++----- lib/Target/AMDGPU/SIInstructions.td | 46 +++++++++++++++++------ 7 files changed, 55 insertions(+), 67 deletions(-) diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index 07ca3a9229d..385ea6d8c9c 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -1156,7 +1156,7 @@ def int_amdgcn_exp : Intrinsic <[], [ llvm_i1_ty, // done llvm_i1_ty // vm ], - [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly] + [ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrWriteMem, IntrInaccessibleMemOnly] >; // exp with compr bit set. @@ -1167,7 +1167,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [ LLVMMatchType<0>, // src1 llvm_i1_ty, // done llvm_i1_ty], // vm - [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly] + [ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrWriteMem, IntrInaccessibleMemOnly] >; def int_amdgcn_buffer_wbinvl1_sc : diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 23cc9404532..c985ba67b27 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4298,8 +4298,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(MAD_U64_U32) NODE_NAME_CASE(PERM) NODE_NAME_CASE(TEXTURE_FETCH) - NODE_NAME_CASE(EXPORT) - NODE_NAME_CASE(EXPORT_DONE) NODE_NAME_CASE(R600_EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index a90b7f5653d..087983e6cf1 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -433,8 +433,6 @@ enum NodeType : unsigned { MUL_LOHI_U24, PERM, TEXTURE_FETCH, - EXPORT, // exp on SI+ - EXPORT_DONE, // exp on SI+ with done bit set R600_EXPORT, CONST_ADDRESS, REGISTER_LOAD, diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td index 50c451be4b8..4d7c6b6b57e 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -358,12 +358,6 @@ def AMDGPUExportOp : SDTypeProfile<0, 8, [ ]>; -def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp, - [SDNPHasChain, SDNPMayStore]>; - -def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp, - [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>; - def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>; diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 52bee393fae..1487920aac2 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -6782,52 +6782,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); switch (IntrinsicID) { - case Intrinsic::amdgcn_exp: { - const ConstantSDNode *Tgt = cast(Op.getOperand(2)); - const ConstantSDNode *En = cast(Op.getOperand(3)); - const ConstantSDNode *Done = cast(Op.getOperand(8)); - const ConstantSDNode *VM = cast(Op.getOperand(9)); - - const SDValue Ops[] = { - Chain, - DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt - DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en - Op.getOperand(4), // src0 - Op.getOperand(5), // src1 - Op.getOperand(6), // src2 - Op.getOperand(7), // src3 - DAG.getTargetConstant(0, DL, MVT::i1), // compr - DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1) - }; - - unsigned Opc = Done->isNullValue() ? - AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; - return DAG.getNode(Opc, DL, Op->getVTList(), Ops); - } case Intrinsic::amdgcn_exp_compr: { - const ConstantSDNode *Tgt = cast(Op.getOperand(2)); - const ConstantSDNode *En = cast(Op.getOperand(3)); SDValue Src0 = Op.getOperand(4); SDValue Src1 = Op.getOperand(5); - const ConstantSDNode *Done = cast(Op.getOperand(6)); - const ConstantSDNode *VM = cast(Op.getOperand(7)); + // Hack around illegal type on SI by directly selecting it. + if (isTypeLegal(Src0.getValueType())) + return SDValue(); + const ConstantSDNode *Done = cast(Op.getOperand(6)); SDValue Undef = DAG.getUNDEF(MVT::f32); const SDValue Ops[] = { - Chain, - DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt - DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en - DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), - DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), + Op.getOperand(2), // tgt + DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0 + DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1 Undef, // src2 Undef, // src3 + Op.getOperand(7), // vm DAG.getTargetConstant(1, DL, MVT::i1), // compr - DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1) + Op.getOperand(3), // en + Op.getOperand(0) // Chain }; - unsigned Opc = Done->isNullValue() ? - AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE; - return DAG.getNode(Opc, DL, Op->getVTList(), Ops); + unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE; + return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0); } case Intrinsic::amdgcn_s_barrier: { if (getTargetMachine().getOptLevel() > CodeGenOpt::None) { diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 85e8d0582dc..02004c2739a 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -1101,7 +1101,7 @@ def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>; def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>; -def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { +def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> { } @@ -1380,24 +1380,21 @@ class SIMCInstr { // EXP classes //===----------------------------------------------------------------------===// -class EXP_Helper : EXPCommon< +class EXP_Helper : EXPCommon< (outs), (ins exp_tgt:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3, - exp_vm:$vm, exp_compr:$compr, i8imm:$en), - "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", - [(node (i8 timm:$tgt), (i8 timm:$en), - f32:$src0, f32:$src1, f32:$src2, f32:$src3, - (i1 timm:$compr), (i1 timm:$vm))]> { + exp_vm:$vm, exp_compr:$compr, i32imm:$en), + "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> { let AsmMatchConverter = "cvtExp"; } // Split EXP instruction into EXP and EXP_DONE so we can set // mayLoad for done=1. -multiclass EXP_m { +multiclass EXP_m { let mayLoad = done, DisableWQM = 1 in { let isPseudo = 1, isCodeGenOnly = 1 in { - def "" : EXP_Helper, + def "" : EXP_Helper, SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>; } diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index d84720f820e..7660cbd0962 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -24,8 +24,41 @@ include "BUFInstructions.td" // EXP Instructions //===----------------------------------------------------------------------===// -defm EXP : EXP_m<0, AMDGPUexport>; -defm EXP_DONE : EXP_m<1, AMDGPUexport_done>; +defm EXP : EXP_m<0>; +defm EXP_DONE : EXP_m<1>; + +// FIXME: GlobalISel successfully imports this pattern, but fails to +// select because the i1 done_val does a type check on done_val, which +// only works on register operands. +class ExpPattern : GCNPat< + (int_amdgcn_exp timm:$tgt, timm:$en, + (vt ExpSrc0:$src0), (vt ExpSrc1:$src1), + (vt ExpSrc2:$src2), (vt ExpSrc3:$src3), + done_val, timm:$vm), + (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, + ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en) +>; + +class ExpComprPattern : GCNPat< + (int_amdgcn_exp_compr timm:$tgt, timm:$en, + (vt ExpSrc0:$src0), (vt ExpSrc1:$src1), + done_val, timm:$vm), + (Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1, + (IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en) +>; + +// FIXME: The generated DAG matcher seems to have strange behavior +// with a 1-bit literal to match, so use a -1 for checking a true +// 1-bit value. +def : ExpPattern; +def : ExpPattern; +def : ExpPattern; +def : ExpPattern; + +def : ExpComprPattern; +def : ExpComprPattern; +def : ExpComprPattern; +def : ExpComprPattern; //===----------------------------------------------------------------------===// // VINTRP Instructions @@ -1782,15 +1815,6 @@ def : GCNPat < SRCMODS.NONE, $src2, $clamp, $omod) >; -// Allow integer inputs -class ExpPattern : GCNPat< - (node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)), - (Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en) ->; - -def : ExpPattern; -def : ExpPattern; - // COPY is workaround tablegen bug from multiple outputs // from S_LSHL_B32's multiple outputs from implicit scc def. def : GCNPat <