1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

AMDGPU: Remove custom node for exports

I'm mildly worried about potentially reordering exp/exp_done with
IntrWriteMem on the intrinsic.

Requires hacking out the illegal type on SI, so manually select that
case during lowering.
This commit is contained in:
Matt Arsenault 2020-01-04 19:49:17 -05:00 committed by Matt Arsenault
parent 2f408e3893
commit 5127b285b9
7 changed files with 55 additions and 67 deletions

View File

@ -1156,7 +1156,7 @@ def int_amdgcn_exp : Intrinsic <[], [
llvm_i1_ty, // done
llvm_i1_ty // vm
],
[ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly]
[ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrWriteMem, IntrInaccessibleMemOnly]
>;
// exp with compr bit set.
@ -1167,7 +1167,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
LLVMMatchType<0>, // src1
llvm_i1_ty, // done
llvm_i1_ty], // vm
[ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly]
[ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrWriteMem, IntrInaccessibleMemOnly]
>;
def int_amdgcn_buffer_wbinvl1_sc :

View File

@ -4298,8 +4298,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(MAD_U64_U32)
NODE_NAME_CASE(PERM)
NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(EXPORT)
NODE_NAME_CASE(EXPORT_DONE)
NODE_NAME_CASE(R600_EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)

View File

@ -433,8 +433,6 @@ enum NodeType : unsigned {
MUL_LOHI_U24,
PERM,
TEXTURE_FETCH,
EXPORT, // exp on SI+
EXPORT_DONE, // exp on SI+ with done bit set
R600_EXPORT,
CONST_ADDRESS,
REGISTER_LOAD,

View File

@ -358,12 +358,6 @@ def AMDGPUExportOp : SDTypeProfile<0, 8, [
]>;
def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
[SDNPHasChain, SDNPMayStore]>;
def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;

View File

@ -6782,52 +6782,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
switch (IntrinsicID) {
case Intrinsic::amdgcn_exp: {
const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(8));
const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(9));
const SDValue Ops[] = {
Chain,
DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
Op.getOperand(4), // src0
Op.getOperand(5), // src1
Op.getOperand(6), // src2
Op.getOperand(7), // src3
DAG.getTargetConstant(0, DL, MVT::i1), // compr
DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
};
unsigned Opc = Done->isNullValue() ?
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
}
case Intrinsic::amdgcn_exp_compr: {
const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
SDValue Src0 = Op.getOperand(4);
SDValue Src1 = Op.getOperand(5);
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(7));
// Hack around illegal type on SI by directly selecting it.
if (isTypeLegal(Src0.getValueType()))
return SDValue();
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
SDValue Undef = DAG.getUNDEF(MVT::f32);
const SDValue Ops[] = {
Chain,
DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0),
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1),
Op.getOperand(2), // tgt
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1
Undef, // src2
Undef, // src3
Op.getOperand(7), // vm
DAG.getTargetConstant(1, DL, MVT::i1), // compr
DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
Op.getOperand(3), // en
Op.getOperand(0) // Chain
};
unsigned Opc = Done->isNullValue() ?
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
}
case Intrinsic::amdgcn_s_barrier: {
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {

View File

@ -1101,7 +1101,7 @@ def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
}
@ -1380,24 +1380,21 @@ class SIMCInstr <string pseudo, int subtarget> {
// EXP classes
//===----------------------------------------------------------------------===//
class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
class EXP_Helper<bit done> : EXPCommon<
(outs),
(ins exp_tgt:$tgt,
ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
exp_vm:$vm, exp_compr:$compr, i8imm:$en),
"exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
[(node (i8 timm:$tgt), (i8 timm:$en),
f32:$src0, f32:$src1, f32:$src2, f32:$src3,
(i1 timm:$compr), (i1 timm:$vm))]> {
exp_vm:$vm, exp_compr:$compr, i32imm:$en),
"exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> {
let AsmMatchConverter = "cvtExp";
}
// Split EXP instruction into EXP and EXP_DONE so we can set
// mayLoad for done=1.
multiclass EXP_m<bit done, SDPatternOperator node> {
multiclass EXP_m<bit done> {
let mayLoad = done, DisableWQM = 1 in {
let isPseudo = 1, isCodeGenOnly = 1 in {
def "" : EXP_Helper<done, node>,
def "" : EXP_Helper<done>,
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
}

View File

@ -24,8 +24,41 @@ include "BUFInstructions.td"
// EXP Instructions
//===----------------------------------------------------------------------===//
defm EXP : EXP_m<0, AMDGPUexport>;
defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
defm EXP : EXP_m<0>;
defm EXP_DONE : EXP_m<1>;
// FIXME: GlobalISel successfully imports this pattern, but fails to
// select because the i1 done_val does a type check on done_val, which
// only works on register operands.
class ExpPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
(int_amdgcn_exp timm:$tgt, timm:$en,
(vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
(vt ExpSrc2:$src2), (vt ExpSrc3:$src3),
done_val, timm:$vm),
(Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en)
>;
class ExpComprPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
(int_amdgcn_exp_compr timm:$tgt, timm:$en,
(vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
done_val, timm:$vm),
(Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
(IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en)
>;
// FIXME: The generated DAG matcher seems to have strange behavior
// with a 1-bit literal to match, so use a -1 for checking a true
// 1-bit value.
def : ExpPattern<i32, EXP, 0>;
def : ExpPattern<i32, EXP_DONE, -1>;
def : ExpPattern<f32, EXP, 0>;
def : ExpPattern<f32, EXP_DONE, -1>;
def : ExpComprPattern<v2i16, EXP, 0>;
def : ExpComprPattern<v2i16, EXP_DONE, -1>;
def : ExpComprPattern<v2f16, EXP, 0>;
def : ExpComprPattern<v2f16, EXP_DONE, -1>;
//===----------------------------------------------------------------------===//
// VINTRP Instructions
@ -1782,15 +1815,6 @@ def : GCNPat <
SRCMODS.NONE, $src2, $clamp, $omod)
>;
// Allow integer inputs
class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPat<
(node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
(Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
>;
def : ExpPattern<AMDGPUexport, i32, EXP>;
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
// COPY is workaround tablegen bug from multiple outputs
// from S_LSHL_B32's multiple outputs from implicit scc def.
def : GCNPat <