mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Remove custom node for exports
I'm mildly worried that, with IntrWriteMem on the intrinsic, exp and exp_done could potentially be reordered. Selecting the intrinsic directly requires hacking around the illegal type on SI, so that case is manually selected during lowering.
This commit is contained in:
parent
2f408e3893
commit
5127b285b9
@ -1156,7 +1156,7 @@ def int_amdgcn_exp : Intrinsic <[], [
|
||||
llvm_i1_ty, // done
|
||||
llvm_i1_ty // vm
|
||||
],
|
||||
[ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrInaccessibleMemOnly]
|
||||
[ImmArg<0>, ImmArg<1>, ImmArg<6>, ImmArg<7>, IntrWriteMem, IntrInaccessibleMemOnly]
|
||||
>;
|
||||
|
||||
// exp with compr bit set.
|
||||
@ -1167,7 +1167,7 @@ def int_amdgcn_exp_compr : Intrinsic <[], [
|
||||
LLVMMatchType<0>, // src1
|
||||
llvm_i1_ty, // done
|
||||
llvm_i1_ty], // vm
|
||||
[ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrInaccessibleMemOnly]
|
||||
[ImmArg<0>, ImmArg<1>, ImmArg<4>, ImmArg<5>, IntrWriteMem, IntrInaccessibleMemOnly]
|
||||
>;
|
||||
|
||||
def int_amdgcn_buffer_wbinvl1_sc :
|
||||
|
@ -4298,8 +4298,6 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(MAD_U64_U32)
|
||||
NODE_NAME_CASE(PERM)
|
||||
NODE_NAME_CASE(TEXTURE_FETCH)
|
||||
NODE_NAME_CASE(EXPORT)
|
||||
NODE_NAME_CASE(EXPORT_DONE)
|
||||
NODE_NAME_CASE(R600_EXPORT)
|
||||
NODE_NAME_CASE(CONST_ADDRESS)
|
||||
NODE_NAME_CASE(REGISTER_LOAD)
|
||||
|
@ -433,8 +433,6 @@ enum NodeType : unsigned {
|
||||
MUL_LOHI_U24,
|
||||
PERM,
|
||||
TEXTURE_FETCH,
|
||||
EXPORT, // exp on SI+
|
||||
EXPORT_DONE, // exp on SI+ with done bit set
|
||||
R600_EXPORT,
|
||||
CONST_ADDRESS,
|
||||
REGISTER_LOAD,
|
||||
|
@ -358,12 +358,6 @@ def AMDGPUExportOp : SDTypeProfile<0, 8, [
|
||||
|
||||
]>;
|
||||
|
||||
def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
|
||||
[SDNPHasChain, SDNPMayStore]>;
|
||||
|
||||
def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
|
||||
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
|
||||
|
||||
|
||||
def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
|
||||
|
||||
|
@ -6782,52 +6782,29 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
|
||||
switch (IntrinsicID) {
|
||||
case Intrinsic::amdgcn_exp: {
|
||||
const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
|
||||
const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
|
||||
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(8));
|
||||
const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(9));
|
||||
|
||||
const SDValue Ops[] = {
|
||||
Chain,
|
||||
DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
|
||||
DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
|
||||
Op.getOperand(4), // src0
|
||||
Op.getOperand(5), // src1
|
||||
Op.getOperand(6), // src2
|
||||
Op.getOperand(7), // src3
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // compr
|
||||
DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
|
||||
};
|
||||
|
||||
unsigned Opc = Done->isNullValue() ?
|
||||
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
|
||||
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
|
||||
}
|
||||
case Intrinsic::amdgcn_exp_compr: {
|
||||
const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(2));
|
||||
const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(3));
|
||||
SDValue Src0 = Op.getOperand(4);
|
||||
SDValue Src1 = Op.getOperand(5);
|
||||
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
|
||||
const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(7));
|
||||
// Hack around illegal type on SI by directly selecting it.
|
||||
if (isTypeLegal(Src0.getValueType()))
|
||||
return SDValue();
|
||||
|
||||
const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(6));
|
||||
SDValue Undef = DAG.getUNDEF(MVT::f32);
|
||||
const SDValue Ops[] = {
|
||||
Chain,
|
||||
DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8), // tgt
|
||||
DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8), // en
|
||||
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0),
|
||||
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1),
|
||||
Op.getOperand(2), // tgt
|
||||
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src0), // src0
|
||||
DAG.getNode(ISD::BITCAST, DL, MVT::f32, Src1), // src1
|
||||
Undef, // src2
|
||||
Undef, // src3
|
||||
Op.getOperand(7), // vm
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // compr
|
||||
DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1)
|
||||
Op.getOperand(3), // en
|
||||
Op.getOperand(0) // Chain
|
||||
};
|
||||
|
||||
unsigned Opc = Done->isNullValue() ?
|
||||
AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
|
||||
return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
|
||||
unsigned Opc = Done->isNullValue() ? AMDGPU::EXP : AMDGPU::EXP_DONE;
|
||||
return SDValue(DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops), 0);
|
||||
}
|
||||
case Intrinsic::amdgcn_s_barrier: {
|
||||
if (getTargetMachine().getOptLevel() > CodeGenOpt::None) {
|
||||
|
@ -1101,7 +1101,7 @@ def abid : NamedOperandU32<"ABID", NamedMatchClass<"ABID">>;
|
||||
|
||||
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
|
||||
|
||||
def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
|
||||
def exp_tgt : NamedOperandU32<"ExpTgt", NamedMatchClass<"ExpTgt", 0>> {
|
||||
|
||||
}
|
||||
|
||||
@ -1380,24 +1380,21 @@ class SIMCInstr <string pseudo, int subtarget> {
|
||||
// EXP classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
|
||||
class EXP_Helper<bit done> : EXPCommon<
|
||||
(outs),
|
||||
(ins exp_tgt:$tgt,
|
||||
ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
|
||||
exp_vm:$vm, exp_compr:$compr, i8imm:$en),
|
||||
"exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
|
||||
[(node (i8 timm:$tgt), (i8 timm:$en),
|
||||
f32:$src0, f32:$src1, f32:$src2, f32:$src3,
|
||||
(i1 timm:$compr), (i1 timm:$vm))]> {
|
||||
exp_vm:$vm, exp_compr:$compr, i32imm:$en),
|
||||
"exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm", []> {
|
||||
let AsmMatchConverter = "cvtExp";
|
||||
}
|
||||
|
||||
// Split EXP instruction into EXP and EXP_DONE so we can set
|
||||
// mayLoad for done=1.
|
||||
multiclass EXP_m<bit done, SDPatternOperator node> {
|
||||
multiclass EXP_m<bit done> {
|
||||
let mayLoad = done, DisableWQM = 1 in {
|
||||
let isPseudo = 1, isCodeGenOnly = 1 in {
|
||||
def "" : EXP_Helper<done, node>,
|
||||
def "" : EXP_Helper<done>,
|
||||
SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
|
||||
}
|
||||
|
||||
|
@ -24,8 +24,41 @@ include "BUFInstructions.td"
|
||||
// EXP Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm EXP : EXP_m<0, AMDGPUexport>;
|
||||
defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
|
||||
defm EXP : EXP_m<0>;
|
||||
defm EXP_DONE : EXP_m<1>;
|
||||
|
||||
// FIXME: GlobalISel successfully imports this pattern, but fails to
|
||||
// select because the i1 done_val does a type check on done_val, which
|
||||
// only works on register operands.
|
||||
class ExpPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
|
||||
(int_amdgcn_exp timm:$tgt, timm:$en,
|
||||
(vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
|
||||
(vt ExpSrc2:$src2), (vt ExpSrc3:$src3),
|
||||
done_val, timm:$vm),
|
||||
(Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
|
||||
ExpSrc2:$src2, ExpSrc3:$src3, timm:$vm, 0, timm:$en)
|
||||
>;
|
||||
|
||||
class ExpComprPattern<ValueType vt, Instruction Inst, int done_val> : GCNPat<
|
||||
(int_amdgcn_exp_compr timm:$tgt, timm:$en,
|
||||
(vt ExpSrc0:$src0), (vt ExpSrc1:$src1),
|
||||
done_val, timm:$vm),
|
||||
(Inst timm:$tgt, ExpSrc0:$src0, ExpSrc1:$src1,
|
||||
(IMPLICIT_DEF), (IMPLICIT_DEF), timm:$vm, 1, timm:$en)
|
||||
>;
|
||||
|
||||
// FIXME: The generated DAG matcher seems to have strange behavior
|
||||
// with a 1-bit literal to match, so use a -1 for checking a true
|
||||
// 1-bit value.
|
||||
def : ExpPattern<i32, EXP, 0>;
|
||||
def : ExpPattern<i32, EXP_DONE, -1>;
|
||||
def : ExpPattern<f32, EXP, 0>;
|
||||
def : ExpPattern<f32, EXP_DONE, -1>;
|
||||
|
||||
def : ExpComprPattern<v2i16, EXP, 0>;
|
||||
def : ExpComprPattern<v2i16, EXP_DONE, -1>;
|
||||
def : ExpComprPattern<v2f16, EXP, 0>;
|
||||
def : ExpComprPattern<v2f16, EXP_DONE, -1>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VINTRP Instructions
|
||||
@ -1782,15 +1815,6 @@ def : GCNPat <
|
||||
SRCMODS.NONE, $src2, $clamp, $omod)
|
||||
>;
|
||||
|
||||
// Allow integer inputs
|
||||
class ExpPattern<SDPatternOperator node, ValueType vt, Instruction Inst> : GCNPat<
|
||||
(node (i8 timm:$tgt), (i8 timm:$en), vt:$src0, vt:$src1, vt:$src2, vt:$src3, (i1 timm:$compr), (i1 timm:$vm)),
|
||||
(Inst i8:$tgt, vt:$src0, vt:$src1, vt:$src2, vt:$src3, i1:$vm, i1:$compr, i8:$en)
|
||||
>;
|
||||
|
||||
def : ExpPattern<AMDGPUexport, i32, EXP>;
|
||||
def : ExpPattern<AMDGPUexport_done, i32, EXP_DONE>;
|
||||
|
||||
// COPY is a workaround for a tablegen bug with multiple outputs:
|
||||
// S_LSHL_B32 has multiple outputs because of its implicit scc def.
|
||||
def : GCNPat <
|
||||
|
Loading…
Reference in New Issue
Block a user