mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
R600/SI: Remove explicit m0 operand from s_sendmsg
Instead, add m0 as an implicit operand. This allows us to avoid using the M0Reg register class and eliminates a number of unnecessary spills when using s_sendmsg instructions. This impacts one shader in the shader-db: SGPRS: 48 -> 40 (-16.67 %); VGPRS: 112 -> 108 (-3.57 %); Code Size: 40132 -> 38796 (-3.33 %) bytes; LDS: 0 -> 0 (0.00 %) blocks; Scratch: 2048 -> 0 (-100.00 %) bytes per wave. llvm-svn: 237133
This commit is contained in:
parent
adcfdabda4
commit
5dde4f57b7
@ -2677,6 +2677,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
|
||||
NODE_NAME_CASE(CONST_DATA_PTR)
|
||||
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
|
||||
NODE_NAME_CASE(SENDMSG)
|
||||
NODE_NAME_CASE(STORE_MSKOR)
|
||||
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
|
||||
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
|
||||
|
@ -288,6 +288,7 @@ enum NodeType : unsigned {
|
||||
BUILD_VERTICAL_VECTOR,
|
||||
/// Pointer to the start of the shader's constant data.
|
||||
CONST_DATA_PTR,
|
||||
SENDMSG,
|
||||
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||
STORE_MSKOR,
|
||||
LOAD_CONSTANT,
|
||||
|
@ -219,6 +219,10 @@ def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
|
||||
[]
|
||||
>;
|
||||
|
||||
// DAG node for AMDGPUISD::SENDMSG. Its type profile declares no results and a
// single integer operand; the node carries a chain and accepts incoming glue.
def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
|
||||
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
|
||||
[SDNPHasChain, SDNPInGlue]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Flow Control Profile Types
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -838,6 +838,23 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
|
||||
return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi);
|
||||
}
|
||||
|
||||
/// Emit the nodes that write \p V into the m0 register.
///
/// Builds an S_MOV_B32 machine node from \p V and copies its result into
/// AMDGPU::M0 with a CopyToReg chained on \p Chain. The returned CopyToReg
/// node also produces a glue result (value 1), which a node that reads m0
/// can take as its glue operand.
SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL,
|
||||
SDValue V) const {
|
||||
// We can't use CopyToReg alone, because MachineCSE won't combine COPY
|
||||
// instructions, so we would end up with redundant moves to m0.
|
||||
//
|
||||
// We can't use S_MOV_B32 alone, because there is no way to specify m0 as
|
||||
// the destination register.
|
||||
//
|
||||
// We have to use them both. MachineCSE will combine all the S_MOV_B32
|
||||
// instructions and the register coalescer will eliminate the extra copies.
|
||||
SDNode *M0 = DAG.getMachineNode(AMDGPU::S_MOV_B32, DL, V.getValueType(), V);
|
||||
return DAG.getCopyToReg(Chain, DL, DAG.getRegister(AMDGPU::M0, MVT::i32),
|
||||
SDValue(M0, 0), SDValue()); // Glue
|
||||
// A null SDValue creates
|
||||
// a glue result.
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
@ -940,12 +957,18 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
MachineFunction &MF = DAG.getMachineFunction();
|
||||
SDLoc DL(Op);
|
||||
SDValue Chain = Op.getOperand(0);
|
||||
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
||||
|
||||
switch (IntrinsicID) {
|
||||
case AMDGPUIntrinsic::SI_sendmsg: {
|
||||
Chain = copyToM0(DAG, Chain, DL, Op.getOperand(3));
|
||||
SDValue Glue = Chain.getValue(1);
|
||||
return DAG.getNode(AMDGPUISD::SENDMSG, DL, MVT::Other, Chain,
|
||||
Op.getOperand(2), Glue);
|
||||
}
|
||||
case AMDGPUIntrinsic::SI_tbuffer_store: {
|
||||
SDLoc DL(Op);
|
||||
SDValue Ops[] = {
|
||||
Chain,
|
||||
Op.getOperand(2),
|
||||
|
@ -117,6 +117,7 @@ public:
|
||||
std::pair<unsigned, const TargetRegisterClass *> getRegForInlineAsmConstraint(
|
||||
const TargetRegisterInfo *TRI,
|
||||
const std::string &Constraint, MVT VT) const override;
|
||||
SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const;
|
||||
};
|
||||
|
||||
} // End namespace llvm
|
||||
|
@ -488,13 +488,11 @@ def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
|
||||
def S_SLEEP : SOPP <0x0000000e, (ins i16imm:$simm16), "s_sleep $simm16">;
|
||||
// s_setprio takes a single 16-bit immediate. The operand is named $simm16 to
// match the neighbouring SOPP definitions (S_SETHALT, S_SLEEP, S_SENDMSGHALT,
// S_TRAP); it was previously misspelled $sim16.
// NOTE(review): presumably the name must also match the SOPP format's
// encoding field for by-name operand binding -- confirm in the format file.
def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
|
||||
|
||||
let Uses = [EXEC] in {
|
||||
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16, M0Reg:$m0), "s_sendmsg $simm16",
|
||||
[(int_SI_sendmsg imm:$simm16, M0Reg:$m0)]
|
||||
> {
|
||||
let DisableEncoding = "$m0";
|
||||
}
|
||||
} // End Uses = [EXEC]
|
||||
// s_sendmsg takes only the message immediate as an explicit operand; EXEC and
// M0 are listed as implicit uses instead of m0 being passed as an operand.
// The instruction is selected from the AMDGPUsendmsg node.
let Uses = [EXEC, M0] in {
|
||||
def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
|
||||
[(AMDGPUsendmsg (i32 imm:$simm16))]
|
||||
>;
|
||||
} // End Uses = [EXEC, M0]
|
||||
|
||||
def S_SENDMSGHALT : SOPP <0x00000011, (ins i16imm:$simm16), "s_sendmsghalt $simm16">;
|
||||
def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
|
||||
|
Loading…
Reference in New Issue
Block a user