mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
R600/SI: Remove explicit m0 operand from v_interp instructions
Instead add m0 as an implicit operand. This helps avoid spills of the m0 register in some cases. llvm-svn: 237140
This commit is contained in:
parent
29eae3af52
commit
1617b6ced4
@ -2678,6 +2678,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
NODE_NAME_CASE(CONST_DATA_PTR)
|
||||
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
|
||||
NODE_NAME_CASE(SENDMSG)
|
||||
NODE_NAME_CASE(INTERP_MOV)
|
||||
NODE_NAME_CASE(INTERP_P1)
|
||||
NODE_NAME_CASE(INTERP_P2)
|
||||
NODE_NAME_CASE(STORE_MSKOR)
|
||||
NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
|
||||
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
|
||||
|
@ -289,6 +289,9 @@ enum NodeType : unsigned {
|
||||
/// Pointer to the start of the shader's constant data.
|
||||
CONST_DATA_PTR,
|
||||
SENDMSG,
|
||||
INTERP_MOV,
|
||||
INTERP_P1,
|
||||
INTERP_P2,
|
||||
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
||||
STORE_MSKOR,
|
||||
LOAD_CONSTANT,
|
||||
|
@ -223,6 +223,18 @@ def AMDGPUsendmsg : SDNode<"AMDGPUISD::SENDMSG",
|
||||
SDTypeProfile<0, 1, [SDTCisInt<0>]>,
|
||||
[SDNPHasChain, SDNPInGlue]>;
|
||||
|
||||
// DAG node for AMDGPUISD::INTERP_MOV: one floating-point result and three
// operands, plus an incoming glue value. In the SI_fs_constant lowering shown
// in this change, that glue is produced by the copy to M0.
def AMDGPUinterp_mov : SDNode<"AMDGPUISD::INTERP_MOV",
|
||||
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
|
||||
[SDNPInGlue]>;
|
||||
|
||||
// DAG node for AMDGPUISD::INTERP_P1: one floating-point result and three
// operands. It consumes glue and also produces glue; the SI_fs_interp lowering
// shown in this change feeds the outgoing glue into the INTERP_P2 node.
def AMDGPUinterp_p1 : SDNode<"AMDGPUISD::INTERP_P1",
|
||||
SDTypeProfile<1, 3, [SDTCisFP<0>]>,
|
||||
[SDNPInGlue, SDNPOutGlue]>;
|
||||
|
||||
// DAG node for AMDGPUISD::INTERP_P2: one floating-point result and four
// operands, plus an incoming glue value. In the SI_fs_interp lowering shown in
// this change, the first operand is the INTERP_P1 result and the glue is the
// glue output of that same INTERP_P1 node.
def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
|
||||
SDTypeProfile<1, 4, [SDTCisFP<0>]>,
|
||||
[SDNPInGlue]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Flow Control Profile Types
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -948,7 +948,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
||||
case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
|
||||
return DAG.getNode(ISD::FSUB, DL, VT, Op.getOperand(1),
|
||||
DAG.getNode(ISD::FFLOOR, DL, VT, Op.getOperand(1)));
|
||||
|
||||
case AMDGPUIntrinsic::SI_fs_constant: {
|
||||
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
|
||||
SDValue Glue = M0.getValue(1);
|
||||
return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
|
||||
DAG.getConstant(2, DL, MVT::i32), // P0
|
||||
Op.getOperand(1), Op.getOperand(2), Glue);
|
||||
}
|
||||
case AMDGPUIntrinsic::SI_fs_interp: {
|
||||
SDValue IJ = Op.getOperand(4);
|
||||
SDValue I = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
|
||||
DAG.getConstant(0, DL, MVT::i32));
|
||||
SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
|
||||
DAG.getConstant(1, DL, MVT::i32));
|
||||
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
|
||||
SDValue Glue = M0.getValue(1);
|
||||
SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
|
||||
DAG.getVTList(MVT::f32, MVT::Glue),
|
||||
I, Op.getOperand(1), Op.getOperand(2), Glue);
|
||||
Glue = SDValue(P1.getNode(), 1);
|
||||
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
|
||||
Op.getOperand(1), Op.getOperand(2), Glue);
|
||||
}
|
||||
default:
|
||||
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
||||
}
|
||||
|
@ -1670,8 +1670,8 @@ class VINTRP_Real_vi <bits <2> op, string opName, dag outs, dag ins,
|
||||
SIMCInstr<opName, SISubtarget.VI>;
|
||||
|
||||
multiclass VINTRP_m <bits <2> op, string opName, dag outs, dag ins, string asm,
|
||||
string disableEncoding = "", string constraints = "",
|
||||
list<dag> pattern = []> {
|
||||
list<dag> pattern = [],
|
||||
string disableEncoding = "", string constraints = ""> {
|
||||
let DisableEncoding = disableEncoding,
|
||||
Constraints = constraints in {
|
||||
def "" : VINTRP_Pseudo <opName, outs, ins, pattern>;
|
||||
|
@ -1433,28 +1433,36 @@ defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
|
||||
// VINTRP Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Uses = [M0] in {
|
||||
|
||||
// FIXME: Specify SchedRW for VINTRP instructions.
|
||||
defm V_INTERP_P1_F32 : VINTRP_m <
|
||||
0x00000000, "v_interp_p1_f32",
|
||||
(outs VGPR_32:$dst),
|
||||
(ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
|
||||
"v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [$m0]",
|
||||
"$m0">;
|
||||
(ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
|
||||
"v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
|
||||
[(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
|
||||
(i32 imm:$attr)))]>;
|
||||
|
||||
defm V_INTERP_P2_F32 : VINTRP_m <
|
||||
0x00000001, "v_interp_p2_f32",
|
||||
(outs VGPR_32:$dst),
|
||||
(ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
|
||||
"v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [$m0]",
|
||||
"$src0,$m0",
|
||||
(ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
|
||||
"v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
|
||||
[(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
|
||||
(i32 imm:$attr)))],
|
||||
"$src0",
|
||||
"$src0 = $dst">;
|
||||
|
||||
defm V_INTERP_MOV_F32 : VINTRP_m <
|
||||
0x00000002, "v_interp_mov_f32",
|
||||
(outs VGPR_32:$dst),
|
||||
(ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr, M0Reg:$m0),
|
||||
"v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [$m0]",
|
||||
"$m0">;
|
||||
(ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
|
||||
"v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [m0]",
|
||||
[(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
|
||||
(i32 imm:$attr)))]>;
|
||||
|
||||
} // End Uses = [M0]
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VOP2 Instructions
|
||||
@ -2724,27 +2732,6 @@ def : Pat <
|
||||
(S_MOV_B64 (f64 (bitcast_fpimm_to_i64 InlineFPImm<f64>:$imm)))
|
||||
>;
|
||||
|
||||
/********** ====================== **********/
|
||||
/********** Interpolation Patterns **********/
|
||||
/********** ====================== **********/
|
||||
|
||||
// The value of $params is constant throughout the entire kernel.
|
||||
// We need to use S_MOV_B32 $params, because CSE ignores copies, so
|
||||
// without it we end up with a lot of redundant moves.
|
||||
|
||||
def : Pat <
|
||||
(int_SI_fs_constant imm:$attr_chan, imm:$attr, i32:$params),
|
||||
(V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(int_SI_fs_interp imm:$attr_chan, imm:$attr, i32:$params, v2i32:$ij),
|
||||
(V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG v2i32:$ij, sub0),
|
||||
imm:$attr_chan, imm:$attr, (S_MOV_B32 $params)),
|
||||
(EXTRACT_SUBREG $ij, sub1),
|
||||
imm:$attr_chan, imm:$attr, (S_MOV_B32 $params))
|
||||
>;
|
||||
|
||||
/********** ================== **********/
|
||||
/********** Intrinsic Patterns **********/
|
||||
/********** ================== **********/
|
||||
|
Loading…
Reference in New Issue
Block a user