mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
AMDGPU: Use CopyToReg for interp intrinsic lowering
The interp intrinsic lowering doesn't use the default M0 value, so it doesn't benefit from the hack in copyToM0 that helps optimize it. llvm-svn: 375450
This commit is contained in:
parent
17322b4070
commit
cc63a99c6d
@ -5877,34 +5877,35 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||||||
case Intrinsic::amdgcn_fdiv_fast:
|
case Intrinsic::amdgcn_fdiv_fast:
|
||||||
return lowerFDIV_FAST(Op, DAG);
|
return lowerFDIV_FAST(Op, DAG);
|
||||||
case Intrinsic::amdgcn_interp_mov: {
|
case Intrinsic::amdgcn_interp_mov: {
|
||||||
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
|
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||||
SDValue Glue = M0.getValue(1);
|
Op.getOperand(4), SDValue());
|
||||||
return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
|
return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
|
||||||
Op.getOperand(2), Op.getOperand(3), Glue);
|
Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
|
||||||
}
|
}
|
||||||
case Intrinsic::amdgcn_interp_p1: {
|
case Intrinsic::amdgcn_interp_p1: {
|
||||||
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
|
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||||
SDValue Glue = M0.getValue(1);
|
Op.getOperand(4), SDValue());
|
||||||
return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
|
return DAG.getNode(AMDGPUISD::INTERP_P1, DL, MVT::f32, Op.getOperand(1),
|
||||||
Op.getOperand(2), Op.getOperand(3), Glue);
|
Op.getOperand(2), Op.getOperand(3), ToM0.getValue(1));
|
||||||
}
|
}
|
||||||
case Intrinsic::amdgcn_interp_p2: {
|
case Intrinsic::amdgcn_interp_p2: {
|
||||||
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
|
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||||
SDValue Glue = SDValue(M0.getNode(), 1);
|
Op.getOperand(5), SDValue());
|
||||||
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
|
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, Op.getOperand(1),
|
||||||
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
|
Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
|
||||||
Glue);
|
ToM0.getValue(1));
|
||||||
}
|
}
|
||||||
case Intrinsic::amdgcn_interp_p1_f16: {
|
case Intrinsic::amdgcn_interp_p1_f16: {
|
||||||
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(5));
|
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||||
SDValue Glue = M0.getValue(1);
|
Op.getOperand(5), SDValue());
|
||||||
|
|
||||||
if (getSubtarget()->getLDSBankCount() == 16) {
|
if (getSubtarget()->getLDSBankCount() == 16) {
|
||||||
// 16 bank LDS
|
// 16 bank LDS
|
||||||
SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
|
SDValue S = DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32,
|
||||||
DAG.getConstant(2, DL, MVT::i32), // P0
|
DAG.getConstant(2, DL, MVT::i32), // P0
|
||||||
Op.getOperand(2), // Attrchan
|
Op.getOperand(2), // Attrchan
|
||||||
Op.getOperand(3), // Attr
|
Op.getOperand(3), // Attr
|
||||||
Glue);
|
ToM0.getValue(1));
|
||||||
SDValue Ops[] = {
|
SDValue Ops[] = {
|
||||||
Op.getOperand(1), // Src0
|
Op.getOperand(1), // Src0
|
||||||
Op.getOperand(2), // Attrchan
|
Op.getOperand(2), // Attrchan
|
||||||
@ -5927,14 +5928,14 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||||||
Op.getOperand(4), // high
|
Op.getOperand(4), // high
|
||||||
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
|
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
|
||||||
DAG.getTargetConstant(0, DL, MVT::i32), // $omod
|
DAG.getTargetConstant(0, DL, MVT::i32), // $omod
|
||||||
Glue
|
ToM0.getValue(1)
|
||||||
};
|
};
|
||||||
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
|
return DAG.getNode(AMDGPUISD::INTERP_P1LL_F16, DL, MVT::f32, Ops);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
case Intrinsic::amdgcn_interp_p2_f16: {
|
case Intrinsic::amdgcn_interp_p2_f16: {
|
||||||
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(6));
|
SDValue ToM0 = DAG.getCopyToReg(DAG.getEntryNode(), DL, AMDGPU::M0,
|
||||||
SDValue Glue = SDValue(M0.getNode(), 1);
|
Op.getOperand(6), SDValue());
|
||||||
SDValue Ops[] = {
|
SDValue Ops[] = {
|
||||||
Op.getOperand(2), // Src0
|
Op.getOperand(2), // Src0
|
||||||
Op.getOperand(3), // Attrchan
|
Op.getOperand(3), // Attrchan
|
||||||
@ -5944,7 +5945,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
|
|||||||
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
|
DAG.getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
|
||||||
Op.getOperand(5), // high
|
Op.getOperand(5), // high
|
||||||
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
|
DAG.getTargetConstant(0, DL, MVT::i1), // $clamp
|
||||||
Glue
|
ToM0.getValue(1)
|
||||||
};
|
};
|
||||||
return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
|
return DAG.getNode(AMDGPUISD::INTERP_P2_F16, DL, MVT::f16, Ops);
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,8 @@
|
|||||||
define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
|
define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
|
||||||
; GFX9-32BANK-LABEL: interp_f16:
|
; GFX9-32BANK-LABEL: interp_f16:
|
||||||
; GFX9-32BANK: ; %bb.0: ; %main_body
|
; GFX9-32BANK: ; %bb.0: ; %main_body
|
||||||
; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
|
|
||||||
; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
|
; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
|
||||||
|
; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
|
||||||
; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
|
; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
|
||||||
; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y
|
; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y
|
||||||
; GFX9-32BANK-NEXT: v_mov_b32_e32 v2, s1
|
; GFX9-32BANK-NEXT: v_mov_b32_e32 v2, s1
|
||||||
@ -20,8 +20,8 @@ define amdgpu_ps half @interp_f16(float inreg %i, float inreg %j, i32 inreg %m0)
|
|||||||
;
|
;
|
||||||
; GFX8-32BANK-LABEL: interp_f16:
|
; GFX8-32BANK-LABEL: interp_f16:
|
||||||
; GFX8-32BANK: ; %bb.0: ; %main_body
|
; GFX8-32BANK: ; %bb.0: ; %main_body
|
||||||
; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
|
|
||||||
; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
|
; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
|
||||||
|
; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
|
||||||
; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
|
; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
|
||||||
; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y
|
; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v1, v0, attr2.y
|
||||||
; GFX8-32BANK-NEXT: v_mov_b32_e32 v2, s1
|
; GFX8-32BANK-NEXT: v_mov_b32_e32 v2, s1
|
||||||
@ -119,8 +119,8 @@ main_body:
|
|||||||
define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
|
define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 inreg %m0) #0 {
|
||||||
; GFX9-32BANK-LABEL: interp_p2_m0_setup:
|
; GFX9-32BANK-LABEL: interp_p2_m0_setup:
|
||||||
; GFX9-32BANK: ; %bb.0: ; %main_body
|
; GFX9-32BANK: ; %bb.0: ; %main_body
|
||||||
; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
|
|
||||||
; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
|
; GFX9-32BANK-NEXT: s_mov_b32 m0, s2
|
||||||
|
; GFX9-32BANK-NEXT: v_mov_b32_e32 v0, s0
|
||||||
; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
|
; GFX9-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
|
||||||
; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
|
; GFX9-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
|
||||||
; GFX9-32BANK-NEXT: ;;#ASMSTART
|
; GFX9-32BANK-NEXT: ;;#ASMSTART
|
||||||
@ -136,8 +136,8 @@ define amdgpu_ps half @interp_p2_m0_setup(float inreg %i, float inreg %j, i32 in
|
|||||||
;
|
;
|
||||||
; GFX8-32BANK-LABEL: interp_p2_m0_setup:
|
; GFX8-32BANK-LABEL: interp_p2_m0_setup:
|
||||||
; GFX8-32BANK: ; %bb.0: ; %main_body
|
; GFX8-32BANK: ; %bb.0: ; %main_body
|
||||||
; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
|
|
||||||
; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
|
; GFX8-32BANK-NEXT: s_mov_b32 m0, s2
|
||||||
|
; GFX8-32BANK-NEXT: v_mov_b32_e32 v0, s0
|
||||||
; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
|
; GFX8-32BANK-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
|
||||||
; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
|
; GFX8-32BANK-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
|
||||||
; GFX8-32BANK-NEXT: ;;#ASMSTART
|
; GFX8-32BANK-NEXT: ;;#ASMSTART
|
||||||
|
Loading…
Reference in New Issue
Block a user