mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 12:41:49 +01:00
AMDGPU: Make f16 ConstantFP legal
Not having this legal led to combine failures, resulting in dumb things like bitcasts of constants not being folded away. The only reason I'm leaving the v_mov_b32 hack that f32 already uses is to avoid madak formation test regressions. PeepholeOptimizer has an ordering issue where the immediate fold attempt is into the sgpr->vgpr copy instead of the actual use. Running it twice avoids that problem.
llvm-svn: 289096
This commit is contained in:
parent
e735965677
commit
2c46312910
@ -277,7 +277,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
|
||||
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
|
||||
|
||||
// F16 - Constant Actions.
|
||||
setOperationAction(ISD::ConstantFP, MVT::f16, Custom);
|
||||
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
|
||||
|
||||
// F16 - Load/Store Actions.
|
||||
setOperationAction(ISD::LOAD, MVT::f16, Promote);
|
||||
@ -1848,9 +1848,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
||||
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
|
||||
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
|
||||
case ISD::TRAP: return lowerTRAP(Op, DAG);
|
||||
|
||||
case ISD::ConstantFP:
|
||||
return lowerConstantFP(Op, DAG);
|
||||
case ISD::FP_ROUND:
|
||||
return lowerFP_ROUND(Op, DAG);
|
||||
}
|
||||
@ -2055,15 +2052,6 @@ SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG,
|
||||
DAG.getNode(ISD::FTRUNC, DL, VT, Op);
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::lowerConstantFP(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(Op)) {
|
||||
return DAG.getConstant(FP->getValueAPF().bitcastToAPInt().getZExtValue(),
|
||||
SDLoc(Op), MVT::i32);
|
||||
}
|
||||
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
|
||||
assert(Op.getValueType() == MVT::f16 &&
|
||||
"Do not know how to custom lower FP_ROUND for non-f16 type");
|
||||
|
@ -53,9 +53,6 @@ class SITargetLowering final : public AMDGPUTargetLowering {
|
||||
const SDLoc &DL,
|
||||
EVT VT) const;
|
||||
|
||||
/// \brief Custom lowering for ISD::ConstantFP.
|
||||
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
/// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
|
||||
SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
|
@ -706,11 +706,24 @@ def : Pat <
|
||||
(S_MOV_B32 imm:$imm)
|
||||
>;
|
||||
|
||||
// FIXME: Workaround for ordering issue with peephole optimizer where
|
||||
// a register class copy interferes with immediate folding. Should
|
||||
// use s_mov_b32, which can be shrunk to s_movk_i32
|
||||
def : Pat <
|
||||
(VGPRImm<(f16 fpimm)>:$imm),
|
||||
(V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(f32 fpimm:$imm),
|
||||
(S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(f16 fpimm:$imm),
|
||||
(S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
|
||||
>;
|
||||
|
||||
def : Pat <
|
||||
(i32 frameindex:$fi),
|
||||
(V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))
|
||||
|
@ -47,8 +47,9 @@ two:
|
||||
|
||||
; SI: v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
|
||||
; SI: v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
|
||||
; VI: v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
|
||||
; SI: s_cbranch_vccz
|
||||
|
||||
; VI: v_cmp_nlt_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
|
||||
; VI: s_cbranch_vccnz
|
||||
|
||||
; VI: one{{$}}
|
||||
@ -85,7 +86,7 @@ two:
|
||||
|
||||
; SI: v_cvt_f32_f16_e32 v[[A_F32:[0-9]+]], v[[A_F16]]
|
||||
; SI: v_cmp_nlt_f32_e32 vcc, v[[A_F32]], v[[B_F32]]
|
||||
; VI: v_cmp_nge_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
|
||||
; VI: v_cmp_ngt_f16_e32 vcc, v[[B_F16]], v[[A_F16]]
|
||||
; GCN: s_cbranch_vccnz
|
||||
|
||||
; GCN: one{{$}}
|
||||
|
Loading…
x
Reference in New Issue
Block a user