mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
AMDGPU: Fix not emitting nofpexcept on fdiv expansion
In this awkward case, we have to emit custom pseudo-constrained FP wrappers. InstrEmitter concludes that since a mayRaiseFPException instruction had a chain, it can't add nofpexcept. Test deferred until mayRaiseFPException is really set on everything.
This commit is contained in:
parent
b5e5fd1027
commit
6b892181f5
@ -7907,9 +7907,10 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
|
||||
}
|
||||
|
||||
static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
|
||||
EVT VT, SDValue A, SDValue B, SDValue GlueChain) {
|
||||
EVT VT, SDValue A, SDValue B, SDValue GlueChain,
|
||||
SDNodeFlags Flags) {
|
||||
if (GlueChain->getNumValues() <= 1) {
|
||||
return DAG.getNode(Opcode, SL, VT, A, B);
|
||||
return DAG.getNode(Opcode, SL, VT, A, B, Flags);
|
||||
}
|
||||
|
||||
assert(GlueChain->getNumValues() == 3);
|
||||
@ -7922,15 +7923,16 @@ static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
|
||||
break;
|
||||
}
|
||||
|
||||
return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B,
|
||||
GlueChain.getValue(2));
|
||||
return DAG.getNode(Opcode, SL, VTList,
|
||||
{GlueChain.getValue(1), A, B, GlueChain.getValue(2)},
|
||||
Flags);
|
||||
}
|
||||
|
||||
static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
|
||||
EVT VT, SDValue A, SDValue B, SDValue C,
|
||||
SDValue GlueChain) {
|
||||
SDValue GlueChain, SDNodeFlags Flags) {
|
||||
if (GlueChain->getNumValues() <= 1) {
|
||||
return DAG.getNode(Opcode, SL, VT, A, B, C);
|
||||
return DAG.getNode(Opcode, SL, VT, {A, B, C}, Flags);
|
||||
}
|
||||
|
||||
assert(GlueChain->getNumValues() == 3);
|
||||
@ -7943,8 +7945,9 @@ static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
|
||||
break;
|
||||
}
|
||||
|
||||
return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B, C,
|
||||
GlueChain.getValue(2));
|
||||
return DAG.getNode(Opcode, SL, VTList,
|
||||
{GlueChain.getValue(1), A, B, C, GlueChain.getValue(2)},
|
||||
Flags);
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
|
||||
@ -8018,6 +8021,13 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||
if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
|
||||
return FastLowered;
|
||||
|
||||
// The selection matcher assumes anything with a chain is selecting to a
|
||||
// mayRaiseFPException machine instruction. Since we're introducing a chain
|
||||
// here, we need to explicitly report nofpexcept for the regular fdiv
|
||||
// lowering.
|
||||
SDNodeFlags Flags = Op->getFlags();
|
||||
Flags.setNoFPExcept(true);
|
||||
|
||||
SDLoc SL(Op);
|
||||
SDValue LHS = Op.getOperand(0);
|
||||
SDValue RHS = Op.getOperand(1);
|
||||
@ -8027,15 +8037,15 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
|
||||
|
||||
SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
|
||||
RHS, RHS, LHS);
|
||||
{RHS, RHS, LHS}, Flags);
|
||||
SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
|
||||
LHS, RHS, LHS);
|
||||
{LHS, RHS, LHS}, Flags);
|
||||
|
||||
// Denominator is scaled to not be denormal, so using rcp is ok.
|
||||
SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32,
|
||||
DenominatorScaled);
|
||||
DenominatorScaled, Flags);
|
||||
SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
|
||||
DenominatorScaled);
|
||||
DenominatorScaled, Flags);
|
||||
|
||||
const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
|
||||
(4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
|
||||
@ -8045,6 +8055,10 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||
const bool HasFP32Denormals = hasFP32Denormals(DAG.getMachineFunction());
|
||||
|
||||
if (!HasFP32Denormals) {
|
||||
// Note we can't use the STRICT_FMA/STRICT_FMUL for the non-strict FDIV
|
||||
// lowering. The chain dependence is insufficient, and we need glue. We do
|
||||
// not need the glue variants in a strictfp function.
|
||||
|
||||
SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
|
||||
SDNode *EnableDenorm;
|
||||
@ -8072,21 +8086,22 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||
}
|
||||
|
||||
SDValue Fma0 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0,
|
||||
ApproxRcp, One, NegDivScale0);
|
||||
ApproxRcp, One, NegDivScale0, Flags);
|
||||
|
||||
SDValue Fma1 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp,
|
||||
ApproxRcp, Fma0);
|
||||
ApproxRcp, Fma0, Flags);
|
||||
|
||||
SDValue Mul = getFPBinOp(DAG, ISD::FMUL, SL, MVT::f32, NumeratorScaled,
|
||||
Fma1, Fma1);
|
||||
Fma1, Fma1, Flags);
|
||||
|
||||
SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
|
||||
NumeratorScaled, Mul);
|
||||
NumeratorScaled, Mul, Flags);
|
||||
|
||||
SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
|
||||
SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32,
|
||||
Fma2, Fma1, Mul, Fma2, Flags);
|
||||
|
||||
SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
|
||||
NumeratorScaled, Fma3);
|
||||
NumeratorScaled, Fma3, Flags);
|
||||
|
||||
if (!HasFP32Denormals) {
|
||||
SDNode *DisableDenorm;
|
||||
@ -8113,9 +8128,9 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
||||
SDValue Scale = NumeratorScaled.getValue(1);
|
||||
SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32,
|
||||
Fma4, Fma1, Fma3, Scale);
|
||||
{Fma4, Fma1, Fma3, Scale}, Flags);
|
||||
|
||||
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
|
||||
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS, Flags);
|
||||
}
|
||||
|
||||
SDValue SITargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const {
|
||||
|
Loading…
Reference in New Issue
Block a user