mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
[AMDGPU] Propagate fast-math flags when lowering FSIN and FCOS
Differential Revision: https://reviews.llvm.org/D80813
This commit is contained in:
parent
52203cdb2c
commit
8cf2c72f7d
@@ -8288,22 +8288,24 @@ SDValue SITargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
|
|||||||
SDValue Arg = Op.getOperand(0);
|
SDValue Arg = Op.getOperand(0);
|
||||||
SDValue TrigVal;
|
SDValue TrigVal;
|
||||||
|
|
||||||
// TODO: Should this propagate fast-math-flags?
|
// Propagate fast-math flags so that the multiply we introduce can be folded
|
||||||
|
// if Arg is already the result of a multiply by constant.
|
||||||
|
auto Flags = Op->getFlags();
|
||||||
|
|
||||||
SDValue OneOver2Pi = DAG.getConstantFP(0.5 * numbers::inv_pi, DL, VT);
|
SDValue OneOver2Pi = DAG.getConstantFP(0.5 * numbers::inv_pi, DL, VT);
|
||||||
|
|
||||||
if (Subtarget->hasTrigReducedRange()) {
|
if (Subtarget->hasTrigReducedRange()) {
|
||||||
SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
|
SDValue MulVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
|
||||||
TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal);
|
TrigVal = DAG.getNode(AMDGPUISD::FRACT, DL, VT, MulVal, Flags);
|
||||||
} else {
|
} else {
|
||||||
TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi);
|
TrigVal = DAG.getNode(ISD::FMUL, DL, VT, Arg, OneOver2Pi, Flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (Op.getOpcode()) {
|
switch (Op.getOpcode()) {
|
||||||
case ISD::FCOS:
|
case ISD::FCOS:
|
||||||
return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal);
|
return DAG.getNode(AMDGPUISD::COS_HW, SDLoc(Op), VT, TrigVal, Flags);
|
||||||
case ISD::FSIN:
|
case ISD::FSIN:
|
||||||
return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal);
|
return DAG.getNode(AMDGPUISD::SIN_HW, SDLoc(Op), VT, TrigVal, Flags);
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("Wrong trig opcode");
|
llvm_unreachable("Wrong trig opcode");
|
||||||
}
|
}
|
||||||
|
@@ -52,7 +52,8 @@ define amdgpu_kernel void @unsafe_sin_3x_f32(float addrspace(1)* %out, float %x)
|
|||||||
}
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}fmf_sin_3x_f32:
|
; FUNC-LABEL: {{^}}fmf_sin_3x_f32:
|
||||||
; GCN: v_mul_f32
|
; GCN-NOT: v_add_f32
|
||||||
|
; GCN: 0x3ef47644
|
||||||
; GCN: v_mul_f32
|
; GCN: v_mul_f32
|
||||||
; SICIVI: v_fract_f32
|
; SICIVI: v_fract_f32
|
||||||
; GFX9-NOT: v_fract_f32
|
; GFX9-NOT: v_fract_f32
|
||||||
@@ -95,7 +96,8 @@ define amdgpu_kernel void @unsafe_sin_2x_f32(float addrspace(1)* %out, float %x)
|
|||||||
}
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}fmf_sin_2x_f32:
|
; FUNC-LABEL: {{^}}fmf_sin_2x_f32:
|
||||||
; GCN: v_add_f32
|
; GCN-NOT: v_add_f32
|
||||||
|
; GCN: 0x3ea2f983
|
||||||
; GCN: v_mul_f32
|
; GCN: v_mul_f32
|
||||||
; SICIVI: v_fract_f32
|
; SICIVI: v_fract_f32
|
||||||
; GFX9-NOT: v_fract_f32
|
; GFX9-NOT: v_fract_f32
|
||||||
@@ -137,8 +139,8 @@ define amdgpu_kernel void @unsafe_sin_cancel_f32(float addrspace(1)* %out, float
|
|||||||
}
|
}
|
||||||
|
|
||||||
; FUNC-LABEL: {{^}}fmf_sin_cancel_f32:
|
; FUNC-LABEL: {{^}}fmf_sin_cancel_f32:
|
||||||
; GCN: v_mul_f32
|
; GCN-NOT: v_add_f32
|
||||||
; GCN: v_mul_f32
|
; GCN-NOT: v_mul_f32
|
||||||
; SICIVI: v_fract_f32
|
; SICIVI: v_fract_f32
|
||||||
; GFX9-NOT: v_fract_f32
|
; GFX9-NOT: v_fract_f32
|
||||||
; GCN: v_sin_f32
|
; GCN: v_sin_f32
|
||||||
|
Loading…
Reference in New Issue
Block a user