1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-18 18:42:46 +02:00

[DAGCombiner] cancel fnegs from multiplied operands of FMA

(-X) * (-Y) + Z --> X * Y + Z

This is a missing optimization that shows up as a potential regression in D66050,
so we should solve it first. We appear to be partly missing this fold in IR as well.

We do handle the simpler case already:
(-X) * (-Y) --> X * Y

It might also be beneficial to make the constraint less conservative (e.g., if
both operands are cheap, but not necessarily cheaper), but that causes infinite
looping for the existing fmul transform.

Differential Revision: https://reviews.llvm.org/D66755

llvm-svn: 370071
This commit is contained in:
Sanjay Patel 2019-08-27 15:17:46 +00:00
parent 24233f2415
commit 7bb0bc7bff
3 changed files with 33 additions and 20 deletions

View File

@ -516,6 +516,7 @@ namespace {
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
bool isOneUseSetCC(SDValue N) const;
bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
@ -12110,6 +12111,22 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return SDValue();
}
/// Decide whether replacing both X and Y with their negated forms is a win.
/// That holds only when neither operand gets more expensive by being negated
/// and at least one of them becomes strictly cheaper.
bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
const TargetOptions &Options = DAG.getTarget().Options;
// A zero result means the operand is not at least as cheap when negated, so
// bail out before querying the second operand at all.
const char NegCostX =
isNegatibleForFree(X, LegalOperations, TLI, &Options, ForCodeSize);
if (!NegCostX)
return false;
const char NegCostY =
isNegatibleForFree(Y, LegalOperations, TLI, &Options, ForCodeSize);
if (!NegCostY)
return false;
// Both operands are at least as cheap negated; a result of 2 marks an
// operand that is strictly cheaper negated, and one such operand suffices.
return NegCostX == 2 || NegCostY == 2;
}
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@ -12180,21 +12197,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
ForCodeSize)) {
if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
ForCodeSize)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, DL, VT,
GetNegatedExpression(N0, DAG, LegalOperations,
ForCodeSize),
GetNegatedExpression(N1, DAG, LegalOperations,
ForCodeSize),
Flags);
}
// -N0 * -N1 --> N0 * N1
if (isCheaperToUseNegatedFPOps(N0, N1)) {
SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
}
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
@ -12273,6 +12280,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
// (-N0 * -N1) + N2 --> (N0 * N1) + N2
if (isCheaperToUseNegatedFPOps(N0, N1)) {
SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
}
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;

View File

@ -1205,7 +1205,7 @@ define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, flo
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]]
; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]]
; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]]

View File

@ -20,8 +20,7 @@ declare float @llvm.fmuladd.f32(float, float, float) #4
define float @fma_fneg_fneg(float %x, float %y, float %z) {
; CHECK-LABEL: fma_fneg_fneg:
; CHECK: # %bb.0:
; CHECK-NEXT: fneg f0, f2
; CHECK-NEXT: fnmsubs f1, f1, f0, f3
; CHECK-NEXT: fmadds f1, f1, f2, f3
; CHECK-NEXT: blr
%negx = fneg float %x
%negy = fneg float %y
@ -32,8 +31,8 @@ define float @fma_fneg_fneg(float %x, float %y, float %z) {
define float @fma_fneg_fsub(float %x, float %y0, float %y1, float %z) {
; CHECK-LABEL: fma_fneg_fsub:
; CHECK: # %bb.0:
; CHECK-NEXT: fsubs f0, f2, f3
; CHECK-NEXT: fnmsubs f1, f1, f0, f4
; CHECK-NEXT: fsubs f0, f3, f2
; CHECK-NEXT: fmadds f1, f1, f0, f4
; CHECK-NEXT: blr
%negx = fneg float %x
%negy = fsub nsz float %y0, %y1