mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 11:13:28 +01:00
[DAGCombine] Transform (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)).
Differential Revision: http://reviews.llvm.org/D32596
llvm-svn: 302153
This commit is contained in:
parent
f720911efd
commit
1f7103004e
@ -9469,6 +9469,14 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// Return true if \p N is an ISD::FMUL node whose second operand (operand 1)
/// is recognised by isConstOrConstSplatFP and is exactly -2.0.
///
/// Only operand 1 is inspected for the constant; operand 0 is never looked at.
/// The number of uses of \p N is not checked here; callers that need a
/// single-use multiply must test that themselves.
///
/// \p N is taken by value rather than by non-const reference: the helper only
/// reads the node, and a by-value parameter also accepts temporaries such as
/// the result of getOperand(). Existing callers that pass lvalues are
/// unaffected.
static bool isFMulNegTwo(SDValue N) {
  if (N.getOpcode() != ISD::FMUL)
    return false;

  // A null result means operand 1 is not a (splat) FP constant.
  const ConstantFPSDNode *CFP = isConstOrConstSplatFP(N.getOperand(1));
  return CFP && CFP->isExactlyValue(-2.0);
}
|
||||
|
||||
SDValue DAGCombiner::visitFADD(SDNode *N) {
|
||||
SDValue N0 = N->getOperand(0);
|
||||
SDValue N1 = N->getOperand(1);
|
||||
@ -9507,6 +9515,16 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
|
||||
return DAG.getNode(ISD::FSUB, DL, VT, N1,
|
||||
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
|
||||
|
||||
// fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B))
|
||||
// fold (fadd (fmul B, -2.0), A) -> (fsub A, (fadd B, B))
|
||||
if ((isFMulNegTwo(N0) && N0.hasOneUse()) ||
|
||||
(isFMulNegTwo(N1) && N1.hasOneUse())) {
|
||||
bool N1IsFMul = isFMulNegTwo(N1);
|
||||
SDValue AddOp = N1IsFMul ? N1.getOperand(0) : N0.getOperand(0);
|
||||
SDValue Add = DAG.getNode(ISD::FADD, DL, VT, AddOp, AddOp, Flags);
|
||||
return DAG.getNode(ISD::FSUB, DL, VT, N1IsFMul ? N0 : N1, Add, Flags);
|
||||
}
|
||||
|
||||
// FIXME: Auto-upgrade the target/function-level option.
|
||||
if (Options.NoSignedZerosFPMath || N->getFlags().hasNoSignedZeros()) {
|
||||
// fold (fadd A, 0) -> A
|
||||
|
78
test/CodeGen/AArch64/fadd-combines.ll
Normal file
78
test/CodeGen/AArch64/fadd-combines.ll
Normal file
@ -0,0 +1,78 @@
|
||||
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs | FileCheck %s
|
||||
|
||||
; CHECK-LABEL: test1:
|
||||
; CHECK: fadd d1, d1, d1
|
||||
; CHECK: fsub d0, d0, d1
|
||||
define double @test1(double %a, double %b) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%mul = fmul double %b, -2.000000e+00
|
||||
%add1 = fadd double %a, %mul
|
||||
ret double %add1
|
||||
}
|
||||
|
||||
; DAGCombine will canonicalize 'a - 2.0*b' to 'a + -2.0*b'
|
||||
; CHECK-LABEL: test2:
|
||||
; CHECK: fadd d1, d1, d1
|
||||
; CHECK: fsub d0, d0, d1
|
||||
define double @test2(double %a, double %b) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%mul = fmul double %b, 2.000000e+00
|
||||
%add1 = fsub double %a, %mul
|
||||
ret double %add1
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test3:
|
||||
; CHECK: fmul d0, d0, d1
|
||||
; CHECK: fadd d1, d2, d2
|
||||
; CHECK: fsub d0, d0, d1
|
||||
define double @test3(double %a, double %b, double %c) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%mul = fmul double %a, %b
|
||||
%mul1 = fmul double %c, 2.000000e+00
|
||||
%sub = fsub double %mul, %mul1
|
||||
ret double %sub
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test4:
|
||||
; CHECK: fmul d0, d0, d1
|
||||
; CHECK: fadd d1, d2, d2
|
||||
; CHECK: fsub d0, d0, d1
|
||||
define double @test4(double %a, double %b, double %c) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%mul = fmul double %a, %b
|
||||
%mul1 = fmul double %c, -2.000000e+00
|
||||
%add2 = fadd double %mul, %mul1
|
||||
ret double %add2
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test5:
|
||||
; CHECK: fadd v1.4s, v1.4s, v1.4s
|
||||
; CHECK: fsub v0.4s, v0.4s, v1.4s
|
||||
define <4 x float> @test5(<4 x float> %a, <4 x float> %b) {
|
||||
%mul = fmul <4 x float> %b, <float -2.0, float -2.0, float -2.0, float -2.0>
|
||||
%add = fadd <4 x float> %a, %mul
|
||||
ret <4 x float> %add
|
||||
}
|
||||
|
||||
; CHECK-LABEL: test6:
|
||||
; CHECK: fadd v1.4s, v1.4s, v1.4s
|
||||
; CHECK: fsub v0.4s, v0.4s, v1.4s
|
||||
define <4 x float> @test6(<4 x float> %a, <4 x float> %b) {
|
||||
%mul = fmul <4 x float> %b, <float 2.0, float 2.0, float 2.0, float 2.0>
|
||||
%add = fsub <4 x float> %a, %mul
|
||||
ret <4 x float> %add
|
||||
}
|
||||
|
||||
; Don't fold (fadd A, (fmul B, -2.0)) -> (fsub A, (fadd B, B)) if the fmul has
|
||||
; multiple uses.
|
||||
; CHECK-LABEL: test7:
|
||||
; CHECK: fmul
|
||||
define double @test7(double %a, double %b) local_unnamed_addr #0 {
|
||||
entry:
|
||||
%mul = fmul double %b, -2.000000e+00
|
||||
%add1 = fadd double %a, %mul
|
||||
call void @use(double %mul)
|
||||
ret double %add1
|
||||
}
|
||||
|
||||
declare void @use(double)
|
@ -191,8 +191,8 @@ define amdgpu_kernel void @fadd_b_a_a_f32(float addrspace(1)* %out,
|
||||
|
||||
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], [[R1]], -2.0, [[R2]]
|
||||
|
||||
; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]]
|
||||
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[R2]], [[TMP]]
|
||||
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
|
||||
; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
@ -251,8 +251,8 @@ define amdgpu_kernel void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out,
|
||||
|
||||
; GCN-DENORM-FASTFMA: v_fma_f32 [[RESULT:v[0-9]+]], -[[R1]], 2.0, [[R2]]
|
||||
|
||||
; GCN-DENORM-SLOWFMA: v_mul_f32_e32 [[TMP:v[0-9]+]], -2.0, [[R1]]
|
||||
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
|
||||
; GCN-DENORM-SLOWFMA: v_add_f32_e32 [[TMP:v[0-9]+]], [[R1]], [[R1]]
|
||||
; GCN-DENORM-SLOWFMA: v_subrev_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[R2]]
|
||||
|
||||
; SI-DENORM: buffer_store_dword [[RESULT]]
|
||||
; VI-DENORM: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
|
||||
|
Loading…
Reference in New Issue
Block a user