mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
DAG: Fix wrong legality check for ISD::FMAD
Since 1725f2884175ca618d29b06e35f5c6ebd618053d, this should check isFMADLegalForFAddFSub rather than the the plain isOperationLegal. This would assert in a subset of cases due to an oddity in how FMAD is selected. We will allow FMA formation pre-legalize, but not FMAD even in cases where it would be valid. The current hook requires passing in the root fadd/fsub. However, in this distributed case, this would be far more complicated to pass in the relevant operand. AMDGPU doesn't get any value from the node, and only needs the type and is the only implementor, so I'm not sure why we have this complexity. Just rename and expand the assert to avoid the more complicated checks spread through the distribution logic.
This commit is contained in:
parent
e8ce9b7ca7
commit
7a1d9a278d
@ -2642,11 +2642,13 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns true if the FADD or FSUB node passed could legally be combined with
|
||||
/// an fmul to form an ISD::FMAD.
|
||||
virtual bool isFMADLegalForFAddFSub(const SelectionDAG &DAG,
|
||||
const SDNode *N) const {
|
||||
assert(N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB);
|
||||
/// Returns true if be combined with to form an ISD::FMAD. \p N may be an
|
||||
/// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an
|
||||
/// fadd/fsub.
|
||||
virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
|
||||
assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
|
||||
N->getOpcode() == ISD::FMUL) &&
|
||||
"unexpected node in FMAD forming combine");
|
||||
return isOperationLegal(ISD::FMAD, N->getValueType(0));
|
||||
}
|
||||
|
||||
|
@ -11728,7 +11728,7 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
|
||||
const TargetOptions &Options = DAG.getTarget().Options;
|
||||
|
||||
// Floating-point multiply-add with intermediate rounding.
|
||||
bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
|
||||
bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
|
||||
|
||||
// Floating-point multiply-add without intermediate rounding.
|
||||
bool HasFMA =
|
||||
@ -11945,7 +11945,7 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
|
||||
|
||||
const TargetOptions &Options = DAG.getTarget().Options;
|
||||
// Floating-point multiply-add with intermediate rounding.
|
||||
bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N));
|
||||
bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N));
|
||||
|
||||
// Floating-point multiply-add without intermediate rounding.
|
||||
bool HasFMA =
|
||||
@ -12289,7 +12289,7 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
|
||||
// Floating-point multiply-add with intermediate rounding. This can result
|
||||
// in a less precise result due to the changed rounding order.
|
||||
bool HasFMAD = Options.UnsafeFPMath &&
|
||||
(LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
|
||||
(LegalOperations && TLI.isFMADLegal(DAG, N));
|
||||
|
||||
// No valid opcode, do not combine.
|
||||
if (!HasFMAD && !HasFMA)
|
||||
|
@ -3946,8 +3946,8 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SITargetLowering::isFMADLegalForFAddFSub(const SelectionDAG &DAG,
|
||||
const SDNode *N) const {
|
||||
bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG,
|
||||
const SDNode *N) const {
|
||||
// TODO: Check future ftz flag
|
||||
// v_mad_f32/v_mac_f32 do not support denormals.
|
||||
EVT VT = N->getValueType(0);
|
||||
|
@ -358,8 +358,7 @@ public:
|
||||
MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
|
||||
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
|
||||
EVT VT) const override;
|
||||
bool isFMADLegalForFAddFSub(const SelectionDAG &DAG,
|
||||
const SDNode *N) const override;
|
||||
bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const override;
|
||||
|
||||
SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -0,0 +1,170 @@
|
||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,FMA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,FMA %s
|
||||
|
||||
; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
||||
; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
||||
|
||||
; Check for incorrect fmad formation when distributing
|
||||
|
||||
define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) #0 {
|
||||
; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
|
||||
; FMA: ; %bb.0:
|
||||
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMA-NEXT: v_fma_f32 v0, v1, v0, v0
|
||||
; FMA-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
|
||||
; NOFUSE: ; %bb.0:
|
||||
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; NOFUSE-NEXT: v_add_f32_e32 v1, 1.0, v1
|
||||
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
|
||||
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
|
||||
; FMAD: ; %bb.0:
|
||||
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMAD-NEXT: v_mac_f32_e32 v0, v1, v0
|
||||
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
||||
%add = fadd fast float %arg1, 1.0
|
||||
%tmp1 = fmul fast float %arg0, %add
|
||||
ret float %tmp1
|
||||
}
|
||||
|
||||
define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) #0 {
|
||||
; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
|
||||
; FMA: ; %bb.0:
|
||||
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMA-NEXT: v_fma_f32 v0, -v1, v0, v0
|
||||
; FMA-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
|
||||
; NOFUSE: ; %bb.0:
|
||||
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; NOFUSE-NEXT: v_sub_f32_e32 v1, 1.0, v1
|
||||
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
|
||||
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
|
||||
; FMAD: ; %bb.0:
|
||||
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMAD-NEXT: v_mad_f32 v0, -v1, v0, v0
|
||||
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
||||
%add = fsub fast float 1.0, %arg1
|
||||
%tmp1 = fmul fast float %arg0, %add
|
||||
ret float %tmp1
|
||||
}
|
||||
|
||||
define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
|
||||
; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
|
||||
; FMA: ; %bb.0:
|
||||
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMA-NEXT: v_fma_f32 v0, v2, v0, v0
|
||||
; FMA-NEXT: v_fma_f32 v1, v3, v1, v1
|
||||
; FMA-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
|
||||
; NOFUSE: ; %bb.0:
|
||||
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; NOFUSE-NEXT: v_add_f32_e32 v3, 1.0, v3
|
||||
; NOFUSE-NEXT: v_add_f32_e32 v2, 1.0, v2
|
||||
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
|
||||
; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
|
||||
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
|
||||
; FMAD: ; %bb.0:
|
||||
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMAD-NEXT: v_mac_f32_e32 v0, v2, v0
|
||||
; FMAD-NEXT: v_mac_f32_e32 v1, v3, v1
|
||||
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
||||
%add = fadd fast <2 x float> %arg1, <float 1.0, float 1.0>
|
||||
%tmp1 = fmul fast <2 x float> %arg0, %add
|
||||
ret <2 x float> %tmp1
|
||||
}
|
||||
|
||||
define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
|
||||
; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
|
||||
; FMA: ; %bb.0:
|
||||
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMA-NEXT: v_fma_f32 v0, -v2, v0, v0
|
||||
; FMA-NEXT: v_fma_f32 v1, -v3, v1, v1
|
||||
; FMA-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
|
||||
; NOFUSE: ; %bb.0:
|
||||
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; NOFUSE-NEXT: v_sub_f32_e32 v3, 1.0, v3
|
||||
; NOFUSE-NEXT: v_sub_f32_e32 v2, 1.0, v2
|
||||
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
|
||||
; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
|
||||
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
|
||||
; FMAD: ; %bb.0:
|
||||
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMAD-NEXT: v_mad_f32 v0, -v2, v0, v0
|
||||
; FMAD-NEXT: v_mad_f32 v1, -v3, v1, v1
|
||||
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
||||
%add = fsub fast <2 x float> <float 1.0, float 1.0>, %arg1
|
||||
%tmp1 = fmul fast <2 x float> %arg0, %add
|
||||
ret <2 x float> %tmp1
|
||||
}
|
||||
|
||||
define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %arg0, <2 x float> %arg1) #0 {
|
||||
; FMA-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
|
||||
; FMA: ; %bb.0:
|
||||
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMA-NEXT: v_fma_f32 v0, v0, v1, v1
|
||||
; FMA-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; NOFUSE-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
|
||||
; NOFUSE: ; %bb.0:
|
||||
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; NOFUSE-NEXT: v_add_f32_e32 v0, 1.0, v0
|
||||
; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
|
||||
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; FMAD-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
|
||||
; FMAD: ; %bb.0:
|
||||
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMAD-NEXT: v_mad_f32 v0, v0, v1, v1
|
||||
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
||||
%add = fadd fast float %arg0, 1.0
|
||||
%splat = insertelement <2 x float> undef, float %add, i32 0
|
||||
%tmp1 = fmul fast <2 x float> %arg1, %splat
|
||||
ret <2 x float> %tmp1
|
||||
}
|
||||
|
||||
define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <2 x float> %arg1) #0 {
|
||||
; FMA-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
|
||||
; FMA: ; %bb.0:
|
||||
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMA-NEXT: v_fma_f32 v0, -v0, v1, v1
|
||||
; FMA-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; NOFUSE-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
|
||||
; NOFUSE: ; %bb.0:
|
||||
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; NOFUSE-NEXT: v_sub_f32_e32 v0, 1.0, v0
|
||||
; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
|
||||
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
||||
;
|
||||
; FMAD-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
|
||||
; FMAD: ; %bb.0:
|
||||
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; FMAD-NEXT: v_mad_f32 v0, -v0, v1, v1
|
||||
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
||||
%sub = fsub fast float 1.0, %arg0
|
||||
%splat = insertelement <2 x float> undef, float %sub, i32 0
|
||||
%tmp1 = fmul fast <2 x float> %arg1, %splat
|
||||
ret <2 x float> %tmp1
|
||||
}
|
||||
|
||||
attributes #0 = { "no-infs-fp-math"="true" "unsafe-fp-math"="true" }
|
Loading…
x
Reference in New Issue
Block a user