1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll
Matt Arsenault 7a1d9a278d DAG: Fix wrong legality check for ISD::FMAD
Since 1725f2884175ca618d29b06e35f5c6ebd618053d, this should check
isFMADLegalForFAddFSub rather than the the plain isOperationLegal.

This would assert in a subset of cases due to an oddity in how FMAD is
selected. We will allow FMA formation pre-legalize, but not FMAD even
in cases where it would be valid.

The current hook requires passing in the root fadd/fsub. However, in
this distributed case, this would be far more complicated to pass in
the relevant operand. AMDGPU doesn't get any value from the node, and
only needs the type and is the only implementor, so I'm not sure why
we have this complexity. Just rename and expand the assert to avoid
the more complicated checks spread through the distribution logic.
2020-04-13 10:25:39 -07:00

171 lines
7.1 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,FMA %s
; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,FMA %s
; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
; Check for incorrect fmad formation when distributing
define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) #0 {
; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMA-NEXT: v_fma_f32 v0, v1, v0, v0
; FMA-NEXT: s_setpc_b64 s[30:31]
;
; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
; NOFUSE: ; %bb.0:
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOFUSE-NEXT: v_add_f32_e32 v1, 1.0, v1
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
;
; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
; FMAD: ; %bb.0:
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMAD-NEXT: v_mac_f32_e32 v0, v1, v0
; FMAD-NEXT: s_setpc_b64 s[30:31]
%add = fadd fast float %arg1, 1.0
%tmp1 = fmul fast float %arg0, %add
ret float %tmp1
}
define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) #0 {
; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMA-NEXT: v_fma_f32 v0, -v1, v0, v0
; FMA-NEXT: s_setpc_b64 s[30:31]
;
; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
; NOFUSE: ; %bb.0:
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOFUSE-NEXT: v_sub_f32_e32 v1, 1.0, v1
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
;
; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
; FMAD: ; %bb.0:
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMAD-NEXT: v_mad_f32 v0, -v1, v0, v0
; FMAD-NEXT: s_setpc_b64 s[30:31]
%add = fsub fast float 1.0, %arg1
%tmp1 = fmul fast float %arg0, %add
ret float %tmp1
}
define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMA-NEXT: v_fma_f32 v0, v2, v0, v0
; FMA-NEXT: v_fma_f32 v1, v3, v1, v1
; FMA-NEXT: s_setpc_b64 s[30:31]
;
; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
; NOFUSE: ; %bb.0:
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOFUSE-NEXT: v_add_f32_e32 v3, 1.0, v3
; NOFUSE-NEXT: v_add_f32_e32 v2, 1.0, v2
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
;
; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
; FMAD: ; %bb.0:
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMAD-NEXT: v_mac_f32_e32 v0, v2, v0
; FMAD-NEXT: v_mac_f32_e32 v1, v3, v1
; FMAD-NEXT: s_setpc_b64 s[30:31]
%add = fadd fast <2 x float> %arg1, <float 1.0, float 1.0>
%tmp1 = fmul fast <2 x float> %arg0, %add
ret <2 x float> %tmp1
}
define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMA-NEXT: v_fma_f32 v0, -v2, v0, v0
; FMA-NEXT: v_fma_f32 v1, -v3, v1, v1
; FMA-NEXT: s_setpc_b64 s[30:31]
;
; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
; NOFUSE: ; %bb.0:
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOFUSE-NEXT: v_sub_f32_e32 v3, 1.0, v3
; NOFUSE-NEXT: v_sub_f32_e32 v2, 1.0, v2
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
;
; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
; FMAD: ; %bb.0:
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMAD-NEXT: v_mad_f32 v0, -v2, v0, v0
; FMAD-NEXT: v_mad_f32 v1, -v3, v1, v1
; FMAD-NEXT: s_setpc_b64 s[30:31]
%add = fsub fast <2 x float> <float 1.0, float 1.0>, %arg1
%tmp1 = fmul fast <2 x float> %arg0, %add
ret <2 x float> %tmp1
}
define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %arg0, <2 x float> %arg1) #0 {
; FMA-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMA-NEXT: v_fma_f32 v0, v0, v1, v1
; FMA-NEXT: s_setpc_b64 s[30:31]
;
; NOFUSE-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
; NOFUSE: ; %bb.0:
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOFUSE-NEXT: v_add_f32_e32 v0, 1.0, v0
; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
;
; FMAD-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
; FMAD: ; %bb.0:
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMAD-NEXT: v_mad_f32 v0, v0, v1, v1
; FMAD-NEXT: s_setpc_b64 s[30:31]
%add = fadd fast float %arg0, 1.0
%splat = insertelement <2 x float> undef, float %add, i32 0
%tmp1 = fmul fast <2 x float> %arg1, %splat
ret <2 x float> %tmp1
}
define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <2 x float> %arg1) #0 {
; FMA-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
; FMA: ; %bb.0:
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMA-NEXT: v_fma_f32 v0, -v0, v1, v1
; FMA-NEXT: s_setpc_b64 s[30:31]
;
; NOFUSE-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
; NOFUSE: ; %bb.0:
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; NOFUSE-NEXT: v_sub_f32_e32 v0, 1.0, v0
; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
;
; FMAD-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
; FMAD: ; %bb.0:
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; FMAD-NEXT: v_mad_f32 v0, -v0, v1, v1
; FMAD-NEXT: s_setpc_b64 s[30:31]
%sub = fsub fast float 1.0, %arg0
%splat = insertelement <2 x float> undef, float %sub, i32 0
%tmp1 = fmul fast <2 x float> %arg1, %splat
ret <2 x float> %tmp1
}
attributes #0 = { "no-infs-fp-math"="true" "unsafe-fp-math"="true" }