mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
7a1d9a278d
Since 1725f2884175ca618d29b06e35f5c6ebd618053d, this should check isFMADLegalForFAddFSub rather than the plain isOperationLegal. This would assert in a subset of cases due to an oddity in how FMAD is selected. We will allow FMA formation pre-legalize, but not FMAD even in cases where it would be valid. The current hook requires passing in the root fadd/fsub. However, in this distributed case, this would be far more complicated to pass in the relevant operand. AMDGPU doesn't get any value from the node, and only needs the type and is the only implementor, so I'm not sure why we have this complexity. Just rename and expand the assert to avoid the more complicated checks spread through the distribution logic.
171 lines
7.1 KiB
LLVM
171 lines
7.1 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,FMA %s
|
|
; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
|
|
; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
|
|
; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s
|
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,FMA %s
|
|
|
|
; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
|
; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
|
; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
|
; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s
|
|
|
|
; Check for incorrect fmad formation when distributing
|
|
|
|
; Scalar case: multiplies %arg0 by (%arg1 + 1.0). Both the fadd and the fmul
; carry the `fast` flag. Expected output recorded below, per check prefix:
;   FMA    - a single v_fma_f32
;   NOFUSE - v_add_f32 followed by v_mul_f32 (no fused instruction)
;   FMAD   - a single v_mac_f32
define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) #0 {
|
|
; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
|
|
; FMA: ; %bb.0:
|
|
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMA-NEXT: v_fma_f32 v0, v1, v0, v0
|
|
; FMA-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
|
|
; NOFUSE: ; %bb.0:
|
|
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; NOFUSE-NEXT: v_add_f32_e32 v1, 1.0, v1
|
|
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_f32:
|
|
; FMAD: ; %bb.0:
|
|
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMAD-NEXT: v_mac_f32_e32 v0, v1, v0
|
|
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
|
%add = fadd fast float %arg1, 1.0
|
|
%tmp1 = fmul fast float %arg0, %add
|
|
ret float %tmp1
|
|
}
|
|
|
|
; Scalar case: multiplies %arg0 by (1.0 - %arg1). Both the fsub and the fmul
; carry the `fast` flag. Note that the value named %add is produced by an fsub.
; Expected output recorded below, per check prefix:
;   FMA    - a single v_fma_f32 with a negated first source operand
;   NOFUSE - v_sub_f32 followed by v_mul_f32 (no fused instruction)
;   FMAD   - a single v_mad_f32 with a negated first source operand
define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) #0 {
|
|
; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
|
|
; FMA: ; %bb.0:
|
|
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMA-NEXT: v_fma_f32 v0, -v1, v0, v0
|
|
; FMA-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
|
|
; NOFUSE: ; %bb.0:
|
|
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; NOFUSE-NEXT: v_sub_f32_e32 v1, 1.0, v1
|
|
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1
|
|
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_f32:
|
|
; FMAD: ; %bb.0:
|
|
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMAD-NEXT: v_mad_f32 v0, -v1, v0, v0
|
|
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
|
%add = fsub fast float 1.0, %arg1
|
|
%tmp1 = fmul fast float %arg0, %add
|
|
ret float %tmp1
|
|
}
|
|
|
|
; <2 x float> case: multiplies %arg0 by (%arg1 + <1.0, 1.0>). Both the fadd
; and the fmul carry the `fast` flag. Expected output recorded below, per
; check prefix:
;   FMA    - two v_fma_f32, writing v0 and v1
;   NOFUSE - two v_add_f32 followed by two v_mul_f32 (no fused instruction)
;   FMAD   - two v_mac_f32, writing v0 and v1
define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
|
|
; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
|
|
; FMA: ; %bb.0:
|
|
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMA-NEXT: v_fma_f32 v0, v2, v0, v0
|
|
; FMA-NEXT: v_fma_f32 v1, v3, v1, v1
|
|
; FMA-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
|
|
; NOFUSE: ; %bb.0:
|
|
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; NOFUSE-NEXT: v_add_f32_e32 v3, 1.0, v3
|
|
; NOFUSE-NEXT: v_add_f32_e32 v2, 1.0, v2
|
|
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
|
|
; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
|
|
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32:
|
|
; FMAD: ; %bb.0:
|
|
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMAD-NEXT: v_mac_f32_e32 v0, v2, v0
|
|
; FMAD-NEXT: v_mac_f32_e32 v1, v3, v1
|
|
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
|
%add = fadd fast <2 x float> %arg1, <float 1.0, float 1.0>
|
|
%tmp1 = fmul fast <2 x float> %arg0, %add
|
|
ret <2 x float> %tmp1
|
|
}
|
|
|
|
; <2 x float> case: multiplies %arg0 by (<1.0, 1.0> - %arg1). Both the fsub
; and the fmul carry the `fast` flag. Note that the value named %add is
; produced by an fsub. Expected output recorded below, per check prefix:
;   FMA    - two v_fma_f32, each with a negated first source operand
;   NOFUSE - two v_sub_f32 followed by two v_mul_f32 (no fused instruction)
;   FMAD   - two v_mad_f32, each with a negated first source operand
define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 {
|
|
; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
|
|
; FMA: ; %bb.0:
|
|
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMA-NEXT: v_fma_f32 v0, -v2, v0, v0
|
|
; FMA-NEXT: v_fma_f32 v1, -v3, v1, v1
|
|
; FMA-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
|
|
; NOFUSE: ; %bb.0:
|
|
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; NOFUSE-NEXT: v_sub_f32_e32 v3, 1.0, v3
|
|
; NOFUSE-NEXT: v_sub_f32_e32 v2, 1.0, v2
|
|
; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2
|
|
; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3
|
|
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32:
|
|
; FMAD: ; %bb.0:
|
|
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMAD-NEXT: v_mad_f32 v0, -v2, v0, v0
|
|
; FMAD-NEXT: v_mad_f32 v1, -v3, v1, v1
|
|
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
|
%add = fsub fast <2 x float> <float 1.0, float 1.0>, %arg1
|
|
%tmp1 = fmul fast <2 x float> %arg0, %add
|
|
ret <2 x float> %tmp1
|
|
}
|
|
|
|
; Mixed scalar/vector case: the scalar (%arg0 + 1.0) is inserted into lane 0
; of an otherwise-undef <2 x float>, which is then multiplied by %arg1.
; Despite its name, %splat only has lane 0 defined; lane 1 stays undef, and
; every prefix's expected output below only writes v0.
; Expected output recorded below, per check prefix:
;   FMA    - a single v_fma_f32
;   NOFUSE - v_add_f32 followed by v_mul_f32 (no fused instruction)
;   FMAD   - a single v_mad_f32
; NOTE(review): the "post_legalize" in the name presumably refers to the
; combine firing after vector legalization - confirm against the combiner.
define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %arg0, <2 x float> %arg1) #0 {
|
|
; FMA-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
|
|
; FMA: ; %bb.0:
|
|
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMA-NEXT: v_fma_f32 v0, v0, v1, v1
|
|
; FMA-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; NOFUSE-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
|
|
; NOFUSE: ; %bb.0:
|
|
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; NOFUSE-NEXT: v_add_f32_e32 v0, 1.0, v0
|
|
; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
|
|
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; FMAD-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32:
|
|
; FMAD: ; %bb.0:
|
|
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMAD-NEXT: v_mad_f32 v0, v0, v1, v1
|
|
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
|
%add = fadd fast float %arg0, 1.0
|
|
%splat = insertelement <2 x float> undef, float %add, i32 0
|
|
%tmp1 = fmul fast <2 x float> %arg1, %splat
|
|
ret <2 x float> %tmp1
|
|
}
|
|
|
|
; Mixed scalar/vector case: the scalar (1.0 - %arg0) is inserted into lane 0
; of an otherwise-undef <2 x float>, which is then multiplied by %arg1.
; Despite its name, %splat only has lane 0 defined; lane 1 stays undef, and
; every prefix's expected output below only writes v0.
; Expected output recorded below, per check prefix:
;   FMA    - a single v_fma_f32 with a negated first source operand
;   NOFUSE - v_sub_f32 followed by v_mul_f32 (no fused instruction)
;   FMAD   - a single v_mad_f32 with a negated first source operand
; NOTE(review): "ditribute" in the function name looks like a typo for
; "distribute". It is left as-is here because the label checks below match
; on the exact symbol name; a rename would need the checks regenerated.
define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <2 x float> %arg1) #0 {
|
|
; FMA-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
|
|
; FMA: ; %bb.0:
|
|
; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMA-NEXT: v_fma_f32 v0, -v0, v1, v1
|
|
; FMA-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; NOFUSE-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
|
|
; NOFUSE: ; %bb.0:
|
|
; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; NOFUSE-NEXT: v_sub_f32_e32 v0, 1.0, v0
|
|
; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0
|
|
; NOFUSE-NEXT: s_setpc_b64 s[30:31]
|
|
;
|
|
; FMAD-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize:
|
|
; FMAD: ; %bb.0:
|
|
; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; FMAD-NEXT: v_mad_f32 v0, -v0, v1, v1
|
|
; FMAD-NEXT: s_setpc_b64 s[30:31]
|
|
%sub = fsub fast float 1.0, %arg0
|
|
%splat = insertelement <2 x float> undef, float %sub, i32 0
|
|
%tmp1 = fmul fast <2 x float> %arg1, %splat
|
|
ret <2 x float> %tmp1
|
|
}
|
|
|
|
; Attribute group referenced by every function in this file: sets the
; "no-infs-fp-math" and "unsafe-fp-math" string attributes to "true".
attributes #0 = { "no-infs-fp-math"="true" "unsafe-fp-math"="true" }
|