mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
2a5c21f1b1
This is the first of a set of DAGCombiner changes enabling strictfp optimizations. I want to test to waters with this to make sure changes like these are acceptable for the strictfp case- this particular change should preserve exception ordering and result precision perfectly, and many other possible changes appear to be able to as well. Copied from regular fadd combines but modified to preserve ordering via the chain, this change allows strict_fadd x, (fneg y) to become struct_fsub x, y and strict_fadd (fneg x), y to become strict_fsub y, x. Differential Revision: https://reviews.llvm.org/D85548
130 lines
6.0 KiB
LLVM
130 lines
6.0 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 < %s | FileCheck -check-prefix=GCN %s
|
|
|
|
define float @v_constained_fadd_f32_fpexcept_strict(float %x, float %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e32 v0, v0, v1
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
|
|
ret float %val
|
|
}
|
|
|
|
define float @v_constained_fadd_f32_fpexcept_ignore(float %x, float %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_f32_fpexcept_ignore:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e32 v0, v0, v1
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
|
|
ret float %val
|
|
}
|
|
|
|
define float @v_constained_fadd_f32_fpexcept_maytrap(float %x, float %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_f32_fpexcept_maytrap:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e32 v0, v0, v1
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
|
|
ret float %val
|
|
}
|
|
|
|
define <2 x float> @v_constained_fadd_v2f32_fpexcept_strict(<2 x float> %x, <2 x float> %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_strict:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GCN-NEXT: v_add_f32_e32 v1, v1, v3
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
|
|
ret <2 x float> %val
|
|
}
|
|
|
|
define <2 x float> @v_constained_fadd_v2f32_fpexcept_ignore(<2 x float> %x, <2 x float> %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_ignore:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GCN-NEXT: v_add_f32_e32 v1, v1, v3
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.ignore")
|
|
ret <2 x float> %val
|
|
}
|
|
|
|
define <2 x float> @v_constained_fadd_v2f32_fpexcept_maytrap(<2 x float> %x, <2 x float> %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_v2f32_fpexcept_maytrap:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e32 v0, v0, v2
|
|
; GCN-NEXT: v_add_f32_e32 v1, v1, v3
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %x, <2 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.maytrap")
|
|
ret <2 x float> %val
|
|
}
|
|
|
|
define <3 x float> @v_constained_fadd_v3f32_fpexcept_strict(<3 x float> %x, <3 x float> %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_v3f32_fpexcept_strict:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e32 v0, v0, v3
|
|
; GCN-NEXT: v_add_f32_e32 v1, v1, v4
|
|
; GCN-NEXT: v_add_f32_e32 v2, v2, v5
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%val = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float> %x, <3 x float> %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
|
|
ret <3 x float> %val
|
|
}
|
|
|
|
define amdgpu_ps float @s_constained_fadd_f32_fpexcept_strict(float inreg %x, float inreg %y) #0 {
|
|
; GCN-LABEL: s_constained_fadd_f32_fpexcept_strict:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: v_mov_b32_e32 v0, s3
|
|
; GCN-NEXT: v_add_f32_e32 v0, s2, v0
|
|
; GCN-NEXT: ; return to shader part epilog
|
|
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
|
|
ret float %val
|
|
}
|
|
|
|
define float @v_constained_fadd_f32_fpexcept_strict_fabs_lhs(float %x, float %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_lhs:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e64 v0, |v0|, v1
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
|
%val = call float @llvm.experimental.constrained.fadd.f32(float %fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
|
|
ret float %val
|
|
}
|
|
|
|
define float @v_constained_fadd_f32_fpexcept_strict_fabs_rhs(float %x, float %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fabs_rhs:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_add_f32_e64 v0, v0, |v1|
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%fabs.y = call float @llvm.fabs.f32(float %y)
|
|
%val = call float @llvm.experimental.constrained.fadd.f32(float %x, float %fabs.y, metadata !"round.tonearest", metadata !"fpexcept.strict")
|
|
ret float %val
|
|
}
|
|
|
|
define float @v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs(float %x, float %y) #0 {
|
|
; GCN-LABEL: v_constained_fadd_f32_fpexcept_strict_fneg_fabs_lhs:
|
|
; GCN: ; %bb.0:
|
|
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
|
; GCN-NEXT: v_sub_f32_e64 v0, v1, |v0|
|
|
; GCN-NEXT: s_setpc_b64 s[30:31]
|
|
%fabs.x = call float @llvm.fabs.f32(float %x)
|
|
%neg.fabs.x = fneg float %fabs.x
|
|
%val = call float @llvm.experimental.constrained.fadd.f32(float %neg.fabs.x, float %y, metadata !"round.tonearest", metadata !"fpexcept.strict")
|
|
ret float %val
|
|
}
|
|
|
|
declare float @llvm.fabs.f32(float) #1
|
|
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) #1
|
|
declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata) #1
|
|
declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata) #1
|
|
|
|
attributes #0 = { strictfp }
|
|
attributes #1 = { inaccessiblememonly nounwind willreturn }
|