1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-01-31 20:51:52 +01:00
llvm-mirror/test/CodeGen/AMDGPU/amdgcn-ieee.ll
Matt Arsenault 33f5f7933c AMDGPU: Change pre-gfx9 implementation of fcanonicalize to mul
If f32 denormals were enabled pre-gfx9, we would still try to
implement this with v_max_f32. Pre-gfx9, these instructions ignored
the denormal mode and did not flush. Switch to the multiply form for
f32 as a workaround which should always work in any case.

This fixes conformance failures when the library implementation of
fmin/fmax were accidentally not inlined, forcing the assumption of no
flushing on targets where denormals are not enabled by default. This
is a workaround, since really we should not be mixing code with
different FP mode expectations, but prefer the lowering that will work
in any mode.

Now this will always use max to implement canonicalize on gfx9+. This
is only really beneficial for f64. For f32/f16 it's a neutral choice
(and worse in terms of code size in 1 case), but possibly worse for
the compiler since it does add an extra register use operand. Leave
this change for later.
2020-04-23 15:24:13 -04:00

189 lines
7.6 KiB
LLVM

; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}kernel_ieee_mode_default:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
define amdgpu_kernel void @kernel_ieee_mode_default() #0 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}kernel_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
define amdgpu_kernel void @kernel_ieee_mode_on() #1 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}kernel_ieee_mode_off:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-NOT: [[VAL0]]
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
define amdgpu_kernel void @kernel_ieee_mode_off() #2 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}func_ieee_mode_default:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
define void @func_ieee_mode_default() #0 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}func_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
define void @func_ieee_mode_on() #1 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}func_ieee_mode_off:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-NOT: [[VAL0]]
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
define void @func_ieee_mode_off() #2 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}cs_ieee_mode_default:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
define amdgpu_cs void @cs_ieee_mode_default() #0 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}cs_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
define amdgpu_cs void @cs_ieee_mode_on() #1 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}cs_ieee_mode_off:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-NOT: [[VAL0]]
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
define amdgpu_cs void @cs_ieee_mode_off() #2 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}ps_ieee_mode_default:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-NOT: [[VAL0]]
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
define amdgpu_ps void @ps_ieee_mode_default() #0 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}ps_ieee_mode_on:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[QUIET0:v[0-9]+]], 1.0, [[VAL0]]
; GCN-DAG: v_mul_f32_e32 [[QUIET1:v[0-9]+]], 1.0, [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[QUIET0]], [[QUIET1]]
; GCN-NOT: v_mul_f32
define amdgpu_ps void @ps_ieee_mode_on() #1 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
; GCN-LABEL: {{^}}ps_ieee_mode_off:
; GCN: {{buffer|global|flat}}_load_dword [[VAL0:v[0-9]+]]
; GCN-NEXT: {{buffer|global|flat}}_load_dword [[VAL1:v[0-9]+]]
; GCN-NOT: [[VAL0]]
; GCN-NOT: [[VAL1]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], [[VAL0]], [[VAL1]]
; GCN-NOT: v_mul_f32
define amdgpu_ps void @ps_ieee_mode_off() #2 {
%val0 = load volatile float, float addrspace(1)* undef
%val1 = load volatile float, float addrspace(1)* undef
%min = call float @llvm.minnum.f32(float %val0, float %val1)
store volatile float %min, float addrspace(1)* undef
ret void
}
declare float @llvm.minnum.f32(float, float) #3
attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-ieee"="true" }
attributes #2 = { nounwind "amdgpu-ieee"="false" }
attributes #3 = { nounwind readnone speculatable }