1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 05:01:59 +01:00
llvm-mirror/test/CodeGen/AMDGPU/hsa-fp-mode.ll
Matt Arsenault ddf10ac5a7 AMDGPU: Remove dx10-clamp from subtarget features
Since this can be set with s_setreg*, it should not be a subtarget
property. Set a default based on the calling convention, and introduce
a new amdgpu-dx10-clamp attribute to override this if desired.

Also introduce a new amdgpu-ieee attribute to match.

The values need to match to allow inlining. I think it is OK for the
caller's dx10-clamp attribute to override the callee, but there
doesn't appear to be the infrastructure to do this currently without
defining the attribute in the generic Attributes.td.

Eventually the calling convention lowering will need to insert a mode
switch somewhere for these.

llvm-svn: 357302
2019-03-29 19:14:54 +00:00

102 lines
3.9 KiB
LLVM

; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Default FP mode with attribute group #0 (target-cpu "kaveri").
; The llc output for this kernel is expected to report float_mode 192 with
; both enable_dx10_clamp and enable_ieee_mode set to 1.
; GCN-LABEL: {{^}}test_default_ci:
; GCN: float_mode = 192
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
define amdgpu_kernel void @test_default_ci(float addrspace(1)* %out0, double addrspace(1)* %out1) #0 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Default FP mode with attribute group #1 (target-cpu "fiji").
; Expected output matches the "kaveri" default case: float_mode 192 with
; enable_dx10_clamp and enable_ieee_mode both 1.
; GCN-LABEL: {{^}}test_default_vi:
; GCN: float_mode = 192
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
define amdgpu_kernel void @test_default_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #1 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Attribute group #2: -fp32-denormals, +fp64-fp16-denormals.
; With only the f64/f16 denormal feature enabled, float_mode is expected to
; be 192; enable_dx10_clamp and enable_ieee_mode stay at 1.
; GCN-LABEL: {{^}}test_f64_denormals:
; GCN: float_mode = 192
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
define amdgpu_kernel void @test_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #2 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Attribute group #3: +fp32-denormals, -fp64-fp16-denormals.
; With only the f32 denormal feature enabled, float_mode is expected to be
; 48; enable_dx10_clamp and enable_ieee_mode stay at 1.
; GCN-LABEL: {{^}}test_f32_denormals:
; GCN: float_mode = 48
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
define amdgpu_kernel void @test_f32_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #3 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Attribute group #4: +fp32-denormals, +fp64-fp16-denormals.
; With both denormal features enabled, float_mode is expected to be 240;
; enable_dx10_clamp and enable_ieee_mode stay at 1.
; GCN-LABEL: {{^}}test_f32_f64_denormals:
; GCN: float_mode = 240
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
define amdgpu_kernel void @test_f32_f64_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #4 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Attribute group #5: -fp32-denormals, -fp64-fp16-denormals.
; With both denormal features disabled, float_mode is expected to be 0;
; enable_dx10_clamp and enable_ieee_mode stay at 1.
; GCN-LABEL: {{^}}test_no_denormals:
; GCN: float_mode = 0
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 1
define amdgpu_kernel void @test_no_denormals(float addrspace(1)* %out0, double addrspace(1)* %out1) #5 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Attribute group #6: "amdgpu-dx10-clamp"="false" on target-cpu "fiji".
; Only the clamp setting is overridden: enable_dx10_clamp is expected to be 0
; while float_mode (192) and enable_ieee_mode (1) keep their default values.
; GCN-LABEL: {{^}}test_no_dx10_clamp_vi:
; GCN: float_mode = 192
; GCN: enable_dx10_clamp = 0
; GCN: enable_ieee_mode = 1
define amdgpu_kernel void @test_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #6 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Attribute group #7: "amdgpu-ieee"="false" on target-cpu "fiji".
; Only the IEEE setting is overridden: enable_ieee_mode is expected to be 0
; while float_mode (192) and enable_dx10_clamp (1) keep their default values.
; GCN-LABEL: {{^}}test_no_ieee_mode_vi:
; GCN: float_mode = 192
; GCN: enable_dx10_clamp = 1
; GCN: enable_ieee_mode = 0
define amdgpu_kernel void @test_no_ieee_mode_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #7 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Attribute group #8: both "amdgpu-dx10-clamp"="false" and
; "amdgpu-ieee"="false" on target-cpu "fiji".
; enable_dx10_clamp and enable_ieee_mode are both expected to be 0, with
; float_mode left at its default value of 192.
; GCN-LABEL: {{^}}test_no_ieee_mode_no_dx10_clamp_vi:
; GCN: float_mode = 192
; GCN: enable_dx10_clamp = 0
; GCN: enable_ieee_mode = 0
define amdgpu_kernel void @test_no_ieee_mode_no_dx10_clamp_vi(float addrspace(1)* %out0, double addrspace(1)* %out1) #8 {
  ; Write a zero through each global-address-space output pointer.
  store float 0.000000e+00, float addrspace(1)* %out0
  store double 0.000000e+00, double addrspace(1)* %out1
  ret void
}
; Attribute groups referenced by the kernels in this file. Every group is
; nounwind and disables code-object-v3 through "target-features".

; #0 / #1: default FP mode on two different target CPUs.
attributes #0 = { nounwind "target-features"="-code-object-v3" "target-cpu"="kaveri" }
attributes #1 = { nounwind "target-features"="-code-object-v3" "target-cpu"="fiji" }

; #2 - #5: the four on/off combinations of the fp32 and fp64/fp16 denormal
; features, with no explicit target CPU.
attributes #2 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,+fp64-fp16-denormals" }
attributes #3 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,-fp64-fp16-denormals" }
attributes #4 = { nounwind "target-features"="-code-object-v3,+fp32-denormals,+fp64-fp16-denormals" }
attributes #5 = { nounwind "target-features"="-code-object-v3,-fp32-denormals,-fp64-fp16-denormals" }

; #6 - #8: per-function overrides of the DX10 clamp and/or IEEE mode on
; target-cpu "fiji".
attributes #6 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3" "amdgpu-dx10-clamp"="false" }
attributes #7 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3" "amdgpu-ieee"="false" }
attributes #8 = { nounwind "target-cpu"="fiji" "target-features"="-code-object-v3" "amdgpu-ieee"="false" "amdgpu-dx10-clamp"="false" }