1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00
llvm-mirror/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.rsq.clamp.ll
Matt Arsenault 544cd96b07 AMDGPU/GlobalISel: Implement expansion for rsq.clamp
Not sure why we handle this removed instruction on newer subtargets
for this one and no others, but maintain compatibility with the DAG.
2020-08-06 10:23:25 -04:00

171 lines
5.9 KiB
LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s
; Baseline f32 case: passes the argument straight to llvm.amdgcn.rsq.clamp.f32
; and returns the result.
; The tahiti run expects a single native v_rsq_clamp_f32 instruction.
; The tonga run expects the expansion: v_rsq_f32, then v_min_f32 against
; 0x7f7fffff and v_max_f32 against 0xff7fffff (the bit patterns of the
; largest-magnitude finite positive and negative f32 values).
define float @v_rsq_clamp_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f32_e32 v0, v0
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
ret float %rsq_clamp
}
; f32 case with an llvm.fabs.f32 feeding the intrinsic.
; Both runs expect the absolute value to appear as a |v0| source modifier on
; the rsq instruction (e64 encoding) rather than as a separate instruction.
; The tonga run additionally expects the same v_min_f32 / v_max_f32 clamp
; pair as the plain f32 case.
define float @v_rsq_clamp_fabs_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_rsq_clamp_f32_e64 v0, |v0|
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f32_e64 v0, |v0|
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%fabs.src = call float @llvm.fabs.f32(float %src)
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src)
ret float %rsq_clamp
}
; Baseline f64 case: passes the argument straight to llvm.amdgcn.rsq.clamp.f64
; and returns the result.
; The tahiti run expects a single native v_rsq_clamp_f64 instruction.
; The tonga run expects v_rsq_f64 followed by v_min_f64 / v_max_f64 against a
; 64-bit constant built in s[4:5]: the low half (s4 = -1, i.e. 0xffffffff) is
; shared, and only the high half s5 is rewritten between the two clamps
; (0x7fefffff for the upper bound, 0xffefffff for the lower bound), giving the
; largest-magnitude finite positive and negative f64 bit patterns.
define double @v_rsq_clamp_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT: s_mov_b32 s4, -1
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s5, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
ret double %rsq_clamp
}
; f64 case with an llvm.fabs.f64 feeding the intrinsic.
; Both runs expect the absolute value to appear as a |v[0:1]| source modifier
; on the rsq instruction (e64 encoding) rather than as a separate instruction.
; The tonga run additionally expects the same s[4:5]-based v_min_f64 /
; v_max_f64 clamp sequence as the plain f64 case.
define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_rsq_clamp_f64_e64 v[0:1], |v[0:1]|
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e64 v[0:1], |v[0:1]|
; VI-NEXT: s_mov_b32 s4, -1
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s5, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_setpc_b64 s[30:31]
%fabs.src = call double @llvm.fabs.f64(double %src)
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
ret double %rsq_clamp
}
; f32 case with an undef operand; the function takes no arguments.
; The checks show the intrinsic is still selected (not folded away) on both
; runs, with the undef input read from s4.
; NOTE(review): s4 is presumably just whichever register the undef value was
; assigned; the checks pin it only because they are autogenerated.
define float @v_rsq_clamp_undef_f32() #0 {
; SI-LABEL: v_rsq_clamp_undef_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_rsq_clamp_f32_e32 v0, s4
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f32_e32 v0, s4
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
ret float %rsq_clamp
}
; f64 case with an undef operand; the function takes no arguments.
; The checks show the intrinsic is still selected (not folded away) on both
; runs, with the undef input read from s[4:5].
; On the tonga run the same s[4:5] pair is then overwritten with the clamp
; bounds after the rsq has consumed it.
; NOTE(review): s[4:5] is presumably just whichever register pair the undef
; value was assigned; the checks pin it only because they are autogenerated.
define double @v_rsq_clamp_undef_f64() #0 {
; SI-LABEL: v_rsq_clamp_undef_f64:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], s[4:5]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f64:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s4, -1
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s5, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
ret double %rsq_clamp
}
; Same IR body as v_rsq_clamp_f32, but the function carries attribute group #2
; ("amdgpu-ieee"="false") instead of #0.
; The expected instruction sequences on both runs are identical to the
; IEEE-mode f32 case, so this pins that disabling IEEE mode does not change
; the selected code for this intrinsic.
define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
; SI-LABEL: v_rsq_clamp_f32_non_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32_non_ieee:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f32_e32 v0, v0
; VI-NEXT: v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT: v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
ret float %rsq_clamp
}
; Same IR body as v_rsq_clamp_f64, but the function carries attribute group #2
; ("amdgpu-ieee"="false") instead of #0.
; The expected instruction sequences on both runs are identical to the
; IEEE-mode f64 case, so this pins that disabling IEEE mode does not change
; the selected code for this intrinsic.
define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; SI-LABEL: v_rsq_clamp_f64_non_ieee:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64_non_ieee:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT: s_mov_b32 s4, -1
; VI-NEXT: s_mov_b32 s5, 0x7fefffff
; VI-NEXT: v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_mov_b32 s5, 0xffefffff
; VI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT: s_setpc_b64 s[30:31]
%rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
ret double %rsq_clamp
}
; Intrinsics used by the tests above: fabs and rsq.clamp, each in f32 and f64
; flavours. All four share attribute group #1.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.amdgcn.rsq.clamp.f64(double) #1
; Attribute groups:
;   #0 - test functions using the default floating-point mode settings.
;   #1 - the intrinsic declarations (nounwind readnone).
;   #2 - the *_non_ieee test functions, which set "amdgpu-ieee"="false".
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-ieee"="false" }