mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
544cd96b07
Not sure why we handle this removed instruction on newer subtargets for this one and no others, but maintain compatibility with the DAG.
171 lines
5.9 KiB
LLVM
171 lines
5.9 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI %s

; GlobalISel selection test for the llvm.amdgcn.rsq.clamp intrinsic (f32 and
; f64). The tahiti run is matched with the SI check prefix and the tonga run
; with the VI check prefix.

; f32 rsq.clamp applied directly to the function argument.
; The tahiti checks expect a single v_rsq_clamp_f32. The tonga checks expect
; v_rsq_f32 followed by v_min_f32 against 0x7f7fffff and v_max_f32 against
; 0xff7fffff (the bit patterns of +/- the largest finite f32).
define float @v_rsq_clamp_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, v0
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
  ret float %rsq_clamp
}

; f32 rsq.clamp whose operand is llvm.fabs of the function argument.
; Both runs expect the fabs to appear as the |v0| source modifier on the _e64
; encoding of the rsq instruction rather than as a separate instruction; the
; tonga checks additionally expect the same v_min/v_max clamp pair as the
; non-fabs f32 case.
define float @v_rsq_clamp_fabs_f32(float %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e64 v0, |v0|
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e64 v0, |v0|
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fabs.src = call float @llvm.fabs.f32(float %src)
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %fabs.src)
  ret float %rsq_clamp
}

; f64 rsq.clamp applied directly to the function argument.
; The tahiti checks expect a single v_rsq_clamp_f64. The tonga checks expect
; v_rsq_f64, then the 64-bit bound built in s[4:5] (s4 = -1, s5 = 0x7fefffff)
; for v_min_f64, then only s5 rewritten to 0xffefffff before v_max_f64.
define double @v_rsq_clamp_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
  ret double %rsq_clamp
}

; f64 rsq.clamp whose operand is llvm.fabs of the function argument.
; Both runs expect the fabs to appear as the |v[0:1]| source modifier on the
; _e64 encoding of the rsq instruction; the tonga checks additionally expect
; the same s[4:5]-based v_min_f64/v_max_f64 clamp sequence as the non-fabs
; f64 case.
define double @v_rsq_clamp_fabs_f64(double %src) #0 {
; SI-LABEL: v_rsq_clamp_fabs_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e64 v[0:1], |v[0:1]|
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_fabs_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e64 v[0:1], |v[0:1]|
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %fabs.src = call double @llvm.fabs.f64(double %src)
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %fabs.src)
  ret double %rsq_clamp
}

; f32 rsq.clamp of an undef operand (the function takes no arguments).
; The checks expect the instruction to still be emitted, reading s4 as its
; source; the tonga checks expect the usual v_min/v_max clamp pair afterwards.
define float @v_rsq_clamp_undef_f32() #0 {
; SI-LABEL: v_rsq_clamp_undef_f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, s4
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f32:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, s4
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float undef)
  ret float %rsq_clamp
}

; f64 rsq.clamp of an undef operand (the function takes no arguments).
; The checks expect the instruction to still be emitted, reading s[4:5] as its
; source; the tonga checks then expect s[4:5] to be overwritten with the clamp
; bounds for the v_min_f64/v_max_f64 pair.
define double @v_rsq_clamp_undef_f64() #0 {
; SI-LABEL: v_rsq_clamp_undef_f64:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], s[4:5]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_undef_f64:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double undef)
  ret double %rsq_clamp
}

; Same IR as the plain f32 case, but the function uses attribute group #2
; ("amdgpu-ieee"="false") instead of #0. The expected instruction sequences
; for both runs are identical to the plain f32 case.
define float @v_rsq_clamp_f32_non_ieee(float %src) #2 {
; SI-LABEL: v_rsq_clamp_f32_non_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f32_non_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f32_e32 v0, v0
; VI-NEXT:    v_min_f32_e32 v0, 0x7f7fffff, v0
; VI-NEXT:    v_max_f32_e32 v0, 0xff7fffff, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call float @llvm.amdgcn.rsq.clamp.f32(float %src)
  ret float %rsq_clamp
}

; Same IR as the plain f64 case, but the function uses attribute group #2
; ("amdgpu-ieee"="false") instead of #0. The expected instruction sequences
; for both runs are identical to the plain f64 case.
define double @v_rsq_clamp_f64_non_ieee(double %src) #2 {
; SI-LABEL: v_rsq_clamp_f64_non_ieee:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_rsq_clamp_f64_e32 v[0:1], v[0:1]
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_rsq_clamp_f64_non_ieee:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_rsq_f64_e32 v[0:1], v[0:1]
; VI-NEXT:    s_mov_b32 s4, -1
; VI-NEXT:    s_mov_b32 s5, 0x7fefffff
; VI-NEXT:    v_min_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_mov_b32 s5, 0xffefffff
; VI-NEXT:    v_max_f64 v[0:1], v[0:1], s[4:5]
; VI-NEXT:    s_setpc_b64 s[30:31]
  %rsq_clamp = call double @llvm.amdgcn.rsq.clamp.f64(double %src)
  ret double %rsq_clamp
}

; Intrinsics called by the test functions in this file.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.amdgcn.rsq.clamp.f32(float) #1
declare double @llvm.fabs.f64(double) #1
declare double @llvm.amdgcn.rsq.clamp.f64(double) #1

; Attribute groups: #0 is used by the regular test functions, #1 by the
; intrinsic declarations, and #2 by the *_non_ieee test functions.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "amdgpu-ieee"="false" }