mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-24 13:33:37 +02:00
93d42a5136
The operand types were defined to fit the fp16_to_fp node, which has the half as an integer type. v_cvt_f32_f16 does support source modifiers, so change this to have an FP type and modifiers. For targets without legal f16, this requires recognizing the bit operations and trying to produce them. llvm-svn: 293857
126 lines
4.5 KiB
LLVM
126 lines
4.5 KiB
LLVM
; Two llc configurations for the amdgcn target, both with -verify-machineinstrs
; and -enable-unsafe-fp-math: the first names no -mcpu, the second selects
; fiji with the flat-for-global feature turned off. Each invocation enables
; the shared GCN check prefix plus one configuration-specific prefix (SI for
; the first, VI for the second).
; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
|
|
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
|
|
|
; Scalar float -> half truncation of a value loaded from global memory.
; Expected code: one dword load, one v_cvt_f16_f32 (e32 form) reading the
; loaded register, and a 16-bit store of the converted register.
; GCN-LABEL: {{^}}fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fptrunc_f32_to_f16(half addrspace(1)* %r, float addrspace(1)* %a) {
entry:
  %src = load float, float addrspace(1)* %a
  %narrowed = fptrunc float %src to half
  store half %narrowed, half addrspace(1)* %r
  ret void
}
|
|
|
|
; Scalar double -> half truncation of a value loaded from global memory.
; Expected code narrows in two steps: v_cvt_f32_f64 on the loaded 64-bit
; register pair, then v_cvt_f16_f32 on that result, followed by a 16-bit store.
; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}}
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fptrunc_f64_to_f16(half addrspace(1)* %r, double addrspace(1)* %a) {
entry:
  %src = load double, double addrspace(1)* %a
  %narrowed = fptrunc double %src to half
  store half %narrowed, half addrspace(1)* %r
  ret void
}
|
|
|
|
; <2 x float> -> <2 x half> truncation.
; Expected code converts each lane with v_cvt_f16_f32, then packs the pair
; into one dword: lane 0 is masked with 0xffff, lane 1 is shifted left by 16,
; and the two are OR'd before a single dword store. The per-lane converts,
; mask and shift are matched in any order; the OR and store must follow.
; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GCN-DAG: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define void @fptrunc_v2f32_to_v2f16(<2 x half> addrspace(1)* %r, <2 x float> addrspace(1)* %a) {
entry:
  %src = load <2 x float>, <2 x float> addrspace(1)* %a
  %narrowed = fptrunc <2 x float> %src to <2 x half>
  store <2 x half> %narrowed, <2 x half> addrspace(1)* %r
  ret void
}
|
|
|
|
; <2 x double> -> <2 x half> truncation.
; Expected code narrows each lane in two steps (v_cvt_f32_f64, then
; v_cvt_f16_f32) and packs the pair into one dword: lane 0 masked with 0xffff,
; lane 1 shifted left by 16, both OR'd before a single dword store.
;
; The per-lane converts, mask and shift are independent of each other across
; lanes, so they are matched order-independently (same approach as
; fptrunc_v2f32_to_v2f16) to keep the test insensitive to scheduling. The
; captured registers still tie every use to its definition. The trailing
; s_endpgm check matches what the other kernels in this file verify.
; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16:
; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
; GCN-DAG: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GCN-DAG: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define void @fptrunc_v2f64_to_v2f16(
    <2 x half> addrspace(1)* %r,
    <2 x double> addrspace(1)* %a) {
entry:
  %a.val = load <2 x double>, <2 x double> addrspace(1)* %a
  %r.val = fptrunc <2 x double> %a.val to <2 x half>
  store <2 x half> %r.val, <2 x half> addrspace(1)* %r
  ret void
}
|
|
|
|
; Negated float -> half truncation. The negation is written in IR as an
; fsub from -0.0. The checks expect no separate negate: the convert appears in
; its e64 form with a negated source operand (-v).
; GCN-LABEL: {{^}}fneg_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fneg_fptrunc_f32_to_f16(half addrspace(1)* %r, float addrspace(1)* %a) {
entry:
  %src = load float, float addrspace(1)* %a
  %negated = fsub float -0.0, %src
  %narrowed = fptrunc float %negated to half
  store half %narrowed, half addrspace(1)* %r
  ret void
}
|
|
|
|
; Absolute value of a float, then truncation to half. The checks expect the
; llvm.fabs.f32 call to show up only as an |v| source operand on the e64 form
; of the convert, directly on the loaded register.
; GCN-LABEL: {{^}}fabs_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fabs_fptrunc_f32_to_f16(half addrspace(1)* %r, float addrspace(1)* %a) {
entry:
  %src = load float, float addrspace(1)* %a
  %magnitude = call float @llvm.fabs.f32(float %src)
  %narrowed = fptrunc float %magnitude to half
  store half %narrowed, half addrspace(1)* %r
  ret void
}
|
|
|
|
; Negated absolute value of a float, then truncation to half. The IR applies
; llvm.fabs.f32 and then an fsub from -0.0. The checks expect both to appear
; together as a -|v| source operand on the e64 form of the convert.
; GCN-LABEL: {{^}}fneg_fabs_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -|v[[A_F32]]|
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fneg_fabs_fptrunc_f32_to_f16(half addrspace(1)* %r, float addrspace(1)* %a) {
entry:
  %src = load float, float addrspace(1)* %a
  %magnitude = call float @llvm.fabs.f32(float %src)
  %neg.magnitude = fsub float -0.0, %magnitude
  %narrowed = fptrunc float %neg.magnitude to half
  store half %narrowed, half addrspace(1)* %r
  ret void
}
|
|
|
|
; Declaration of the f32 fabs intrinsic called by the fabs and fneg+fabs
; tests in this file.
declare float @llvm.fabs.f32(float) #1
|
|
|
|
; Attribute group #1, referenced by the intrinsic declaration.
attributes #1 = { nounwind readnone }
|