1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-24 13:33:37 +02:00
llvm-mirror/test/CodeGen/AMDGPU/fptrunc.f16.ll
Matt Arsenault 93d42a5136 AMDGPU: Use source modifiers with f16->f32 conversions
The operand types were defined to fit the fp16_to_fp node, which
has the half as an integer type. v_cvt_f32_f16 does support
source modifiers, so change this to have an FP type and modifiers.

For targets without legal f16, this requires recognizing the
bit operations and trying to produce them.

llvm-svn: 293857
2017-02-02 02:27:04 +00:00

126 lines
4.5 KiB
LLVM

; RUN: llc -march=amdgcn -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; GCN-LABEL: {{^}}fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fptrunc_f32_to_f16(
half addrspace(1)* %r,
float addrspace(1)* %a) {
entry:
%a.val = load float, float addrspace(1)* %a
%r.val = fptrunc float %a.val to half
store half %r.val, half addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fptrunc_f64_to_f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_1:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32:[0-9]+]], v{{\[}}[[A_F64_0]]:[[A_F64_1]]{{\]}}
; GCN: v_cvt_f16_f32_e32 v[[R_F16:[0-9]+]], v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fptrunc_f64_to_f16(
half addrspace(1)* %r,
double addrspace(1)* %a) {
entry:
%a.val = load double, double addrspace(1)* %a
%r.val = fptrunc double %a.val to half
store half %r.val, half addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fptrunc_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[A_F32_0:[0-9]+]]:[[A_F32_1:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; GCN-DAG: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GCN-DAG: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN-DAG: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
; GCN: s_endpgm
define void @fptrunc_v2f32_to_v2f16(
<2 x half> addrspace(1)* %r,
<2 x float> addrspace(1)* %a) {
entry:
%a.val = load <2 x float>, <2 x float> addrspace(1)* %a
%r.val = fptrunc <2 x float> %a.val to <2 x half>
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fptrunc_v2f64_to_v2f16:
; GCN: buffer_load_dwordx4 v{{\[}}[[A_F64_0:[0-9]+]]:[[A_F64_3:[0-9]+]]{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32_0:[0-9]+]], v{{\[}}[[A_F64_0]]:{{[0-9]+}}{{\]}}
; GCN: v_cvt_f32_f64_e32 v[[A_F32_1:[0-9]+]], v{{\[}}{{[0-9]+}}:[[A_F64_3]]{{\]}}
; GCN: v_cvt_f16_f32_e32 v[[R_F16_0:[0-9]+]], v[[A_F32_0]]
; GCN: v_cvt_f16_f32_e32 v[[R_F16_1:[0-9]+]], v[[A_F32_1]]
; GCN: v_and_b32_e32 v[[R_F16_LO:[0-9]+]], 0xffff, v[[R_F16_0]]
; GCN: v_lshlrev_b32_e32 v[[R_F16_HI:[0-9]+]], 16, v[[R_F16_1]]
; GCN: v_or_b32_e32 v[[R_V2_F16:[0-9]+]], v[[R_F16_HI]], v[[R_F16_LO]]
; GCN: buffer_store_dword v[[R_V2_F16]]
define void @fptrunc_v2f64_to_v2f16(
<2 x half> addrspace(1)* %r,
<2 x double> addrspace(1)* %a) {
entry:
%a.val = load <2 x double>, <2 x double> addrspace(1)* %a
%r.val = fptrunc <2 x double> %a.val to <2 x half>
store <2 x half> %r.val, <2 x half> addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fneg_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -v[[A_F32]]
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fneg_fptrunc_f32_to_f16(
half addrspace(1)* %r,
float addrspace(1)* %a) {
entry:
%a.val = load float, float addrspace(1)* %a
%a.fneg = fsub float -0.0, %a.val
%r.val = fptrunc float %a.fneg to half
store half %r.val, half addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fabs_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], |v[[A_F32]]|
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fabs_fptrunc_f32_to_f16(
half addrspace(1)* %r,
float addrspace(1)* %a) {
entry:
%a.val = load float, float addrspace(1)* %a
%a.fabs = call float @llvm.fabs.f32(float %a.val)
%r.val = fptrunc float %a.fabs to half
store half %r.val, half addrspace(1)* %r
ret void
}
; GCN-LABEL: {{^}}fneg_fabs_fptrunc_f32_to_f16:
; GCN: buffer_load_dword v[[A_F32:[0-9]+]]
; GCN: v_cvt_f16_f32_e64 v[[R_F16:[0-9]+]], -|v[[A_F32]]|
; GCN: buffer_store_short v[[R_F16]]
; GCN: s_endpgm
define void @fneg_fabs_fptrunc_f32_to_f16(
half addrspace(1)* %r,
float addrspace(1)* %a) {
entry:
%a.val = load float, float addrspace(1)* %a
%a.fabs = call float @llvm.fabs.f32(float %a.val)
%a.fneg.fabs = fsub float -0.0, %a.fabs
%r.val = fptrunc float %a.fneg.fabs to half
store half %r.val, half addrspace(1)* %r
ret void
}
declare float @llvm.fabs.f32(float) #1
attributes #1 = { nounwind readnone }