; LLVM IR codegen test for AMDGPU half-precision handling (upstream: test/CodeGen/AMDGPU/half.ll).
; Provenance (from repository page, preserved as a comment so the file stays valid IR):
; mirror of https://github.com/RPCS3/llvm-mirror.git, synced 2024-11-23 11:13:28 +01:00
; commit 099f4c5254 — original commit message:
;   Allow a target to do something other than search for copies that will avoid
;   cross register bank copies. Implement for SI by only rewriting the most basic
;   copies, so it should look through anything like a subregister extract. I'm not
;   entirely satisified with this because it seems like eliminating a reg_sequence
;   that isn't fully used should work generically for all targets without them
;   having to override something. However, it seems to be tricky to have a simple
;   implementation of this without rewriting to invalid kinds of subregister
;   copies on some targets. I'm not sure if there is currently a generic way to
;   easily check if a subregister index would be valid for the current use. The
;   current set of TargetRegisterInfo::get*Class functions don't quite behave like
;   I would expect (e.g. getSubClassWithSubReg returns the maximal register class
;   rather than the minimal), so I'm not sure how to make the generic test keep
;   searching if SrcRC:SrcSubReg is a valid replacement for DefRC:DefSubReg.
;   Making the default implementation to check for simple copies breaks a variety
;   of ARM and x86 tests by producing illegal subregister uses. The ARM tests are
;   not actually changed since it should still be using the same
;   sharesSameRegisterFile implementation, this just relaxes them to not check for
;   specific registers.
;   llvm-svn: 248478
; (page counters: 606 lines, 20 KiB, LLVM)
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; half args should be promoted to float

; Scalar half kernel argument is loaded as a dword, converted back to f16,
; and stored as a 16-bit value.
; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
  store half %arg, half addrspace(1)* %out
  ret void
}
; <2 x half> argument is passed as two 16-bit stack/buffer loads and stored piecewise.
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
  store <2 x half> %arg, <2 x half> addrspace(1)* %out
  ret void
}
; <3 x half> argument: exactly three 16-bit loads, stored as a dword + a short.
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
  store <3 x half> %arg, <3 x half> addrspace(1)* %out
  ret void
}
; <4 x half> argument: four 16-bit loads and four 16-bit stores.
; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
  store <4 x half> %arg, <4 x half> addrspace(1)* %out
  ret void
}
; <8 x half> argument round-trip; only the label is checked for this width.
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
  store <8 x half> %arg, <8 x half> addrspace(1)* %out
  ret void
}
; fpext of a <2 x half> argument to <2 x float>; only the label is checked.
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
  %fpext = fpext <2 x half> %in to <2 x float>
  store <2 x float> %fpext, <2 x float> addrspace(1)* %out
  ret void
}
; fpext of a scalar half argument to float; only the label is checked.
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to float
  store float %ext, float addrspace(1)* %out
  ret void
}
; fpext of a <2 x half> argument to <2 x float>; only the label is checked.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x float>
  store <2 x float> %ext, <2 x float> addrspace(1)* %out
  ret void
}
; fpext <3 x half> -> <3 x float>: exactly three loads and three conversions.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x float>
  store <3 x float> %ext, <3 x float> addrspace(1)* %out
  ret void
}
; fpext <4 x half> -> <4 x float>; only the label is checked.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x float>
  store <4 x float> %ext, <4 x float> addrspace(1)* %out
  ret void
}
; fpext <8 x half> -> <8 x float>; only the label is checked.
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x float>
  store <8 x float> %ext, <8 x float> addrspace(1)* %out
  ret void
}
; fpext half -> double goes through f32; SI and VI differ only in the kernarg offset.
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
  %ext = fpext half %arg to double
  store double %ext, double addrspace(1)* %out
  ret void
}
; fpext <2 x half> -> <2 x double>: two f16->f32 then two f32->f64 conversions.
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
  %ext = fpext <2 x half> %arg to <2 x double>
  store <2 x double> %ext, <2 x double> addrspace(1)* %out
  ret void
}
; fpext <3 x half> -> <3 x double>: three of each conversion stage.
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
  %ext = fpext <3 x half> %arg to <3 x double>
  store <3 x double> %ext, <3 x double> addrspace(1)* %out
  ret void
}
; fpext <4 x half> -> <4 x double>: four of each conversion stage.
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
  %ext = fpext <4 x half> %arg to <4 x double>
  store <4 x double> %ext, <4 x double> addrspace(1)* %out
  ret void
}
; fpext <8 x half> -> <8 x double>: eight of each conversion stage (grouped 4+4).
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32

; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
  %ext = fpext <8 x half> %arg to <8 x double>
  store <8 x double> %ext, <8 x double> addrspace(1)* %out
  ret void
}
; Plain half load/store round-trip: no conversion should be emitted.
; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  store half %val, half addrspace(1)* %out
  ret void
}
; <2 x half> round-trip collapses to a single 32-bit load/store pair.
; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  store <2 x half> %val, <2 x half> addrspace(1)* %out
  ret void
}
; <4 x half> round-trip collapses to a single 64-bit load/store pair.
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  store <4 x half> %val, <4 x half> addrspace(1)* %out
  ret void
}
; <8 x half> round-trip collapses to a single 128-bit load/store pair.
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  store <8 x half> %val, <8 x half> addrspace(1)* %out
  ret void
}
; Extending load half -> float from global memory: load, convert, store dword.
; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to float
  store float %cvt, float addrspace(1)* %out
  ret void
}
; Extending load <2 x half> -> <2 x float>: results must land in consecutive
; VGPRs so a single dwordx2 store can be used.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x float>
  store <2 x float> %cvt, <2 x float> addrspace(1)* %out
  ret void
}
; Extending load <3 x half> -> <3 x float>; only the label is checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x float>
  store <3 x float> %cvt, <3 x float> addrspace(1)* %out
  ret void
}
; Extending load <4 x half> -> <4 x float>; only the label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x float>
  store <4 x float> %cvt, <4 x float> addrspace(1)* %out
  ret void
}
; Extending load <8 x half> -> <8 x float>; only the label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x float>
  store <8 x float> %cvt, <8 x float> addrspace(1)* %out
  ret void
}
; Extending load <16 x half> -> <16 x float>; only the label is checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x float>
  store <16 x float> %cvt, <16 x float> addrspace(1)* %out
  ret void
}
; Extending load half -> double goes through an intermediate f32 conversion.
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %val = load half, half addrspace(1)* %in
  %cvt = fpext half %val to double
  store double %cvt, double addrspace(1)* %out
  ret void
}
; Extending load <2 x half> -> <2 x double>: both lanes pass through f32.
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: v_cvt_f64_f32_e32 [[CVT2:v\[[0-9]+:[0-9]+\]]], v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 [[CVT3:v\[[0-9]+:[0-9]+\]]], v[[CVT1]]
; GCN-DAG: buffer_store_dwordx2 [[CVT2]]
; GCN-DAG: buffer_store_dwordx2 [[CVT3]]
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
  %val = load <2 x half>, <2 x half> addrspace(1)* %in
  %cvt = fpext <2 x half> %val to <2 x double>
  store <2 x double> %cvt, <2 x double> addrspace(1)* %out
  ret void
}
; Extending load <3 x half> -> <3 x double>; only the label is checked.
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
  %val = load <3 x half>, <3 x half> addrspace(1)* %in
  %cvt = fpext <3 x half> %val to <3 x double>
  store <3 x double> %cvt, <3 x double> addrspace(1)* %out
  ret void
}
; Extending load <4 x half> -> <4 x double>; only the label is checked.
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* %in
  %cvt = fpext <4 x half> %val to <4 x double>
  store <4 x double> %cvt, <4 x double> addrspace(1)* %out
  ret void
}
; Extending load <8 x half> -> <8 x double>; only the label is checked.
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
  %val = load <8 x half>, <8 x half> addrspace(1)* %in
  %cvt = fpext <8 x half> %val to <8 x double>
  store <8 x double> %cvt, <8 x double> addrspace(1)* %out
  ret void
}
; Extending load <16 x half> -> <16 x double>; only the label is checked.
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
  %val = load <16 x half>, <16 x half> addrspace(1)* %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, <16 x double> addrspace(1)* %out
  ret void
}
; Truncating store float -> half: load dword, convert, store 16 bits.
; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
  %val = load float, float addrspace(1)* %in
  %cvt = fptrunc float %val to half
  store half %cvt, half addrspace(1)* %out
  ret void
}
; Truncating store <2 x float> -> <2 x half>: one 64-bit load, two conversions,
; two 16-bit stores.
; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
  %val = load <2 x float>, <2 x float> addrspace(1)* %in
  %cvt = fptrunc <2 x float> %val to <2 x half>
  store <2 x half> %cvt, <2 x half> addrspace(1)* %out
  ret void
}
; Truncating store <3 x float> -> <3 x half>: exactly three conversions,
; stored as a short + a dword.
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
  %val = load <3 x float>, <3 x float> addrspace(1)* %in
  %cvt = fptrunc <3 x float> %val to <3 x half>
  store <3 x half> %cvt, <3 x half> addrspace(1)* %out
  ret void
}
; Truncating store <4 x float> -> <4 x half>: four conversions, four short stores.
; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %val = load <4 x float>, <4 x float> addrspace(1)* %in
  %cvt = fptrunc <4 x float> %val to <4 x half>
  store <4 x half> %cvt, <4 x half> addrspace(1)* %out
  ret void
}
; Truncating store <8 x float> -> <8 x half>: eight loads, conversions, stores.
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
  %val = load <8 x float>, <8 x float> addrspace(1)* %in
  %cvt = fptrunc <8 x float> %val to <8 x half>
  store <8 x half> %cvt, <8 x half> addrspace(1)* %out
  ret void
}
; Truncating store <16 x float> -> <16 x half>: sixteen loads, conversions, stores.
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN: buffer_load_dword
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
  %val = load <16 x float>, <16 x float> addrspace(1)* %in
  %cvt = fptrunc <16 x float> %val to <16 x half>
  store <16 x half> %cvt, <16 x half> addrspace(1)* %out
  ret void
}
; FIXME: Unsafe math should fold conversions away

; fadd on half args: on SI both operands are promoted to f32, added, and truncated.
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
  %add = fadd half %a, %b
  store half %add, half addrspace(1)* %out, align 4
  ret void
}
; fadd <2 x half>: two scalarized f32 adds on SI.
; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
  %add = fadd <2 x half> %a, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
  ret void
}
; fadd <4 x half> with both operands loaded from memory: four f32 adds on SI.
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
  %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
  %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
  %result = fadd <4 x half> %a, %b
  store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
  ret void
}
; fadd <8 x half>: eight scalarized f32 adds on SI.
; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
  %add = fadd <8 x half> %a, %b
  store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
  ret void
}
; fsub on loaded halves lowers to a reverse-subtract in f32.
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
  %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
  %a = load half, half addrspace(1)* %in
  %b = load half, half addrspace(1)* %b_ptr
  %sub = fsub half %a, %b
  store half %sub, half addrspace(1)* %out
  ret void
}
; bitcast half -> i16 is a pure copy: no conversion instructions expected.
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
  %val = load half, half addrspace(1)* %in
  %val_int = bitcast half %val to i16
  store i16 %val_int, i16 addrspace(1)* %out
  ret void
}
; bitcast i16 -> half is a pure copy: no conversion instructions expected.
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
  %val = load i16, i16 addrspace(1)* %in
  %val_fp = bitcast i16 %val to half
  store half %val_fp, half addrspace(1)* %out
  ret void
}
; Attribute group shared by every kernel in this test.
attributes #0 = { nounwind }