1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-10-22 04:22:57 +02:00
llvm-mirror/test/CodeGen/AMDGPU/half.ll
Matt Arsenault cf419f3ccc DAGCombiner: Turn truncate of a bitcasted vector to an extract
On AMDGPU where operations i64 operations are often bitcasted to v2i32
and back, this pattern shows up regularly where it breaks some
expected combines on i64, such as load width reducing.

This fixes some test failures in a future commit when i64 loads
are changed to promote.

llvm-svn: 262397
2016-03-01 21:31:53 +00:00

663 lines
22 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; half args should be promoted to float
; GCN-LABEL: {{^}}load_f16_arg:
; GCN: s_load_dword [[ARG:s[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
; GCN: buffer_store_short [[CVT]]
define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
store half %arg, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}load_v2f16_arg:
; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN: s_endpgm
define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
store <2 x half> %arg, <2 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}load_v3f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_short
; GCN-NOT: buffer_store
; GCN: s_endpgm
define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
store <3 x half> %arg, <3 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}load_v4f16_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
store <4 x half> %arg, <4 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}load_v8f16_arg:
define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
store <8 x half> %arg, <8 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v2f16_arg:
define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
%fpext = fpext <2 x half> %in to <2 x float>
store <2 x float> %fpext, <2 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
%ext = fpext half %arg to float
store float %ext, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
%ext = fpext <2 x half> %arg to <2 x float>
store <2 x float> %ext, <2 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN-NOT: buffer_load
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN-DAG: buffer_store_dword
; GCN-DAG: buffer_store_dwordx2
; GCN: s_endpgm
define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
%ext = fpext <3 x half> %arg to <3 x float>
store <3 x float> %ext, <3 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
%ext = fpext <4 x half> %arg to <4 x float>
store <4 x float> %ext, <4 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
%ext = fpext <8 x half> %arg to <8 x float>
store <8 x float> %ext, <8 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
; GCN: buffer_store_dwordx2 [[RESULT]]
define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
%ext = fpext half %arg to double
store double %ext, double addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
%ext = fpext <2 x half> %arg to <2 x double>
store <2 x double> %ext, <2 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
%ext = fpext <3 x half> %arg to <3 x double>
store <3 x double> %ext, <3 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
%ext = fpext <4 x half> %arg to <4 x double>
store <4 x double> %ext, <4 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: buffer_load_ushort v
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f32_f16_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN-DAG: v_cvt_f64_f32_e32
; GCN: s_endpgm
define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
%ext = fpext <8 x half> %arg to <8 x double>
store <8 x double> %ext, <8 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_load_store_f16:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
%val = load half, half addrspace(1)* %in
store half %val, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_load_store_v2f16:
; GCN: buffer_load_dword [[TMP:v[0-9]+]]
; GCN: buffer_store_dword [[TMP]]
define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
store <2 x half> %val, <2 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_load_store_v4f16:
; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx2 [[TMP]]
define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
%val = load <4 x half>, <4 x half> addrspace(1)* %in
store <4 x half> %val, <4 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_load_store_v8f16:
; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: buffer_store_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
; GCN: s_endpgm
define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
%val = load <8 x half>, <8 x half> addrspace(1)* %in
store <8 x half> %val, <8 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_f16_to_f32:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_dword [[CVT]]
define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
%val = load half, half addrspace(1)* %in
%cvt = fpext half %val to float
store float %cvt, float addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%cvt = fpext <2 x half> %val to <2 x float>
store <2 x float> %cvt, <2 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
%val = load <3 x half>, <3 x half> addrspace(1)* %in
%cvt = fpext <3 x half> %val to <3 x float>
store <3 x float> %cvt, <3 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
%val = load <4 x half>, <4 x half> addrspace(1)* %in
%cvt = fpext <4 x half> %val to <4 x float>
store <4 x float> %cvt, <4 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
%val = load <8 x half>, <8 x half> addrspace(1)* %in
%cvt = fpext <8 x half> %val to <8 x float>
store <8 x float> %cvt, <8 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
%val = load <16 x half>, <16 x half> addrspace(1)* %in
%cvt = fpext <16 x half> %val to <16 x float>
store <16 x float> %cvt, <16 x float> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_f16_to_f64:
; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
; GCN: buffer_store_dwordx2 [[CVT1]]
define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
%val = load half, half addrspace(1)* %in
%cvt = fpext half %val to double
store double %cvt, double addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
; GCN: s_endpgm
define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
%val = load <2 x half>, <2 x half> addrspace(1)* %in
%cvt = fpext <2 x half> %val to <2 x double>
store <2 x double> %cvt, <2 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
; GCN: v_cvt_f32_f16_e32
; GCN: v_cvt_f32_f16_e32
; GCN-DAG: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
; GCN: v_cvt_f32_f16_e32
; GCN-NOT: v_cvt_f32_f16
; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN: v_cvt_f64_f32_e32
; GCN-NOT: v_cvt_f64_f32_e32
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
; GCN: s_endpgm
define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
%val = load <3 x half>, <3 x half> addrspace(1)* %in
%cvt = fpext <3 x half> %val to <3 x double>
store <3 x double> %cvt, <3 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
%val = load <4 x half>, <4 x half> addrspace(1)* %in
%cvt = fpext <4 x half> %val to <4 x double>
store <4 x double> %cvt, <4 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
%val = load <8 x half>, <8 x half> addrspace(1)* %in
%cvt = fpext <8 x half> %val to <8 x double>
store <8 x double> %cvt, <8 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
%val = load <16 x half>, <16 x half> addrspace(1)* %in
%cvt = fpext <16 x half> %val to <16 x double>
store <16 x double> %cvt, <16 x double> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
; GCN: buffer_store_short [[CVT]]
define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
%val = load float, float addrspace(1)* %in
%cvt = fptrunc float %val to half
store half %cvt, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
; GCN-DAG: buffer_store_short [[CVT0]]
; GCN-DAG: buffer_store_short [[CVT1]]
; GCN: s_endpgm
define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
%val = load <2 x float>, <2 x float> addrspace(1)* %in
%cvt = fptrunc <2 x float> %val to <2 x half>
store <2 x half> %cvt, <2 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN-NOT: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_dword
; GCN: s_endpgm
define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
%val = load <3 x float>, <3 x float> addrspace(1)* %in
%cvt = fptrunc <3 x float> %val to <3 x half>
store <3 x half> %cvt, <3 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
%val = load <4 x float>, <4 x float> addrspace(1)* %in
%cvt = fptrunc <4 x float> %val to <4 x half>
store <4 x half> %cvt, <4 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: v_cvt_f16_f32_e32
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
%val = load <8 x float>, <8 x float> addrspace(1)* %in
%cvt = fptrunc <8 x float> %val to <8 x half>
store <8 x half> %cvt, <8 x half> addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN: buffer_load_dwordx4
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: v_cvt_f16_f32_e32
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN-DAG: buffer_store_short
; GCN: s_endpgm
define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
%val = load <16 x float>, <16 x float> addrspace(1)* %in
%cvt = fptrunc <16 x float> %val to <16 x half>
store <16 x half> %cvt, <16 x half> addrspace(1)* %out
ret void
}
; FIXME: Unsafe math should fold conversions away
; GCN-LABEL: {{^}}fadd_f16:
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
%add = fadd half %a, %b
store half %add, half addrspace(1)* %out, align 4
ret void
}
; GCN-LABEL: {{^}}fadd_v2f16:
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
%add = fadd <2 x half> %a, %b
store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
ret void
}
; GCN-LABEL: {{^}}fadd_v4f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
%b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
%a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
%b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
%result = fadd <4 x half> %a, %b
store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
ret void
}
; GCN-LABEL: {{^}}fadd_v8f16:
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; SI: v_add_f32
; GCN: s_endpgm
define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
%add = fadd <8 x half> %a, %b
store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
ret void
}
; GCN-LABEL: {{^}}fsub_f16:
; GCN: v_subrev_f32_e32
; GCN: s_endpgm
define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
%b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
%a = load half, half addrspace(1)* %in
%b = load half, half addrspace(1)* %b_ptr
%sub = fsub half %a, %b
store half %sub, half addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}test_bitcast_from_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
%val = load half, half addrspace(1)* %in
%val_int = bitcast half %val to i16
store i16 %val_int, i16 addrspace(1)* %out
ret void
}
; GCN-LABEL: {{^}}test_bitcast_to_half:
; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
; GCN: buffer_store_short [[TMP]]
define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
%val = load i16, i16 addrspace(1)* %in
%val_fp = bitcast i16 %val to half
store half %val_fp, half addrspace(1)* %out
ret void
}
attributes #0 = { nounwind }