mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 12:43:36 +01:00
248de533d0
These used to consistently be zeroed pre-gfx9, but gfx9 made the situation complicated since now some still do and some don't. This also manages to pick up a few cases that the pattern fails to optimize away. We handle some cases with instruction patterns, but some get through. In particular this improves the integer cases.
49 lines
1.6 KiB
LLVM
49 lines
1.6 KiB
LLVM
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
|
|
|
|
declare i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a)
|
|
|
|
; GCN-LABEL: {{^}}frexp_exp_f16
|
|
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
|
; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
|
|
; GCN: buffer_store_short v[[R_I16]]
|
|
define amdgpu_kernel void @frexp_exp_f16(
|
|
i16 addrspace(1)* %r,
|
|
half addrspace(1)* %a) {
|
|
entry:
|
|
%a.val = load half, half addrspace(1)* %a
|
|
%r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val)
|
|
store i16 %r.val, i16 addrspace(1)* %r
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}frexp_exp_f16_sext
|
|
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
|
; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
|
|
; VI: v_bfe_i32 v[[R_I32:[0-9]+]], v[[R_I16]], 0, 16{{$}}
|
|
; GCN: buffer_store_dword v[[R_I32]]
|
|
define amdgpu_kernel void @frexp_exp_f16_sext(
|
|
i32 addrspace(1)* %r,
|
|
half addrspace(1)* %a) {
|
|
entry:
|
|
%a.val = load half, half addrspace(1)* %a
|
|
%r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val)
|
|
%r.val.sext = sext i16 %r.val to i32
|
|
store i32 %r.val.sext, i32 addrspace(1)* %r
|
|
ret void
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}frexp_exp_f16_zext
|
|
; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]
|
|
; VI: v_frexp_exp_i16_f16_e32 v[[R_I16:[0-9]+]], v[[A_F16]]
|
|
; GCN: buffer_store_dword v[[R_I16]]
|
|
define amdgpu_kernel void @frexp_exp_f16_zext(
|
|
i32 addrspace(1)* %r,
|
|
half addrspace(1)* %a) {
|
|
entry:
|
|
%a.val = load half, half addrspace(1)* %a
|
|
%r.val = call i16 @llvm.amdgcn.frexp.exp.i16.f16(half %a.val)
|
|
%r.val.zext = zext i16 %r.val to i32
|
|
store i32 %r.val.zext, i32 addrspace(1)* %r
|
|
ret void
|
|
}
|