1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2025-02-01 13:11:39 +01:00
llvm-mirror/test/CodeGen/AMDGPU/dpp_combine.ll
Stanislav Mekhanoshin 99df21352a [AMDGPU] link dpp pseudos and real instructions on gfx10
This defaults to zero fi operand, but we do not expose it
anyway. Should we expose it later it needs to be added to
the pseudo.

This enables dpp combining on gfx10.

Differential Revision: https://reviews.llvm.org/D68888

llvm-svn: 374604
2019-10-11 22:03:36 +00:00

54 lines
2.3 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck %s -check-prefix=GCN
; GCN-LABEL: {{^}}dpp_add:
; GCN: global_load_dword [[V:v[0-9]+]],
; GCN: v_add_{{(nc_)?}}u32_dpp [[V]], [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0{{$}}
define amdgpu_kernel void @dpp_add(i32 addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
%load = load i32, i32 addrspace(1)* %gep
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0
%add = add i32 %tmp0, %load
store i32 %add, i32 addrspace(1)* %gep
ret void
}
; GCN-LABEL: {{^}}dpp_ceil:
; GCN: global_load_dword [[V:v[0-9]+]],
; GCN: v_ceil_f32_dpp [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0{{$}}
define amdgpu_kernel void @dpp_ceil(i32 addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
%load = load i32, i32 addrspace(1)* %gep
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0
%tmp1 = bitcast i32 %tmp0 to float
%round = tail call float @llvm.ceil.f32(float %tmp1)
%tmp2 = bitcast float %round to i32
store i32 %tmp2, i32 addrspace(1)* %gep
ret void
}
; GCN-LABEL: {{^}}dpp_fadd:
; GCN: global_load_dword [[V:v[0-9]+]],
; GCN: v_add_f32_dpp [[V]], [[V]], [[V]] quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:0{{$}}
define amdgpu_kernel void @dpp_fadd(i32 addrspace(1)* %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %id
%load = load i32, i32 addrspace(1)* %gep
%tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %load, i32 %load, i32 1, i32 15, i32 15, i1 1) #0
%tmp1 = bitcast i32 %tmp0 to float
%t = bitcast i32 %load to float
%add = fadd float %tmp1, %t
%tmp2 = bitcast float %add to i32
store i32 %tmp2, i32 addrspace(1)* %gep
ret void
}
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
declare float @llvm.ceil.f32(float)
attributes #0 = { nounwind readnone convergent }