mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-25 05:52:53 +02:00
12f6142ff9
Summary: For shrinking SOPK instructions, we were creating a hint to tell the register allocator to use the register allocated for src0 for the dst operand as well. However, this seems to not work sometimes depending on the order virtual registers are assigned physical registers. To fix this, I've added a second allocation hint which does the reverse, asks that the register allocated for dst is used for src0. Reviewers: arsenm Subscribers: arsenm, llvm-commits, kzhuravl Differential Revision: https://reviews.llvm.org/D23862 llvm-svn: 279968
91 lines
3.3 KiB
LLVM
91 lines
3.3 KiB
LLVM
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
|
|
|
|
; AMDGPU intrinsic declaration. The vector (VALU) tests in this file call it
; and use the returned i32 as the element index into their %in pointer.
; NOTE(review): presumably this yields the work-item's X id, which would make
; the loaded value per-lane rather than uniform - confirm against the AMDGPU docs.
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
|
|
|
; Test with inline immediate
|
|
|
|
; Inline-immediate case: (%val + 9) << 2.
; The checks expect the shift by 2 to be emitted first and the constant to be
; folded into the following add as 36 (9 << 2); the add result is what gets
; stored.
;
; The label check uses the SI prefix because that is the only prefix enabled
; on the FileCheck command line; a label written with any other prefix is
; silently ignored and the checks below would no longer be anchored to this
; function's output.
; SI-LABEL: {{^}}shl_2_add_9_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 36, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @shl_2_add_9_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Index %in by the work-item id returned from the intrinsic.
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
  %val = load i32, i32 addrspace(1)* %ptr, align 4
  ; add-then-shift pattern under test
  %add = add i32 %val, 9
  %result = shl i32 %add, 2
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; Same add-then-shift pattern as the single-use test, but the add result has a
; second use: it is also stored to %out1. The checks expect a plain add of 9
; and a shift by 2 to both be present (in either order, hence the DAG form),
; and each of the two results to be stored.
;
; The label check uses the SI prefix because that is the only prefix enabled
; on the FileCheck command line; a label written with any other prefix is
; silently ignored and the checks below would no longer be anchored to this
; function's output.
; SI-LABEL: {{^}}shl_2_add_9_i32_2_add_uses:
; SI-DAG: v_add_i32_e32 [[ADDREG:v[0-9]+]], vcc, 9, {{v[0-9]+}}
; SI-DAG: v_lshlrev_b32_e32 [[SHLREG:v[0-9]+]], 2, {{v[0-9]+}}
; SI-DAG: buffer_store_dword [[ADDREG]]
; SI-DAG: buffer_store_dword [[SHLREG]]
; SI: s_endpgm
define void @shl_2_add_9_i32_2_add_uses(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 addrspace(1)* %in) #0 {
  ; Index %in by the work-item id returned from the intrinsic.
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
  %val = load i32, i32 addrspace(1)* %ptr, align 4
  %add = add i32 %val, 9
  %result = shl i32 %add, 2
  store i32 %result, i32 addrspace(1)* %out0, align 4
  ; Second use of %add: the un-shifted sum must stay available.
  store i32 %add, i32 addrspace(1)* %out1, align 4
  ret void
}
|
|
|
|
; Test with add literal constant
|
|
|
|
; Literal-constant case: (%val + 999) << 2.
; The checks expect the shift by 2 first and the constant folded into the
; following add as 0xf9c (999 << 2 = 3996); the add result is what gets stored.
;
; The label check uses the SI prefix because that is the only prefix enabled
; on the FileCheck command line; a label written with any other prefix is
; silently ignored and the checks below would no longer be anchored to this
; function's output.
; SI-LABEL: {{^}}shl_2_add_999_i32:
; SI: v_lshlrev_b32_e32 [[REG:v[0-9]+]], 2, {{v[0-9]+}}
; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xf9c, [[REG]]
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @shl_2_add_999_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  ; Index %in by the work-item id returned from the intrinsic.
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i32 %tid.x
  %val = load i32, i32 addrspace(1)* %ptr, align 4
  ; Named %add (not %shl) because this value is the sum, matching the naming
  ; used by the other add-then-shift tests in this file.
  %add = add i32 %val, 999
  %result = shl i32 %add, 2
  store i32 %result, i32 addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; Scalar case: ((%x + 123) << 3) + %y, with the shifted value as the left
; operand of the final add.
; The checks expect %x to be shifted by 3, added to %y, and the constant to be
; applied last with s_addk_i32 as 0x3d8 (123 << 3 = 984), writing the same
; register that the preceding s_add_i32 defined.
;
; The label check uses the SI prefix because that is the only prefix enabled
; on the FileCheck command line; a label written with any other prefix is
; silently ignored and the checks below would no longer be anchored to this
; function's output.
; SI-LABEL: {{^}}test_add_shl_add_constant:
; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
; SI: s_add_i32 [[RESULT:s[0-9]+]], [[SHL3]], [[Y]]
; SI: s_addk_i32 [[RESULT]], 0x3d8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
; SI: buffer_store_dword [[VRESULT]]
define void @test_add_shl_add_constant(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
  %add.0 = add i32 %x, 123
  %shl = shl i32 %add.0, 3
  ; shifted value is the first operand here; the _inv variant swaps them
  %add.1 = add i32 %shl, %y
  store i32 %add.1, i32 addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; Scalar case with the final add's operands inverted: %y + ((%x + 123) << 3).
; The checks expect %x to be shifted by 3, %y added to that shift result, and
; the constant applied last with s_addk_i32 as 0x3d8 (123 << 3 = 984), writing
; the same register that the preceding s_add_i32 defined.
;
; The label check uses the SI prefix because that is the only prefix enabled
; on the FileCheck command line; a label written with any other prefix is
; silently ignored and the checks below would no longer be anchored to this
; function's output.
; SI-LABEL: {{^}}test_add_shl_add_constant_inv:
; SI-DAG: s_load_dword [[X:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI-DAG: s_load_dword [[Y:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
; SI: s_lshl_b32 [[SHL3:s[0-9]+]], [[X]], 3
; SI: s_add_i32 [[TMP:s[0-9]+]], [[Y]], [[SHL3]]
; SI: s_addk_i32 [[TMP]], 0x3d8
; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[TMP]]
; SI: buffer_store_dword [[VRESULT]]
define void @test_add_shl_add_constant_inv(i32 addrspace(1)* %out, i32 %x, i32 %y) #0 {
  %add.0 = add i32 %x, 123
  %shl = shl i32 %add.0, 3
  ; %y is the first operand here; the non-inverted variant has %shl first
  %add.1 = add i32 %y, %shl
  store i32 %add.1, i32 addrspace(1)* %out, align 4
  ret void
}
|
|
|
|
; Attribute group #0: attached to every test function defined in this file.
attributes #0 = { nounwind }
|
|
; Attribute group #1: attached to the workitem.id.x intrinsic declaration and
; to each call of it in this file.
attributes #1 = { nounwind readnone }
|