1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 20:23:11 +01:00
llvm-mirror/test/CodeGen/AMDGPU/amdhsa-trap-num-sgprs.ll
hsmahesha cca61dc4bb [AMDGPU/MemOpsCluster] Clean-up fixme's around mem ops clustering logic
Get rid of all fixmes and base heuristic on `num-clustered-dwords`. The main intuition behind this is as
follows. The existing heuristic roughly summarizes as below:

* Assume that all the mem op instructions participating in the clustering process load/store the same number of bytes
* If num bytes loaded by each mem op is 4 bytes, then cluster at max 5 mem ops, that is at max 20 bytes
* If num bytes loaded by each mem op is 8 bytes, then cluster at max 3 mem ops, that is at max 24 bytes
* If num bytes loaded by each mem op is 16 bytes, then cluster at max 2 mem ops, that is at max 32 bytes

So, we need to make sure that the new heuristic does not completely deviate from the above one, and that it
properly handles both the sub-word loads and the wide loads.

Reviewed By: arsenm, rampitec

Differential Revision: https://reviews.llvm.org/D84354
2020-07-30 21:41:13 +05:30

71 lines
2.9 KiB
LLVM

; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-ENABLE
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=-trap-handler < %s | FileCheck %s --check-prefixes=GCN,TRAP-HANDLER-DISABLE
; GCN-LABEL: {{^}}amdhsa_trap_num_sgprs
; TRAP-HANDLER-ENABLE: NumSgprs: 61
; TRAP-HANDLER-DISABLE: NumSgprs: 77
; Kernel under test. It takes 30 (pointer, value) argument pairs
; (%out0/%in0 .. %out29/%in29); every pointer is an i32 pointer in
; addrspace(1) and every value is an i32. The body stores each %inN through
; the matching %outN and returns.
;
; The directives preceding this function compile it twice with llc for
; gfx803, once with the trap-handler subtarget feature turned on and once
; with it turned off, and expect a different reported SGPR count for each.
;
; NOTE(review): the large argument list is presumably there to drive up
; scalar register usage so the two configurations report different counts --
; confirm against the AMDGPU backend. Because the expected counts are exact,
; do not reorder, add, or remove arguments or stores without re-deriving the
; expected values.
define amdgpu_kernel void @amdhsa_trap_num_sgprs(
i32 addrspace(1)* %out0, i32 %in0,
i32 addrspace(1)* %out1, i32 %in1,
i32 addrspace(1)* %out2, i32 %in2,
i32 addrspace(1)* %out3, i32 %in3,
i32 addrspace(1)* %out4, i32 %in4,
i32 addrspace(1)* %out5, i32 %in5,
i32 addrspace(1)* %out6, i32 %in6,
i32 addrspace(1)* %out7, i32 %in7,
i32 addrspace(1)* %out8, i32 %in8,
i32 addrspace(1)* %out9, i32 %in9,
i32 addrspace(1)* %out10, i32 %in10,
i32 addrspace(1)* %out11, i32 %in11,
i32 addrspace(1)* %out12, i32 %in12,
i32 addrspace(1)* %out13, i32 %in13,
i32 addrspace(1)* %out14, i32 %in14,
i32 addrspace(1)* %out15, i32 %in15,
i32 addrspace(1)* %out16, i32 %in16,
i32 addrspace(1)* %out17, i32 %in17,
i32 addrspace(1)* %out18, i32 %in18,
i32 addrspace(1)* %out19, i32 %in19,
i32 addrspace(1)* %out20, i32 %in20,
i32 addrspace(1)* %out21, i32 %in21,
i32 addrspace(1)* %out22, i32 %in22,
i32 addrspace(1)* %out23, i32 %in23,
i32 addrspace(1)* %out24, i32 %in24,
i32 addrspace(1)* %out25, i32 %in25,
i32 addrspace(1)* %out26, i32 %in26,
i32 addrspace(1)* %out27, i32 %in27,
i32 addrspace(1)* %out28, i32 %in28,
i32 addrspace(1)* %out29, i32 %in29) {
entry:
; One store per argument pair, in argument order: *%outN = %inN.
store i32 %in0, i32 addrspace(1)* %out0
store i32 %in1, i32 addrspace(1)* %out1
store i32 %in2, i32 addrspace(1)* %out2
store i32 %in3, i32 addrspace(1)* %out3
store i32 %in4, i32 addrspace(1)* %out4
store i32 %in5, i32 addrspace(1)* %out5
store i32 %in6, i32 addrspace(1)* %out6
store i32 %in7, i32 addrspace(1)* %out7
store i32 %in8, i32 addrspace(1)* %out8
store i32 %in9, i32 addrspace(1)* %out9
store i32 %in10, i32 addrspace(1)* %out10
store i32 %in11, i32 addrspace(1)* %out11
store i32 %in12, i32 addrspace(1)* %out12
store i32 %in13, i32 addrspace(1)* %out13
store i32 %in14, i32 addrspace(1)* %out14
store i32 %in15, i32 addrspace(1)* %out15
store i32 %in16, i32 addrspace(1)* %out16
store i32 %in17, i32 addrspace(1)* %out17
store i32 %in18, i32 addrspace(1)* %out18
store i32 %in19, i32 addrspace(1)* %out19
store i32 %in20, i32 addrspace(1)* %out20
store i32 %in21, i32 addrspace(1)* %out21
store i32 %in22, i32 addrspace(1)* %out22
store i32 %in23, i32 addrspace(1)* %out23
store i32 %in24, i32 addrspace(1)* %out24
store i32 %in25, i32 addrspace(1)* %out25
store i32 %in26, i32 addrspace(1)* %out26
store i32 %in27, i32 addrspace(1)* %out27
store i32 %in28, i32 addrspace(1)* %out28
store i32 %in29, i32 addrspace(1)* %out29
ret void
}