mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
2ca71a2147
Clustering loads has caching benefits, but as far as I know there is no advantage to clustering stores on any AMDGPU subtargets. The disadvantage is that it tends to increase register pressure and restricts scheduling freedom. Differential Revision: https://reviews.llvm.org/D85530
59 lines
2.8 KiB
LLVM
59 lines
2.8 KiB
LLVM
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-TFILD %s
|
|
; RUN: llc -march=amdgcn -mcpu=gfx900 -combiner-tokenfactor-inline-limit=7 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GCN-TFIL7 %s
|
|
|
|
|
|
; GCN-LABEL: {{^}}token_factor_inline_limit_test:
|
|
|
|
; GCN-TFILD: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
|
|
; GCN-TFILD: buffer_store_dword [[REG8]], {{.*$}}
|
|
; GCN-TFILD: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
|
|
; GCN-TFILD: buffer_store_dword [[REG9]], {{.*}} offset:4
|
|
; GCN-TFILD: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
|
|
; GCN-TFILD: buffer_store_dword [[REG10]], {{.*}} offset:8
|
|
; GCN-TFILD: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
|
|
; GCN-TFILD: buffer_store_dword [[REG11]], {{.*}} offset:12
|
|
; GCN-TFILD: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
|
|
; GCN-TFILD: buffer_store_dword [[REG12]], {{.*}} offset:16
|
|
; GCN-TFILD: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
|
|
; GCN-TFILD: buffer_store_dword [[REG13]], {{.*}} offset:20
|
|
; GCN-TFILD: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
|
|
; GCN-TFILD: buffer_store_dword [[REG14]], {{.*}} offset:24
|
|
; GCN-TFILD: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
|
|
; GCN-TFILD: buffer_store_dword [[REG15]], {{.*}} offset:28
|
|
|
|
; GCN-TFIL7: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
|
|
; GCN-TFIL7: buffer_store_dword [[REG15]], {{.*}} offset:28
|
|
; GCN-TFIL7: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
|
|
; GCN-TFIL7: buffer_store_dword [[REG14]], {{.*}} offset:24
|
|
; GCN-TFIL7: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
|
|
; GCN-TFIL7: buffer_store_dword [[REG13]], {{.*}} offset:20
|
|
; GCN-TFIL7: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
|
|
; GCN-TFIL7: buffer_store_dword [[REG12]], {{.*}} offset:16
|
|
; GCN-TFIL7: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
|
|
; GCN-TFIL7: buffer_store_dword [[REG11]], {{.*}} offset:12
|
|
; GCN-TFIL7: v_mov_b32_e32 [[REG10:v[0-9]+]], 10
|
|
; GCN-TFIL7: buffer_store_dword [[REG10]], {{.*}} offset:8
|
|
; GCN-TFIL7: v_mov_b32_e32 [[REG9:v[0-9]+]], 9
|
|
; GCN-TFIL7: buffer_store_dword [[REG9]], {{.*}} offset:4
|
|
; GCN-TFIL7: v_mov_b32_e32 [[REG8:v[0-9]+]], 8
|
|
; GCN-TFIL7: buffer_store_dword [[REG8]], {{.*$}}
|
|
|
|
; GCN: v_mov_b32_e32 v31, 7
|
|
; GCN: s_getpc
|
|
; Test body: a single call to @external_void_func_8xv5i32 that passes eight
; constant <5 x i32> vectors (40 i32 values in total), followed by a void
; return.
;
; The check lines earlier in this file look for the constants 8 through 15
; being moved into VGPRs and written with buffer_store_dword (the first store
; is matched without an offset suffix, the others at offset:4 through
; offset:28), and for the constant 7 being moved into v31. The default llc
; invocation expects those stores in ascending order of value; the invocation
; that adds -combiner-tokenfactor-inline-limit=7 expects them in descending
; order.
;
; NOTE(review): presumably the values up to 7 are passed in registers and the
; stored values are the ones that no longer fit -- confirm against the AMDGPU
; calling convention.
define void @token_factor_inline_limit_test() {
|
|
entry:
|
|
call void @external_void_func_8xv5i32(
|
|
<5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>,
|
|
<5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>,
|
|
<5 x i32><i32 2, i32 2, i32 2, i32 2, i32 2>,
|
|
<5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>,
|
|
<5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>,
|
|
<5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>,
|
|
; The last two vectors carry the distinct values 6 through 15; 7 through 15
; are the constants the check lines match on.
<5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>,
|
|
<5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>)
|
|
ret void
|
|
}
|
|
|
|
; Declaration only (no body in this file) of the callee used by the test
; above: hidden visibility, returns void, takes eight <5 x i32> parameters.
declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>,
|
|
<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>)
|