mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-22 18:54:02 +01:00
7520c18108
Before packing LDS globals into a sorted structure, make sure that their alignment is properly updated based on their size. This will make sure that the members of sorted structure are properly aligned, and hence it will further reduce the probability of unaligned LDS access. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D103261
44 lines
1.9 KiB
LLVM
44 lines
1.9 KiB
LLVM
; Codegen test for the amdgcn target: each RUN line below feeds this file to
; llc for one -mcpu value and checks the emitted assembly with FileCheck,
; using the SI check prefix and strict whitespace matching.
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s

; 512-element float array in address space 3, 4-byte aligned, with an undef
; initializer. The kernel in this file only loads from it.
@lds = addrspace(3) global [512 x float] undef, align 4

; offset0 is larger than offset1
;
; The kernel loads seven floats from @lds at element indices 0, 256, 3, 2, 12,
; 14 and 11 (in that program order, which is not ascending address order),
; sums them and stores the result to %out.
;
; The patterns below require the output for offset_order to contain one
; ds_read_b32, one ds_read_b64 and two ds_read2_b32 instructions with the
; listed offsets. The -DAG suffix lets FileCheck match them in any order
; after the label.

; SI-LABEL: {{^}}offset_order:
; SI-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: ds_read_b64 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:8
; SI-DAG: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:11 offset1:12
; SI-DAG: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:6 offset1:248
define amdgpu_kernel void @offset_order(float addrspace(1)* %out) {
entry:
  ; Element 0 of @lds.
  %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 0
  %val0 = load float, float addrspace(3)* %ptr0

  ; Element 256; running sum starts here.
  %ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 256
  %val1 = load float, float addrspace(3)* %ptr1
  %add1 = fadd float %val0, %val1

  ; Element 3 (loaded before its lower neighbour, element 2).
  %ptr2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 3
  %val2 = load float, float addrspace(3)* %ptr2
  %add2 = fadd float %add1, %val2

  ; Element 2.
  %ptr3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2
  %val3 = load float, float addrspace(3)* %ptr3
  %add3 = fadd float %add2, %val3

  ; Element 12.
  %ptr4 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 12
  %val4 = load float, float addrspace(3)* %ptr4
  %add4 = fadd float %add3, %val4

  ; Element 14.
  %ptr5 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 14
  %val5 = load float, float addrspace(3)* %ptr5
  %add5 = fadd float %add4, %val5

  ; Element 11.
  %ptr6 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11
  %val6 = load float, float addrspace(3)* %ptr6
  %add6 = fadd float %add5, %val6

  ; Write the accumulated sum of all seven loads to the output pointer.
  store float %add6, float addrspace(1)* %out
  ret void
}