1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00
llvm-mirror/test/CodeGen/AMDGPU/ds_read2_offset_order.ll
hsmahesha 7520c18108 [AMDGPU] Increase alignment of LDS globals if necessary before LDS lowering.
Before packing LDS globals into a sorted structure, make sure that
their alignment is properly updated based on their size. This will make
sure that the members of sorted structure are properly aligned, and
hence it will further reduce the probability of unaligned LDS access.

Reviewed By: rampitec

Differential Revision: https://reviews.llvm.org/D103261
2021-06-07 18:00:41 +05:30

44 lines
1.9 KiB
LLVM

; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -strict-whitespace -check-prefix=SI %s
@lds = addrspace(3) global [512 x float] undef, align 4
; offset0 is larger than offset1
; SI-LABEL: {{^}}offset_order:
; SI-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}}
; SI-DAG: ds_read_b64 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset:8
; SI-DAG: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:11 offset1:12
; SI-DAG: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]+}} offset0:6 offset1:248
define amdgpu_kernel void @offset_order(float addrspace(1)* %out) {
entry:
%ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 0
%val0 = load float, float addrspace(3)* %ptr0
%ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 256
%val1 = load float, float addrspace(3)* %ptr1
%add1 = fadd float %val0, %val1
%ptr2 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 3
%val2 = load float, float addrspace(3)* %ptr2
%add2 = fadd float %add1, %val2
%ptr3 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2
%val3 = load float, float addrspace(3)* %ptr3
%add3 = fadd float %add2, %val3
%ptr4 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 12
%val4 = load float, float addrspace(3)* %ptr4
%add4 = fadd float %add3, %val4
%ptr5 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 14
%val5 = load float, float addrspace(3)* %ptr5
%add5 = fadd float %add4, %val5
%ptr6 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11
%val6 = load float, float addrspace(3)* %ptr6
%add6 = fadd float %add5, %val6
store float %add6, float addrspace(1)* %out
ret void
}