1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 12:43:36 +01:00
llvm-mirror/test/CodeGen/AMDGPU/lds-output-queue.ll
Francis Visoiu Mistrih 86edc13433 [CodeGen] Print "%vreg0" as "%0" in both MIR and debug output
As part of the unification of the debug format and the MIR format, avoid
printing "vreg" for virtual registers (which is one of the current MIR
possibilities).

Basically:

* find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" \) -type f -print0 | xargs -0 sed -i '' -E "s/%vreg([0-9]+)/%\1/g"
* grep -nr '%vreg' . and fix if needed
* find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" \) -type f -print0 | xargs -0 sed -i '' -E "s/ vreg([0-9]+)/ %\1/g"
* grep -nr 'vreg[0-9]\+' . and fix if needed

Differential Revision: https://reviews.llvm.org/D40420

llvm-svn: 319427
2017-11-30 12:12:19 +00:00

100 lines
3.6 KiB
LLVM

; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
;
; This test checks that the lds input queue is empty at the end of
; the ALU clause.
; CHECK-LABEL: {{^}}lds_input_queue:
; CHECK: LDS_READ_RET * OQAP
; CHECK-NOT: ALU clause
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
; Two-element i32 array in address space 3. @lds_input_queue and
; @local_global_alias both load from it.
@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] undef, align 4
; Kernel: loads @local_mem[%index] from address space 3, executes the group
; barrier, loads an i32 through %in, and stores the sum of the two values
; through %out.
define amdgpu_kernel void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
entry:
  %lds.ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
  %lds.val = load i32, i32 addrspace(3)* %lds.ptr
  call void @llvm.r600.group.barrier()
  ; The load through %in below will start a new clause for the vertex fetch.
  %global.val = load i32, i32 addrspace(1)* %in
  %sum = add i32 %lds.val, %global.val
  store i32 %sum, i32 addrspace(1)* %out
  ret void
}
; Barrier intrinsic; @lds_input_queue calls it between its load from
; @local_mem and its load through %in.
declare void @llvm.r600.group.barrier() nounwind convergent
; The machine scheduler does not do proper alias analysis and assumes that
; loads from global values alias with all other loads. (Note that a global
; value is different than a value from global memory: a global value is a
; value that is declared outside of a function, and it can reside in any
; address space.)
;
; This is a problem for scheduling the reads from the local data share (lds).
; These reads are implemented using two instructions. The first copies the
; data from lds into the lds output queue, and the second moves the data from
; the output queue into a register. These two instructions don't have to be
; scheduled one after the other, but they do need to be scheduled in the same
; clause. The aliasing problem mentioned above causes problems when there is a
; load from global memory which immediately follows a load from a global value that
; has been declared in the local memory space:
;
; %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
; %1 = load i32, i32 addrspace(3)* %0
; %2 = load i32, i32 addrspace(1)* %in
;
; The instruction selection phase will generate ISA that looks like this:
; %oqap = LDS_READ_RET
; %0 = MOV %oqap
; %1 = VTX_READ_32
; %2 = ADD_INT %1, %0
;
; The bottom scheduler will schedule the two ALU instructions first:
;
; UNSCHEDULED:
; %oqap = LDS_READ_RET
; %1 = VTX_READ_32
;
; SCHEDULED:
;
; %0 = MOV %oqap
; %2 = ADD_INT %1, %0
;
; The lack of proper aliasing results in the local memory read (LDS_READ_RET)
; treating the global memory read (VTX_READ_32) as a chain dependency, so
; the global memory read will always be scheduled first. This will give us a
; final program which looks like this:
;
; Alu clause:
; %oqap = LDS_READ_RET
; VTX clause:
; %1 = VTX_READ_32
; Alu clause:
; %0 = MOV %oqap
; %2 = ADD_INT %1, %0
;
; This is an illegal program because the oqap def and use now occur in
; different ALU clauses.
;
; This test checks this scenario and makes sure it doesn't result in an
; illegal program. For now, we have fixed this issue by merging the
; LDS_READ_RET and MOV together during instruction selection and then
; expanding them after scheduling. Once the scheduler has better alias
; analysis, we should be able to keep these instructions separate before
; scheduling.
;
; CHECK-LABEL: {{^}}local_global_alias:
; CHECK: LDS_READ_RET
; CHECK-NOT: ALU clause
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
; Kernel: loads @local_mem[0] from address space 3, immediately followed by a
; load through %in from address space 1, and stores the sum of the two values
; through %out.
define amdgpu_kernel void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %lds.ptr = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
  %lds.val = load i32, i32 addrspace(3)* %lds.ptr
  ; No barrier here: the address space 1 load directly follows the
  ; address space 3 load.
  %global.val = load i32, i32 addrspace(1)* %in
  %sum = add i32 %global.val, %lds.val
  store i32 %sum, i32 addrspace(1)* %out
  ret void
}