mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 12:43:36 +01:00
86edc13433
As part of the unification of the debug format and the MIR format, avoid printing "vreg" for virtual registers (which is one of the current MIR possibilities). Basically: * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" \) -type f -print0 | xargs -0 sed -i '' -E "s/%vreg([0-9]+)/%\1/g" * grep -nr '%vreg' . and fix if needed * find . \( -name "*.mir" -o -name "*.cpp" -o -name "*.h" -o -name "*.ll" \) -type f -print0 | xargs -0 sed -i '' -E "s/ vreg([0-9]+)/ %\1/g" * grep -nr 'vreg[0-9]\+' . and fix if needed Differential Revision: https://reviews.llvm.org/D40420 llvm-svn: 319427
100 lines
3.6 KiB
LLVM
100 lines
3.6 KiB
LLVM
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck %s
|
|
;
|
|
; This test checks that the lds input queue is empty at the end of
|
|
; the ALU clause.
|
|
|
|
; CHECK-LABEL: {{^}}lds_input_queue:
|
|
; CHECK: LDS_READ_RET * OQAP
|
|
; CHECK-NOT: ALU clause
|
|
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
|
|
|
|
; Two-element i32 array declared in address space 3; both test functions in
; this file read from it.
@local_mem = internal unnamed_addr addrspace(3) global [2 x i32] undef, align 4
|
|
|
|
; Test kernel: reads @local_mem[%index] (address space 3), executes a group
; barrier, reads one i32 from %in (address space 1), and stores the sum of the
; two values to %out.
define amdgpu_kernel void @lds_input_queue(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %index) {
|
|
entry:
|
|
; Address of element %index of the address-space-3 array.
%0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
|
|
; Load from address space 3.
%1 = load i32, i32 addrspace(3)* %0
|
|
call void @llvm.r600.group.barrier()
|
|
|
|
; This will start a new clause for the vertex fetch
|
|
%2 = load i32, i32 addrspace(1)* %in
|
|
; Combine the address-space-3 value with the address-space-1 value.
%3 = add i32 %1, %2
|
|
store i32 %3, i32 addrspace(1)* %out
|
|
ret void
|
|
}
|
|
|
|
; Declaration of the group barrier intrinsic called by @lds_input_queue.
declare void @llvm.r600.group.barrier() nounwind convergent
|
|
|
|
; The machine scheduler does not do proper alias analysis and assumes that
|
|
; loads from global values (Note that a global value is different than a
|
|
; value from global memory. A global value is a value that is declared
|
|
; outside of a function, it can reside in any address space) alias with
|
|
; all other loads.
|
|
;
|
|
; This is a problem for scheduling the reads from the local data share (lds).
|
|
; These reads are implemented using two instructions. The first copies the
|
|
; data from lds into the lds output queue, and the second moves the data from
|
|
; the input queue into main memory. These two instructions don't have to be
|
|
; scheduled one after the other, but they do need to be scheduled in the same
|
|
; clause. The aliasing problem mentioned above causes problems when there is a
|
|
; load from global memory which immediately follows a load from a global value that
|
|
; has been declared in the local memory space:
|
|
;
|
|
; %0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 %index
|
|
; %1 = load i32, i32 addrspace(3)* %0
|
|
; %2 = load i32, i32 addrspace(1)* %in
|
|
;
|
|
; The instruction selection phase will generate ISA that looks like this:
|
|
; %oqap = LDS_READ_RET
|
|
; %0 = MOV %oqap
|
|
; %1 = VTX_READ_32
|
|
; %2 = ADD_INT %1, %0
|
|
;
|
|
; The bottom scheduler will schedule the two ALU instructions first:
|
|
;
|
|
; UNSCHEDULED:
|
|
; %oqap = LDS_READ_RET
|
|
; %1 = VTX_READ_32
|
|
;
|
|
; SCHEDULED:
|
|
;
|
|
; %0 = MOV %oqap
|
|
; %2 = ADD_INT %1, %0
|
|
;
|
|
; The lack of proper aliasing causes the local memory read (LDS_READ_RET)
|
|
; and the global memory read (VTX_READ_32) to be treated as chain-dependent, so
|
|
; the global memory read will always be scheduled first. This will give us a
|
|
; final program which looks like this:
|
|
;
|
|
; Alu clause:
|
|
; %oqap = LDS_READ_RET
|
|
; VTX clause:
|
|
; %1 = VTX_READ_32
|
|
; Alu clause:
|
|
; %0 = MOV %oqap
|
|
; %2 = ADD_INT %1, %0
|
|
;
|
|
; This is an illegal program because the oqap def and use now occur in
|
|
; different ALU clauses.
|
|
;
|
|
; This test checks this scenario and makes sure it doesn't result in an
|
|
; illegal program. For now, we have fixed this issue by merging the
|
|
; LDS_READ_RET and MOV together during instruction selection and then
|
|
; expanding them after scheduling. Once the scheduler has better alias
|
|
; analysis, we should be able to keep these instructions separate before
|
|
; scheduling.
|
|
;
|
|
; CHECK-LABEL: {{^}}local_global_alias:
|
|
; CHECK: LDS_READ_RET
|
|
; CHECK-NOT: ALU clause
|
|
; CHECK: MOV * T{{[0-9]\.[XYZW]}}, OQAP
|
|
; Test kernel: reads @local_mem[0] (address space 3), then immediately reads
; one i32 from %in (address space 1) with no barrier in between, and stores
; the sum of the two values to %out.
define amdgpu_kernel void @local_global_alias(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
|
|
entry:
|
|
; Address of element 0 of the address-space-3 array (constant index).
%0 = getelementptr inbounds [2 x i32], [2 x i32] addrspace(3)* @local_mem, i32 0, i32 0
|
|
; Load from the global value declared in address space 3 ...
%1 = load i32, i32 addrspace(3)* %0
|
|
; ... directly followed by a load through the address-space-1 pointer argument.
%2 = load i32, i32 addrspace(1)* %in
|
|
%3 = add i32 %2, %1
|
|
store i32 %3, i32 addrspace(1)* %out
|
|
ret void
|
|
}
|