1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 12:12:47 +01:00
llvm-mirror/test/CodeGen/AMDGPU/buffer-schedule.ll
Jay Foad 7f0f3d6b7b [AMDGPU] Fix MMO for raw/struct buffer access with non-constant offset
Codegen for the raw/struct buffer access intrinsics would update the
offset in the MMO to reflect the combined offset, if it was known to be
constant. If the combined offset was not known to be constant, or if
there was an index, it would set the offset in the MMO to 0. This is
unsafe because it makes it look like the access does not alias with
another access with a fixed non-zero offset.

Fix these cases by setting the pointer in the MMO to null, to reflect
the fact that we do not have any known IR value pointer + constant
offset for the access.

Differential Revision: https://reviews.llvm.org/D106284
2021-07-26 14:27:30 +01:00

69 lines
3.5 KiB
LLVM

; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; The buffer_loads and buffer_stores all access the same location. Check they do
; not get reordered by the scheduler.
; GCN-LABEL: {{^}}_amdgpu_cs_main:
; GCN: buffer_load_dword
; GCN: buffer_store_dword
; GCN: buffer_load_dword
; GCN: buffer_store_dword
; GCN: buffer_load_dword
; GCN: buffer_store_dword
; GCN: buffer_load_dword
; GCN: buffer_store_dword
; Function Attrs: nounwind
define amdgpu_cs void @_amdgpu_cs_main(<3 x i32> inreg %arg3, <3 x i32> %arg5) {
.entry:
%tmp9 = add <3 x i32> %arg3, %arg5
%tmp10 = extractelement <3 x i32> %tmp9, i32 0
%tmp11 = shl i32 %tmp10, 2
%tmp12 = inttoptr i64 undef to <4 x i32> addrspace(4)*
%tmp13 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16
%tmp14 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp13, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp17 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16
call void @llvm.amdgcn.buffer.store.f32(float %tmp14, <4 x i32> %tmp17, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp20 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16
%tmp21 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp20, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp22 = fadd reassoc nnan arcp contract float %tmp21, 1.000000e+00
call void @llvm.amdgcn.buffer.store.f32(float %tmp22, <4 x i32> %tmp20, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp25 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16
%tmp26 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp25, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp27 = fadd reassoc nnan arcp contract float %tmp26, 1.000000e+00
call void @llvm.amdgcn.buffer.store.f32(float %tmp27, <4 x i32> %tmp25, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp30 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16
%tmp31 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp30, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp32 = fadd reassoc nnan arcp contract float %tmp31, 1.000000e+00
call void @llvm.amdgcn.buffer.store.f32(float %tmp32, <4 x i32> %tmp30, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp35 = load <4 x i32>, <4 x i32> addrspace(4)* %tmp12, align 16
%tmp36 = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %tmp35, i32 0, i32 %tmp11, i1 false, i1 false) #0
%tmp37 = fadd reassoc nnan arcp contract float %tmp36, 1.000000e+00
call void @llvm.amdgcn.buffer.store.f32(float %tmp37, <4 x i32> %tmp35, i32 0, i32 %tmp11, i1 false, i1 false) #0
ret void
}
; GCN-LABEL: {{^}}test1:
; GCN: buffer_store_dword
; GCN: buffer_load_dword
; GCN: buffer_store_dword
define amdgpu_cs void @test1(<4 x i32> inreg %buf, i32 %off) {
.entry:
call void @llvm.amdgcn.raw.buffer.store.i32(i32 0, <4 x i32> %buf, i32 8, i32 0, i32 0)
%val = call i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32> %buf, i32 %off, i32 0, i32 0)
call void @llvm.amdgcn.raw.buffer.store.i32(i32 %val, <4 x i32> %buf, i32 0, i32 0, i32 0)
ret void
}
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2
declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #3
declare i32 @llvm.amdgcn.raw.buffer.load.i32(<4 x i32>, i32, i32, i32) #2
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32) #3
attributes #2 = { nounwind readonly }
attributes #3 = { nounwind writeonly }