1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-26 04:32:44 +01:00

[AMDGPU] getMemOperandsWithOffset: add vaddr operand for stack access BUF instructions

A consequence is that checkInstOffsetsDoNotOverlap can now distinguish
sp+offset from fp+offset, so it knows that it shouldn't try to work out
whether the accesses overlap just by comparing the offsets. For example
in these two instructions:

MIR:
BUFFER_STORE_DWORD_OFFSET %0:vgpr_32(s32), $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into stack + 4, addrspace 5)
%4:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0.alloca, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from `i8 addrspace(5)* undef`, addrspace 5)

ISA:
buffer_store_dword v0, off, s[0:3], s32 offset:4
buffer_load_dword v0, off, s[0:3], s34

Differential Revision: https://reviews.llvm.org/D73957
This commit is contained in:
Jay Foad 2019-12-20 15:13:57 +00:00
parent 4ec21b78e4
commit 85f4b8dffe
8 changed files with 29 additions and 45 deletions

View File

@ -316,39 +316,22 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
}
if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
if (SOffset && SOffset->isReg()) {
// We can only handle this if it's a stack access, as any other resource
// would require reporting multiple base registers.
const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (AddrReg && !AddrReg->isFI())
return false;
const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
const SIMachineFunctionInfo *MFI
= LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
if (RSrc->getReg() != MFI->getScratchRSrcReg())
return false;
const MachineOperand *OffsetImm =
getNamedOperand(LdSt, AMDGPU::OpName::offset);
BaseOps.push_back(RSrc);
BaseOps.push_back(SOffset);
Offset = OffsetImm->getImm();
} else {
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
if (!BaseOp) // e.g. BUFFER_WBINVL1_VOL
return false;
const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
if (!RSrc) // e.g. BUFFER_WBINVL1_VOL
return false;
BaseOps.push_back(RSrc);
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (BaseOp && !BaseOp->isFI())
BaseOps.push_back(BaseOp);
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
if (BaseOp)
BaseOps.push_back(BaseOp);
const MachineOperand *OffsetImm =
getNamedOperand(LdSt, AMDGPU::OpName::offset);
Offset = OffsetImm->getImm();
if (SOffset) // soffset can be an inline immediate.
const MachineOperand *OffsetImm =
getNamedOperand(LdSt, AMDGPU::OpName::offset);
Offset = OffsetImm->getImm();
const MachineOperand *SOffset =
getNamedOperand(LdSt, AMDGPU::OpName::soffset);
if (SOffset) {
if (SOffset->isReg())
BaseOps.push_back(SOffset);
else
Offset += SOffset->getImm();
}
// Get appropriate operand, and compute width accordingly.

View File

@ -149,7 +149,7 @@ attributes #0 = { nounwind }
; GCN-NEXT: - .registers:
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
; GCN-NEXT: .shader_functions:
; GCN-NEXT: dynamic_stack:

View File

@ -739,8 +739,8 @@ entry:
; GCN-LABEL: {{^}}tail_call_byval_align16:
; GCN-NOT: s32
; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:12
; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
; GCN: s_getpc_b64

View File

@ -3294,14 +3294,15 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
; GFX9-LABEL: tail_call_byval_align16:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
; GFX9-NEXT: s_waitcnt vmcnt(1)
; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32
; GFX9-NEXT: s_setpc_b64 s[4:5]
;
; GFX10-LABEL: tail_call_byval_align16:

View File

@ -39,8 +39,8 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
; GFX10-DAG: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_FMT_10_10_10_2_SSCALED] offset:4095
; GFX10-DAG: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_FMT_32_32_UINT] offset:73

View File

@ -42,8 +42,8 @@ main_body:
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] idxen offset:73
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:1
; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_FMT_10_10_10_2_SSCALED] idxen offset:4095
; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_FMT_32_32_UINT] idxen offset:73

View File

@ -33,8 +33,8 @@ main_body:
}
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
; GCN: s_waitcnt
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {

View File

@ -173,9 +173,9 @@ entry:
; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
; CI-NOHSA-NOT: v_add
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
; CI-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
@ -203,10 +203,10 @@ entry:
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
; SI: s_mov_b32 {{s[0-9]+}}, 0x13480
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}