mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 04:32:44 +01:00
[AMDGPU] getMemOperandsWithOffset: add vaddr operand for stack access BUF instructions
A consequence is that checkInstOffsetsDoNotOverlap can now distinguish sp+offset from fp+offset, so it knows that it shouldn't try to work out whether the accesses overlap just by comparing the offsets. For example, in these two instructions:

MIR:
  BUFFER_STORE_DWORD_OFFSET %0:vgpr_32(s32), $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into stack + 4, addrspace 5)
  %4:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0.alloca, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from `i8 addrspace(5)* undef`, addrspace 5)

ISA:
  buffer_store_dword v0, off, s[0:3], s32 offset:4
  buffer_load_dword v0, off, s[0:3], s34

Differential Revision: https://reviews.llvm.org/D73957
This commit is contained in:
parent
4ec21b78e4
commit
85f4b8dffe
@ -316,39 +316,22 @@ bool SIInstrInfo::getMemOperandsWithOffsetWidth(
|
||||
}
|
||||
|
||||
if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
|
||||
const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
|
||||
if (SOffset && SOffset->isReg()) {
|
||||
// We can only handle this if it's a stack access, as any other resource
|
||||
// would require reporting multiple base registers.
|
||||
const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
|
||||
if (AddrReg && !AddrReg->isFI())
|
||||
return false;
|
||||
|
||||
const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
|
||||
const SIMachineFunctionInfo *MFI
|
||||
= LdSt.getParent()->getParent()->getInfo<SIMachineFunctionInfo>();
|
||||
if (RSrc->getReg() != MFI->getScratchRSrcReg())
|
||||
return false;
|
||||
|
||||
const MachineOperand *OffsetImm =
|
||||
getNamedOperand(LdSt, AMDGPU::OpName::offset);
|
||||
BaseOps.push_back(RSrc);
|
||||
BaseOps.push_back(SOffset);
|
||||
Offset = OffsetImm->getImm();
|
||||
} else {
|
||||
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
|
||||
if (!BaseOp) // e.g. BUFFER_WBINVL1_VOL
|
||||
return false;
|
||||
const MachineOperand *RSrc = getNamedOperand(LdSt, AMDGPU::OpName::srsrc);
|
||||
if (!RSrc) // e.g. BUFFER_WBINVL1_VOL
|
||||
return false;
|
||||
BaseOps.push_back(RSrc);
|
||||
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
|
||||
if (BaseOp && !BaseOp->isFI())
|
||||
BaseOps.push_back(BaseOp);
|
||||
|
||||
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
|
||||
if (BaseOp)
|
||||
BaseOps.push_back(BaseOp);
|
||||
|
||||
const MachineOperand *OffsetImm =
|
||||
getNamedOperand(LdSt, AMDGPU::OpName::offset);
|
||||
Offset = OffsetImm->getImm();
|
||||
if (SOffset) // soffset can be an inline immediate.
|
||||
const MachineOperand *OffsetImm =
|
||||
getNamedOperand(LdSt, AMDGPU::OpName::offset);
|
||||
Offset = OffsetImm->getImm();
|
||||
const MachineOperand *SOffset =
|
||||
getNamedOperand(LdSt, AMDGPU::OpName::soffset);
|
||||
if (SOffset) {
|
||||
if (SOffset->isReg())
|
||||
BaseOps.push_back(SOffset);
|
||||
else
|
||||
Offset += SOffset->getImm();
|
||||
}
|
||||
// Get appropriate operand, and compute width accordingly.
|
||||
|
@ -149,7 +149,7 @@ attributes #0 = { nounwind }
|
||||
; GCN-NEXT: - .registers:
|
||||
; SDAG-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
|
||||
; SDAG-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
|
||||
; GISEL-NEXT: 0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
|
||||
; GISEL-NEXT: 0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
|
||||
; GCN-NEXT: .shader_functions:
|
||||
; GCN-NEXT: dynamic_stack:
|
||||
|
@ -739,8 +739,8 @@ entry:
|
||||
|
||||
; GCN-LABEL: {{^}}tail_call_byval_align16:
|
||||
; GCN-NOT: s32
|
||||
; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
|
||||
; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:12
|
||||
; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32 offset:8
|
||||
|
||||
; GCN: s_getpc_b64
|
||||
|
||||
|
@ -3294,14 +3294,15 @@ define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 {
|
||||
; GFX9-LABEL: tail_call_byval_align16:
|
||||
; GFX9: ; %bb.0: ; %entry
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
|
||||
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8
|
||||
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:12
|
||||
; GFX9-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12
|
||||
; GFX9-NEXT: buffer_load_dword v33, off, s[0:3], s32 offset:8
|
||||
; GFX9-NEXT: s_getpc_b64 s[4:5]
|
||||
; GFX9-NEXT: s_add_u32 s4, s4, byval_align16_f64_arg@rel32@lo+4
|
||||
; GFX9-NEXT: s_addc_u32 s5, s5, byval_align16_f64_arg@rel32@hi+12
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32 offset:4
|
||||
; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(1)
|
||||
; GFX9-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4
|
||||
; GFX9-NEXT: s_waitcnt vmcnt(1)
|
||||
; GFX9-NEXT: buffer_store_dword v33, off, s[0:3], s32
|
||||
; GFX9-NEXT: s_setpc_b64 s[4:5]
|
||||
;
|
||||
; GFX10-LABEL: tail_call_byval_align16:
|
||||
|
@ -39,8 +39,8 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
|
||||
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
|
||||
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
|
||||
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
|
||||
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
|
||||
; GFX10-DAG: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_FMT_10_10_10_2_SSCALED] offset:4095
|
||||
; GFX10-DAG: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_FMT_32_32_UINT] offset:73
|
||||
|
@ -42,8 +42,8 @@ main_body:
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
|
||||
; GCN: v_mov_b32_e32 [[ZEROREG:v[0-9]+]], 0
|
||||
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
|
||||
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] idxen offset:73
|
||||
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] idxen offset:4095
|
||||
; PREGFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] idxen offset:1
|
||||
; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_FMT_10_10_10_2_SSCALED] idxen offset:4095
|
||||
; GFX10: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, [[ZEROREG]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_FMT_32_32_UINT] idxen offset:73
|
||||
|
@ -33,8 +33,8 @@ main_body:
|
||||
}
|
||||
|
||||
; GCN-LABEL: {{^}}tbuffer_load_immoffs_large
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32_32,BUF_NUM_FORMAT_SSCALED] offset:73
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 61 format:[BUF_DATA_FORMAT_RESERVED_15,BUF_NUM_FORMAT_USCALED] offset:4095
|
||||
; GCN: tbuffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} format:[BUF_DATA_FORMAT_32_32_32,BUF_NUM_FORMAT_UINT] offset:1
|
||||
; GCN: s_waitcnt
|
||||
define amdgpu_vs {<4 x float>, <4 x float>, <4 x float>} @tbuffer_load_immoffs_large(<4 x i32> inreg, i32 inreg %soffs) {
|
||||
|
@ -173,9 +173,9 @@ entry:
|
||||
; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x9a40{{$}}
|
||||
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x9a50{{$}}
|
||||
; CI-NOHSA-NOT: v_add
|
||||
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
|
||||
; CI-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
|
||||
; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
|
||||
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
|
||||
|
||||
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
|
||||
@ -203,10 +203,10 @@ entry:
|
||||
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
|
||||
|
||||
; SI: s_mov_b32 {{s[0-9]+}}, 0x13480
|
||||
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
|
||||
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:32
|
||||
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64
|
||||
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:48
|
||||
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], 0 addr64 offset:16
|
||||
; SI: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], {{s[0-9]+}} addr64
|
||||
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
|
||||
; CI-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
|
||||
; CI-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
|
||||
|
Loading…
Reference in New Issue
Block a user