mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 13:11:39 +01:00
[AMDGPU] Cluster FLAT instructions with both vaddr and saddr
Reviewers: rampitec, arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73634
This commit is contained in:
parent
b179c9ce12
commit
d73841a388
@ -365,23 +365,14 @@ bool SIInstrInfo::getMemOperandsWithOffset(
|
||||
}
|
||||
|
||||
if (isFLAT(LdSt)) {
|
||||
const MachineOperand *VAddr = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
|
||||
if (VAddr) {
|
||||
// Can't analyze 2 offsets.
|
||||
// FIXME remove this restriction!
|
||||
if (getNamedOperand(LdSt, AMDGPU::OpName::saddr))
|
||||
return false;
|
||||
|
||||
BaseOp = VAddr;
|
||||
} else {
|
||||
// scratch instructions have either vaddr or saddr.
|
||||
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
|
||||
}
|
||||
|
||||
// Instructions have either vaddr or saddr or both.
|
||||
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
|
||||
if (BaseOp)
|
||||
BaseOps.push_back(BaseOp);
|
||||
BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::saddr);
|
||||
if (BaseOp)
|
||||
BaseOps.push_back(BaseOp);
|
||||
Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
|
||||
if (!BaseOp->isReg())
|
||||
return false;
|
||||
BaseOps.push_back(BaseOp);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -2,11 +2,11 @@
|
||||
|
||||
; Test for a conv2d like sequence of loads.
|
||||
|
||||
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
|
||||
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}{{$}}
|
||||
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
|
||||
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:-16{{$}}
|
||||
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
|
||||
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:-32{{$}}
|
||||
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
|
||||
; GFX9: global_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:8{{$}}
|
||||
|
||||
define hidden amdgpu_kernel void @simpleSaddrs(i64 addrspace(1)* %dst_image, i64 addrspace(1)* %src_image ) {
|
||||
@ -45,9 +45,9 @@ entry:
|
||||
store volatile i64 %add7, i64 addrspace(1)* %ptr9
|
||||
|
||||
; Test various offset boundaries.
|
||||
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}}
|
||||
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, off offset:4088{{$}}
|
||||
; GFX9: global_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:2040{{$}}
|
||||
; GFX9: global_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:4088{{$}}
|
||||
%gep11 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 511
|
||||
%load11 = load i64, i64 addrspace(1)* %gep11
|
||||
%gep12 = getelementptr inbounds i64, i64 addrspace(1)* %gep, i64 1023
|
||||
|
@ -264,11 +264,11 @@ define amdgpu_kernel void @reorder_global_offsets(i32 addrspace(1)* nocapture %o
|
||||
; CI-NEXT: buffer_store_dword v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:52{{$}}
|
||||
|
||||
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:12
|
||||
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:28
|
||||
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:44
|
||||
|
||||
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}{{$}}
|
||||
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:20
|
||||
; GFX9: global_load_dword {{v[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}} offset:28
|
||||
|
||||
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:36
|
||||
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:52
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user