mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-18 18:42:46 +02:00
AMDGPU: Fix areLoadsFromSameBasePtr for DS atomics
The offset operand index is different for atomics. llvm-svn: 357073
This commit is contained in:
parent
915462d704
commit
07895e51b1
@ -167,12 +167,19 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
|
||||
// Skip read2 / write2 variants for simplicity.
|
||||
// TODO: We should report true if the used offsets are adjacent (excluding
|
||||
// st64 versions).
|
||||
if (AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::data1) != -1 ||
|
||||
AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
|
||||
int Offset0Idx = AMDGPU::getNamedOperandIdx(Opc0, AMDGPU::OpName::offset);
|
||||
int Offset1Idx = AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::offset);
|
||||
if (Offset0Idx == -1 || Offset1Idx == -1)
|
||||
return false;
|
||||
|
||||
Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
|
||||
Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
|
||||
// XXX - be careful of dataless loads
|
||||
// getNamedOperandIdx returns the index for MachineInstrs. Since they
|
||||
// include the output in the operand list, but SDNodes don't, we need to
|
||||
// subtract the number of defs from the index.
|
||||
Offset0Idx -= get(Opc0).NumDefs;
|
||||
Offset1Idx -= get(Opc1).NumDefs;
|
||||
Offset0 = cast<ConstantSDNode>(Load0->getOperand(Offset0Idx))->getZExtValue();
|
||||
Offset1 = cast<ConstantSDNode>(Load1->getOperand(Offset1Idx))->getZExtValue();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
17
test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll
Normal file
17
test/CodeGen/AMDGPU/are-loads-from-same-base-ptr.ll
Normal file
@ -0,0 +1,17 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; TII::areLoadsFromSameBasePtr failed because the offset for atomics
|
||||
; is different from a normal load due to the data operand.
|
||||
|
||||
; GCN-LABEL: {{^}}are_loads_from_same_base_ptr_ds_atomic:
|
||||
; GCN: global_load_dword
|
||||
; GCN: ds_min_u32
|
||||
; GCN: ds_max_u32
|
||||
define amdgpu_kernel void @are_loads_from_same_base_ptr_ds_atomic(i32 addrspace(1)* %arg0, i32 addrspace(3)* noalias %ptr0) #0 {
|
||||
%tmp1 = load volatile i32, i32 addrspace(1)* %arg0
|
||||
%tmp2 = atomicrmw umin i32 addrspace(3)* %ptr0, i32 %tmp1 seq_cst
|
||||
%tmp3 = atomicrmw umax i32 addrspace(3)* %ptr0, i32 %tmp1 seq_cst
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue
Block a user