mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-31 20:51:52 +01:00
AMDGPU: Correct DS implementation of areLoadsFromSameBasePtr
This was checking the wrong operands for the base register and the offsets. The indexes are shifted by the number of output registers from the machine instruction definition, and the chain is moved to the end.

llvm-svn: 355722
This commit is contained in:
parent
8ab2488af7
commit
99d9633961
@ -167,7 +167,7 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
|
||||
return false;
|
||||
|
||||
// Check base reg.
|
||||
if (Load0->getOperand(1) != Load1->getOperand(1))
|
||||
if (Load0->getOperand(0) != Load1->getOperand(0))
|
||||
return false;
|
||||
|
||||
// Check chain.
|
||||
@ -181,8 +181,8 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
|
||||
AMDGPU::getNamedOperandIdx(Opc1, AMDGPU::OpName::data1) != -1)
|
||||
return false;
|
||||
|
||||
Offset0 = cast<ConstantSDNode>(Load0->getOperand(2))->getZExtValue();
|
||||
Offset1 = cast<ConstantSDNode>(Load1->getOperand(2))->getZExtValue();
|
||||
Offset0 = cast<ConstantSDNode>(Load0->getOperand(1))->getZExtValue();
|
||||
Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getZExtValue();
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -232,7 +232,7 @@ bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
|
||||
return false;
|
||||
|
||||
// getNamedOperandIdx returns the index for MachineInstrs. Since they
|
||||
// inlcude the output in the operand list, but SDNodes don't, we need to
|
||||
// include the output in the operand list, but SDNodes don't, we need to
|
||||
// subtract the index by one.
|
||||
--OffIdx0;
|
||||
--OffIdx1;
|
||||
|
@ -6,8 +6,8 @@
|
||||
|
||||
; GCN-LABEL: {{^}}ds_combine_nodep
|
||||
|
||||
; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27
|
||||
; GCN-NEXT: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:8
|
||||
; GCN: ds_read2_b32 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:7 offset1:8
|
||||
; GCN-NEXT: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:26 offset1:27
|
||||
define amdgpu_kernel void @ds_combine_nodep(float addrspace(1)* %out, float addrspace(3)* %inptr) {
|
||||
|
||||
%base = bitcast float addrspace(3)* %inptr to i8 addrspace(3)*
|
||||
|
Loading…
x
Reference in New Issue
Block a user