mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-02-01 05:01:59 +01:00
[AMDGPU] With XNACK, cannot clause a load with result coalesced with operand
Summary: With XNACK enabled, an SMEM load whose result has been coalesced with one of its operands (so that the load overwrites its own operand) cannot appear in a clause: some other instruction might XNACK and restart the whole clause, in which case the load would be re-executed with its operand already overwritten.
The clause breaker already recognized that an SMEM instruction that overwrites one of its operands cannot appear in a clause, and broke the clause. The problem this commit fixes is that the SIFormMemoryClauses optimization still formed a bundle with an early-clobber def, which caused the earlier code that set up the coalesced operand to be removed as dead.
Differential Revision: https://reviews.llvm.org/D57008
Change-Id: I703c4d5b0bf7d6060222bec491f45c18bb3c0016
llvm-svn: 351950
This commit is contained in:
parent
da693adf63
commit
388d17336e
@ -118,6 +118,17 @@ static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
|
||||
return false;
|
||||
if (!IsVMEMClause && !isSMEMClauseInst(MI))
|
||||
return false;
|
||||
// If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it.
|
||||
for (const MachineOperand &ResMO : MI.defs()) {
|
||||
unsigned ResReg = ResMO.getReg();
|
||||
for (const MachineOperand &MO : MI.uses()) {
|
||||
if (!MO.isReg() || MO.isDef())
|
||||
continue;
|
||||
if (MO.getReg() == ResReg)
|
||||
return false;
|
||||
}
|
||||
break; // Only check the first def.
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
48
test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
Normal file
48
test/CodeGen/AMDGPU/smem-no-clause-coalesced.mir
Normal file
@ -0,0 +1,48 @@
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx902 -o - %s -run-pass si-form-memory-clauses -verify-machineinstrs | FileCheck -check-prefix=XNACK %s
|
||||
|
||||
# The SIFormMemoryClauses pass must not form a clause (indicated by BUNDLE)
|
||||
# from the two adjacent smem instructions, because the first one has its
|
||||
# result coalesced with an operand.
|
||||
|
||||
# XNACK-LABEL: body:
|
||||
# XNACK-NOT: BUNDLE
|
||||
|
||||
---
|
||||
name: _amdgpu_cs_main
|
||||
alignment: 0
|
||||
exposesReturnsTwice: false
|
||||
legalized: false
|
||||
regBankSelected: false
|
||||
selected: false
|
||||
failedISel: false
|
||||
tracksRegLiveness: true
|
||||
hasWinCFI: false
|
||||
fixedStack: []
|
||||
stack: []
|
||||
constants: []
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr2, $sgpr3, $sgpr12, $sgpr13, $sgpr14, $vgpr0, $vgpr1
|
||||
|
||||
%0:vgpr_32 = COPY $vgpr1
|
||||
%1:sgpr_32 = COPY $sgpr12
|
||||
%2:sgpr_32 = COPY $sgpr3
|
||||
undef %3.sub0:sgpr_128 = COPY $sgpr2
|
||||
%4:vgpr_32 = COPY $vgpr0
|
||||
%5:sgpr_32 = COPY $sgpr14
|
||||
%6:sgpr_32 = COPY $sgpr13
|
||||
%7:sreg_64_xexec = S_GETPC_B64
|
||||
%7.sub0:sreg_64_xexec = COPY %1
|
||||
%3.sub1:sgpr_128 = S_AND_B32 %2, 65535, implicit-def dead $scc
|
||||
%3.sub3:sgpr_128 = S_MOV_B32 151468
|
||||
%3.sub2:sgpr_128 = S_MOV_B32 -1
|
||||
%7.sub0:sreg_64_xexec = S_LOAD_DWORD_IMM %7, 48, 0 :: (load 4 from `i8 addrspace(4)* undef`, addrspace 4)
|
||||
%8:sreg_64_xexec = S_BUFFER_LOAD_DWORDX2_IMM %3, 640, 0 :: (dereferenceable invariant load 8)
|
||||
undef %9.sub0:vreg_128 = V_LSHL_ADD_U32 %6, 4, %4, implicit $exec
|
||||
%9.sub1:vreg_128 = V_LSHL_ADD_U32 %5, 4, %0, implicit $exec
|
||||
S_ENDPGM
|
||||
|
||||
...
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user