1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-25 04:02:41 +01:00

[AMDGPU] SIOptimizeExecMaskingPreRA should check the constant bus constraint when folding an EXEC copy

Folding an EXEC copy into its single use may lead to a constant bus constraint violation, as it adds one more SGPR operand.
This change makes the pass validate the user instruction with the new SGPR operand and fold the copy only if the result is legal.

Reviewed By: rampitec, arsenm

Differential Revision: https://reviews.llvm.org/D98888
This commit is contained in:
alex-t 2021-03-18 22:22:08 +03:00
parent d57a381a65
commit 24e5eb0aab
2 changed files with 37 additions and 9 deletions

View File

@ -416,15 +416,20 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
continue;
Register SavedExec = I->getOperand(0).getReg();
if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec) &&
MRI->use_instr_nodbg_begin(SavedExec)->getParent() ==
I->getParent()) {
LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
LIS->RemoveMachineInstrFromMaps(*I);
I->eraseFromParent();
MRI->replaceRegWith(SavedExec, ExecReg);
LIS->removeInterval(SavedExec);
Changed = true;
if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec)) {
MachineInstr *SingleExecUser = &*MRI->use_instr_nodbg_begin(SavedExec);
int Idx = SingleExecUser->findRegisterUseOperandIdx(SavedExec);
assert(Idx != -1);
if (SingleExecUser->getParent() == I->getParent() &&
!SingleExecUser->getOperand(Idx).isImplicit() &&
TII->isOperandLegal(*SingleExecUser, Idx, &I->getOperand(1))) {
LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n');
LIS->RemoveMachineInstrFromMaps(*I);
I->eraseFromParent();
MRI->replaceRegWith(SavedExec, ExecReg);
LIS->removeInterval(SavedExec);
Changed = true;
}
}
break;
}

View File

@ -0,0 +1,23 @@
# RUN: llc -run-pass si-optimize-exec-masking-pre-ra -march=amdgcn -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s
#
# Test for the si-optimize-exec-masking-pre-ra pass: the output must still
# contain the COPY of $exec into %2, i.e. the copy is not folded into its only
# user, the V_CMP_EQ_U64_e64 that also reads %1 (an sreg_64 value).
# NOTE(review): presumably replacing %2 with $exec would give that compare a
# second SGPR source and exceed the constant bus limit - confirm against the
# operand legality rules for the target.
---
# GCN-LABEL: name: opt_exec_copy_fold
# GCN: %2:vreg_64 = COPY $exec
name: opt_exec_copy_fold
tracksRegLiveness: true
liveins:
- { reg: '$sgpr0_sgpr1' }
body: |
bb.0:
liveins: $sgpr0_sgpr1
%0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr0_sgpr1, implicit $exec
%1:sreg_64 = V_CMP_NE_U32_e64 0, %0, implicit $exec
%2:vreg_64 = COPY $exec
%3:sreg_64 = V_CMP_EQ_U64_e64 %1, %2, implicit $exec
$scc = COPY %3
S_CBRANCH_SCC0 %bb.1, implicit $scc
bb.1:
...