From 24e5eb0aab98f14613aab1eded6be20b546cf5f8 Mon Sep 17 00:00:00 2001 From: alex-t Date: Thu, 18 Mar 2021 22:22:08 +0300 Subject: [PATCH] [AMDGPU] SIOptimizeExecMaskingPreRA should check constant bus constraint when folding EXEC copy Folding EXEC copy into its single use may lead to constant bus constraint violation as it adds one more SGPR operand. This change makes it validate the user instruction with the new SGPR operand and only fold it if it is legal. Reviewed By: rampitec, arsenm Differential Revision: https://reviews.llvm.org/D98888 --- .../AMDGPU/SIOptimizeExecMaskingPreRA.cpp | 23 +++++++++++-------- test/CodeGen/AMDGPU/opt_exec_copy_fold.mir | 23 +++++++++++++++++++ 2 files changed, 37 insertions(+), 9 deletions(-) create mode 100644 test/CodeGen/AMDGPU/opt_exec_copy_fold.mir diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 162e96655df..5f89f382668 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -416,15 +416,20 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) { continue; Register SavedExec = I->getOperand(0).getReg(); - if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec) && - MRI->use_instr_nodbg_begin(SavedExec)->getParent() == - I->getParent()) { - LLVM_DEBUG(dbgs() << "Redundant EXEC COPY: " << *I << '\n'); - LIS->RemoveMachineInstrFromMaps(*I); - I->eraseFromParent(); - MRI->replaceRegWith(SavedExec, ExecReg); - LIS->removeInterval(SavedExec); - Changed = true; + if (SavedExec.isVirtual() && MRI->hasOneNonDBGUse(SavedExec)) { + MachineInstr *SingleExecUser = &*MRI->use_instr_nodbg_begin(SavedExec); + int Idx = SingleExecUser->findRegisterUseOperandIdx(SavedExec); + assert(Idx != -1); + if (SingleExecUser->getParent() == I->getParent() && + !SingleExecUser->getOperand(Idx).isImplicit() && + TII->isOperandLegal(*SingleExecUser, Idx, &I->getOperand(1))) { + LLVM_DEBUG(dbgs() 
<< "Redundant EXEC COPY: " << *I << '\n'); + LIS->RemoveMachineInstrFromMaps(*I); + I->eraseFromParent(); + MRI->replaceRegWith(SavedExec, ExecReg); + LIS->removeInterval(SavedExec); + Changed = true; + } } break; } diff --git a/test/CodeGen/AMDGPU/opt_exec_copy_fold.mir b/test/CodeGen/AMDGPU/opt_exec_copy_fold.mir new file mode 100644 index 00000000000..4ebfa2cc964 --- /dev/null +++ b/test/CodeGen/AMDGPU/opt_exec_copy_fold.mir @@ -0,0 +1,23 @@ +# RUN: llc -run-pass si-optimize-exec-masking-pre-ra -march=amdgcn -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GCN %s +--- +# GCN-LABEL: name: opt_exec_copy_fold +# GCN: %2:vreg_64 = COPY $exec +name: opt_exec_copy_fold +tracksRegLiveness: true +liveins: + - { reg: '$sgpr0_sgpr1' } +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + %0:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr0_sgpr1, implicit $exec + %1:sreg_64 = V_CMP_NE_U32_e64 0, %0, implicit $exec + %2:vreg_64 = COPY $exec + %3:sreg_64 = V_CMP_EQ_U64_e64 %1, %2, implicit $exec + $scc = COPY %3 + S_CBRANCH_SCC0 %bb.1, implicit $scc + + bb.1: +... + +