From a98d5601f80e238147c129859b27e34454a15455 Mon Sep 17 00:00:00 2001 From: dstuttar Date: Tue, 23 Jun 2020 17:47:58 +0100 Subject: [PATCH] [AMDGPU] Make sure to fix implicit operands on insertBranch Summary: Without fixImplicitOperands we may end up creating default implicit operands that are for the wrong wave size. Includes a simple test that provokes insertBranch in the correct way to expose the issue being fixed. Change-Id: I92bdcdee9fcb7b4d91529b84e76a48ac8218483e Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, tpr, t-tye, hiraditya, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D82459 --- lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +- test/CodeGen/AMDGPU/insert-branch-w32.mir | 47 +++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) create mode 100644 test/CodeGen/AMDGPU/insert-branch-w32.mir diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index a0c25faa825..64dfd4a37b7 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2265,6 +2265,7 @@ unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB, // Copy the flags onto the implicit condition register operand. 
preserveCondRegFlags(CondBr->getOperand(1), Cond[1]); + fixImplicitOperands(*CondBr); if (BytesAdded) *BytesAdded = 4; @@ -3326,7 +3327,8 @@ static void copyFlagsToImplicitVCC(MachineInstr &MI, const MachineOperand &Orig) { for (MachineOperand &Use : MI.implicit_operands()) { - if (Use.isUse() && Use.getReg() == AMDGPU::VCC) { + if (Use.isUse() && + (Use.getReg() == AMDGPU::VCC || Use.getReg() == AMDGPU::VCC_LO)) { Use.setIsUndef(Orig.isUndef()); Use.setIsKill(Orig.isKill()); return; diff --git a/test/CodeGen/AMDGPU/insert-branch-w32.mir b/test/CodeGen/AMDGPU/insert-branch-w32.mir new file mode 100644 index 00000000000..5ccf84d3a08 --- /dev/null +++ b/test/CodeGen/AMDGPU/insert-branch-w32.mir @@ -0,0 +1,47 @@ +# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass branch-folder -o - %s | FileCheck %s + +# Designed to provoke calling SIInstrInfo::insertBranch in wave32 mode +# The implicit $vcc operand should be $vcc_lo in this case + +... +# CHECK-LABEL: bb.1: +# CHECK: S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo + +name: _amdgpu_cs_main +body: | + bb.0: + $vgpr1 = V_MOV_B32_e32 1050, implicit $exec + $sgpr0 = S_MOV_B32 1123418112 + $vcc_hi = IMPLICIT_DEF + bb.1: + $vgpr0 = COPY killed $vgpr1, implicit $exec + V_CMP_GT_U32_e32 5, $vgpr1, implicit-def $vcc_lo, implicit $exec, implicit-def $vcc + $vcc_lo = S_AND_B32 $exec_lo, $vcc_lo, implicit-def dead $scc + S_CBRANCH_VCCNZ %bb.1, implicit $vcc_lo, implicit $vcc + S_BRANCH %bb.2 + + bb.2: + $sgpr1 = COPY $sgpr0 + S_BRANCH %bb.1 + +... + +# CHECK-LABEL: bb.1: +# CHECK: S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc_lo +--- +name: _amdgpu_cs_main_undef +body: | + bb.0: + $vgpr1 = V_MOV_B32_e32 1050, implicit $exec + $sgpr0 = S_MOV_B32 1123418112 + $vcc_hi = IMPLICIT_DEF + bb.1: + $vgpr0 = COPY killed $vgpr1, implicit $exec + S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc_lo, implicit undef $vcc + S_BRANCH %bb.2 + + bb.2: + $sgpr1 = COPY $sgpr0 + S_BRANCH %bb.1 + +...