diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 288e5b1524f..5e822453f4c 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -234,6 +234,46 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
   return true;
 }
 
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+                               const MachineRegisterInfo &MRI,
+                               const SIRegisterInfo *TRI,
+                               const SIInstrInfo *TII) {
+
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+      return true;
+  }
+  return false;
+}
+static bool phiHasBreakDef(const MachineInstr &PHI,
+                           const MachineRegisterInfo &MRI,
+                           SmallSet<unsigned, 8> &Visited) {
+
+  for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+    unsigned Reg = PHI.getOperand(i).getReg();
+    if (Visited.count(Reg))
+      continue;
+
+    Visited.insert(Reg);
+
+    MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
+    assert(DefInstr);
+    switch (DefInstr->getOpcode()) {
+    default:
+      break;
+    case AMDGPU::SI_BREAK:
+    case AMDGPU::SI_IF_BREAK:
+    case AMDGPU::SI_ELSE_BREAK:
+      return true;
+    case AMDGPU::PHI:
+      if (phiHasBreakDef(*DefInstr, MRI, Visited))
+        return true;
+    }
+  }
+  return false;
+}
+
 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -311,31 +351,11 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
       // the first block (where the condition is computed), so there
       // is no chance for values to be over-written.
 
-      bool HasBreakDef = false;
-      for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
-        unsigned Reg = MI.getOperand(i).getReg();
-        if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
-          TII->moveToVALU(MI);
-          break;
-        }
-        MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
-        assert(DefInstr);
-        switch(DefInstr->getOpcode()) {
-
-        case AMDGPU::SI_BREAK:
-        case AMDGPU::SI_IF_BREAK:
-        case AMDGPU::SI_ELSE_BREAK:
-          // If we see a PHI instruction that defines an SGPR, then that PHI
-          // instruction has already been considered and should have
-          // a *_BREAK as an operand.
-        case AMDGPU::PHI:
-          HasBreakDef = true;
-          break;
-        }
-      }
-
-      if (!SGPRBranch && !HasBreakDef)
+      SmallSet<unsigned, 8> Visited;
+      if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
+          (!SGPRBranch && !phiHasBreakDef(MI, MRI, Visited))) {
         TII->moveToVALU(MI);
+      }
       break;
     }
     case AMDGPU::REG_SEQUENCE: {
diff --git a/test/CodeGen/AMDGPU/salu-to-valu.ll b/test/CodeGen/AMDGPU/salu-to-valu.ll
index fd73ff86af5..ff013060dce 100644
--- a/test/CodeGen/AMDGPU/salu-to-valu.ll
+++ b/test/CodeGen/AMDGPU/salu-to-valu.ll
@@ -457,5 +457,26 @@ bb7:                                              ; preds = %bb3
   ret void
 }
 
+; GCN-LABEL: {{^}}phi_visit_order:
+; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 1, v{{[0-9]+}}
+define void @phi_visit_order() {
+bb:
+  br label %bb1
+
+bb1:
+  %tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb4 ]
+  %tid = call i32 @llvm.amdgcn.workitem.id.x()
+  %cnd = icmp eq i32 %tid, 0
+  br i1 %cnd, label %bb4, label %bb2
+
+bb2:
+  %tmp3 = add nsw i32 %tmp, 1
+  br label %bb4
+
+bb4:
+  %tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ]
+  br label %bb1
+}
+
 attributes #0 = { nounwind readnone }
 attributes #1 = { nounwind }
diff --git a/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir b/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir
new file mode 100644
index 00000000000..016a6e6fd06
--- /dev/null
+++ b/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir
@@ -0,0 +1,43 @@
+# RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN
+
+--- |
+  define void @phi_visit_order() { ret void }
+
+name: phi_visit_order
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: sreg_32 }
+  - { id: 1, class: sreg_64 }
+  - { id: 2, class: sreg_32 }
+  - { id: 7, class: vgpr_32 }
+  - { id: 8, class: sreg_32 }
+  - { id: 9, class: vgpr_32 }
+  - { id: 10, class: sreg_64 }
+  - { id: 11, class: sreg_32 }
+
+body: |
+  ; GCN-LABEL: name: phi_visit_order
+  ; GCN: V_ADD_I32
+  bb.0:
+    liveins: %vgpr0
+    successors: %bb.1
+    %7 = COPY %vgpr0
+    %8 = S_MOV_B32 0
+
+  bb.1:
+    successors: %bb.1, %bb.2
+    %0 = PHI %8, %bb.0, %0, %bb.1, %2, %bb.2
+    %9 = V_MOV_B32_e32 9, implicit %exec
+    %10 = V_CMP_EQ_U32_e64 %7, %9, implicit %exec
+    %1 = SI_IF %10, %bb.2, implicit-def %exec, implicit-def %scc, implicit %exec
+    S_BRANCH %bb.1
+
+  bb.2:
+    successors: %bb.1
+    SI_END_CF %1, implicit-def %exec, implicit-def %scc, implicit %exec
+    %11 = S_MOV_B32 1
+    %2 = S_ADD_I32 %0, %11, implicit-def %scc
+    S_BRANCH %bb.1
+
+...
+---