1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 03:02:36 +01:00

AMDGPU/SI: Fix visit order assumption in SIFixSGPRCopies

Summary:
This pass was assuming that when a PHI instruction defined a register
used by another PHI instruction, the defining instruction would
be legalized before the using instruction.

This assumption was causing the pass to not legalize some PHI nodes
within divergent flow-control.

This fixes a bug that was uncovered by r285762.

Reviewers: nhaehnle, arsenm

Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, tony-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D26303

llvm-svn: 286676
This commit is contained in:
Tom Stellard 2016-11-11 23:35:42 +00:00
parent f6cbc4ff7c
commit 8b29b594be
3 changed files with 108 additions and 24 deletions

View File

@ -234,6 +234,46 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return true;
}
/// Report whether any incoming value of \p PHI is held in a register whose
/// class contains VGPRs.
///
/// Operand 0 is skipped and the remaining operands are walked in steps of
/// two, so only every other operand (the register half of each pair) is
/// inspected.
///
/// \param PHI the PHI instruction whose operands are examined.
/// \param MRI used to look up the register class of each operand register.
/// \param TRI queried via hasVGPRs() for each operand's register class.
/// \param TII not referenced by this function.
/// \returns true as soon as one operand's register class has VGPRs,
///          false if none does.
static bool phiHasVGPROperands(const MachineInstr &PHI,
                               const MachineRegisterInfo &MRI,
                               const SIRegisterInfo *TRI,
                               const SIInstrInfo *TII) {
  for (unsigned Idx = 1, NumOps = PHI.getNumOperands(); Idx < NumOps;
       Idx += 2) {
    const unsigned SrcReg = PHI.getOperand(Idx).getReg();
    const bool IsVector = TRI->hasVGPRs(MRI.getRegClass(SrcReg));
    if (IsVector)
      return true;
  }

  return false;
}
static bool phiHasBreakDef(const MachineInstr &PHI,
const MachineRegisterInfo &MRI,
SmallSet<unsigned, 8> &Visited) {
for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
unsigned Reg = PHI.getOperand(i).getReg();
if (Visited.count(Reg))
continue;
Visited.insert(Reg);
MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
assert(DefInstr);
switch (DefInstr->getOpcode()) {
default:
break;
case AMDGPU::SI_BREAK:
case AMDGPU::SI_IF_BREAK:
case AMDGPU::SI_ELSE_BREAK:
return true;
case AMDGPU::PHI:
if (phiHasBreakDef(*DefInstr, MRI, Visited))
return true;
}
}
return false;
}
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
@ -311,31 +351,11 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// the first block (where the condition is computed), so there
// is no chance for values to be over-written.
bool HasBreakDef = false;
for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
unsigned Reg = MI.getOperand(i).getReg();
if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
TII->moveToVALU(MI);
break;
}
MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
assert(DefInstr);
switch(DefInstr->getOpcode()) {
case AMDGPU::SI_BREAK:
case AMDGPU::SI_IF_BREAK:
case AMDGPU::SI_ELSE_BREAK:
// If we see a PHI instruction that defines an SGPR, then that PHI
// instruction has already been considered and should have
// a *_BREAK as an operand.
case AMDGPU::PHI:
HasBreakDef = true;
break;
}
}
if (!SGPRBranch && !HasBreakDef)
SmallSet<unsigned, 8> Visited;
if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
(!SGPRBranch && !phiHasBreakDef(MI, MRI, Visited))) {
TII->moveToVALU(MI);
}
break;
}
case AMDGPU::REG_SEQUENCE: {

View File

@ -457,5 +457,26 @@ bb7: ; preds = %bb3
ret void
}
; GCN-LABEL: {{^}}phi_visit_order:
; GCN: v_add_i32_e32 v{{[0-9]+}}, vcc, 1, v{{[0-9]+}}
; Regression test for PHI visit order: %tmp (in bb1) and %tmp5 (in bb4) are
; PHIs that each take the other's result as an incoming value. The CHECK line
; above expects the add in bb2 to be emitted as v_add_i32_e32.
define void @phi_visit_order() {
bb:
br label %bb1
bb1:
; Loop-header PHI: 0 on entry, %tmp5 when coming back from bb4.
%tmp = phi i32 [ 0, %bb ], [ %tmp5, %bb4 ]
; The branch condition is computed from the workitem id.
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%cnd = icmp eq i32 %tid, 0
br i1 %cnd, label %bb4, label %bb2
bb2:
%tmp3 = add nsw i32 %tmp, 1
br label %bb4
bb4:
; Merges the incremented value from bb2 with the unmodified %tmp from bb1.
%tmp5 = phi i32 [ %tmp3, %bb2 ], [ %tmp, %bb1 ]
; No exit: control always returns to bb1.
br label %bb1
}
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

View File

@ -0,0 +1,43 @@
# RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN
# Runs only the si-fix-sgpr-copies pass on the MIR below. The input contains a
# scalar S_ADD_I32 whose result (%2) is an incoming value of the PHI defining
# %0 in bb.1; that PHI also takes its own result %0 as an incoming value. The
# GCN check expects a V_ADD_I32 in the pass output.
--- |
define void @phi_visit_order() { ret void }
name: phi_visit_order
tracksRegLiveness: true
registers:
- { id: 0, class: sreg_32 }
- { id: 1, class: sreg_64 }
- { id: 2, class: sreg_32 }
- { id: 7, class: vgpr_32 }
- { id: 8, class: sreg_32 }
- { id: 9, class: vgpr_32 }
- { id: 10, class: sreg_64 }
- { id: 11, class: sreg_32 }
body: |
; GCN-LABEL: name: phi_visit_order
; GCN: V_ADD_I32
bb.0:
liveins: %vgpr0
successors: %bb.1
%7 = COPY %vgpr0
%8 = S_MOV_B32 0
bb.1:
successors: %bb.1, %bb.2
%0 = PHI %8, %bb.0, %0, %bb.1, %2, %bb.2
%9 = V_MOV_B32_e32 9, implicit %exec
%10 = V_CMP_EQ_U32_e64 %7, %9, implicit %exec
%1 = SI_IF %10, %bb.2, implicit-def %exec, implicit-def %scc, implicit %exec
S_BRANCH %bb.1
bb.2:
successors: %bb.1
SI_END_CF %1, implicit-def %exec, implicit-def %scc, implicit %exec
%11 = S_MOV_B32 1
%2 = S_ADD_I32 %0, %11, implicit-def %scc
S_BRANCH %bb.1
...
---