mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-10-20 11:33:24 +02:00
[AMDGPU] move PHI nodes to AGPR class
If all uses of a PHI are in an AGPR register class, we should avoid unneeded copies via VGPRs. Differential Revision: https://reviews.llvm.org/D69200 llvm-svn: 375297
This commit is contained in:
parent
7cc7328f4b
commit
18380a59a6
@ -757,6 +757,7 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
|
||||
unsigned numVGPRUses = 0;
|
||||
bool AllAGPRUses = true;
|
||||
SetVector<const MachineInstr *> worklist;
|
||||
SmallSet<const MachineInstr *, 4> Visited;
|
||||
worklist.insert(&MI);
|
||||
@ -766,6 +767,9 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
|
||||
unsigned Reg = Instr->getOperand(0).getReg();
|
||||
for (const auto &Use : MRI->use_operands(Reg)) {
|
||||
const MachineInstr *UseMI = Use.getParent();
|
||||
AllAGPRUses &= (UseMI->isCopy() &&
|
||||
TRI->isAGPR(*MRI, UseMI->getOperand(0).getReg())) ||
|
||||
TRI->isAGPR(*MRI, Use.getReg());
|
||||
if (UseMI->isCopy() || UseMI->isRegSequence()) {
|
||||
if (UseMI->isCopy() &&
|
||||
UseMI->getOperand(0).getReg().isPhysical() &&
|
||||
@ -794,11 +798,19 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Register PHIRes = MI.getOperand(0).getReg();
|
||||
const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
|
||||
if (AllAGPRUses && numVGPRUses && !TRI->hasAGPRs(RC0)) {
|
||||
LLVM_DEBUG(dbgs() << "Moving PHI to AGPR: " << MI);
|
||||
MRI->setRegClass(PHIRes, TRI->getEquivalentAGPRClass(RC0));
|
||||
}
|
||||
|
||||
bool hasVGPRInput = false;
|
||||
for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
|
||||
unsigned InputReg = MI.getOperand(i).getReg();
|
||||
MachineInstr *Def = MRI->getVRegDef(InputReg);
|
||||
if (TRI->isVGPR(*MRI, InputReg)) {
|
||||
if (TRI->isVectorRegister(*MRI, InputReg)) {
|
||||
if (Def->isCopy()) {
|
||||
unsigned SrcReg = Def->getOperand(1).getReg();
|
||||
const TargetRegisterClass *RC =
|
||||
@ -810,15 +822,14 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
|
||||
break;
|
||||
}
|
||||
else if (Def->isCopy() &&
|
||||
TRI->isVGPR(*MRI, Def->getOperand(1).getReg())) {
|
||||
TRI->isVectorRegister(*MRI, Def->getOperand(1).getReg())) {
|
||||
hasVGPRInput = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
unsigned PHIRes = MI.getOperand(0).getReg();
|
||||
const TargetRegisterClass *RC0 = MRI->getRegClass(PHIRes);
|
||||
|
||||
if ((!TRI->isVGPR(*MRI, PHIRes) && RC0 != &AMDGPU::VReg_1RegClass) &&
|
||||
if ((!TRI->isVectorRegister(*MRI, PHIRes) &&
|
||||
RC0 != &AMDGPU::VReg_1RegClass) &&
|
||||
(hasVGPRInput || numVGPRUses > 1)) {
|
||||
LLVM_DEBUG(dbgs() << "Fixing PHI: " << MI);
|
||||
TII->moveToVALU(MI);
|
||||
|
29
test/CodeGen/AMDGPU/mfma-loop.ll
Normal file
29
test/CodeGen/AMDGPU/mfma-loop.ll
Normal file
@ -0,0 +1,29 @@
|
||||
; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
; GCN-LABEL: {{^}}test_mfma_loop_zeroinit:
|
||||
; GCN-COUNT-32: v_accvgpr_write_b32
|
||||
; GCN: [[LOOP:BB[0-9_]+]]:
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: v_mfma_f32_32x32x1f32
|
||||
; GCN-NOT: v_accvgpr
|
||||
; GCN: s_cbranch_scc1 [[LOOP]]
|
||||
; GCN-COUNT-32: v_accvgpr_read_b32
|
||||
; Kernel under test: a single-block loop that feeds the result of an MFMA
; intrinsic call back into itself through a <32 x float> PHI, then stores the
; final value through %arg after the loop exits.
define amdgpu_kernel void @test_mfma_loop_zeroinit(<32 x float> addrspace(1)* %arg) {
|
||||
entry:
|
||||
br label %for.cond.preheader
|
||||
|
||||
; Loop body; it branches back to itself until the counter reaches 16.
for.cond.preheader:
|
||||
; Accumulator: zeroinitializer on entry, previous MFMA result on the back edge.
%phi = phi <32 x float> [ zeroinitializer, %entry ], [ %mai.1, %for.cond.preheader ]
|
||||
; Loop counter, starting at 0.
%c = phi i32 [ 0, %entry ], [ %inc, %for.cond.preheader ]
|
||||
; The PHI value is passed as the <32 x float> operand of the intrinsic.
%mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> %phi, i32 0, i32 0, i32 0)
|
||||
%inc = add nuw nsw i32 %c, 1
|
||||
; Exit once the incremented counter equals 16.
%cc = icmp eq i32 %inc, 16
|
||||
br i1 %cc, label %exit, label %for.cond.preheader
|
||||
|
||||
exit:
|
||||
; Write the last MFMA result to the address-space-1 pointer argument.
store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
|
||||
declare i32 @llvm.amdgcn.workitem.id.x()
|
Loading…
Reference in New Issue
Block a user