mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 03:02:36 +01:00
[AMDGPU]: PHI Elimination hooks added for custom COPY insertion. Fixed
Differential Revision: https://reviews.llvm.org/D67101 Reviewers: rampitec, vpykhtin llvm-svn: 372086
This commit is contained in:
parent
7b81fc68ca
commit
6b488065a6
@ -22,6 +22,7 @@
|
||||
#include "llvm/CodeGen/MachineCombinerPattern.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
#include "llvm/CodeGen/MachineOperand.h"
|
||||
#include "llvm/CodeGen/MachineOutliner.h"
|
||||
@ -1638,6 +1639,28 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// During PHI eleimination lets target to make necessary checks and
|
||||
/// insert the copy to the PHI destination register in a target specific
|
||||
/// manner.
|
||||
virtual MachineInstr *createPHIDestinationCopy(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
|
||||
const DebugLoc &DL, Register Src, Register Dst) const {
|
||||
return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
|
||||
.addReg(Src);
|
||||
}
|
||||
|
||||
/// During PHI eleimination lets target to make necessary checks and
|
||||
/// insert the copy to the PHI destination register in a target specific
|
||||
/// manner.
|
||||
virtual MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator InsPt,
|
||||
const DebugLoc &DL, Register Src,
|
||||
Register SrcSubReg,
|
||||
Register Dst) const {
|
||||
return BuildMI(MBB, InsPt, DL, get(TargetOpcode::COPY), Dst)
|
||||
.addReg(Src, 0, SrcSubReg);
|
||||
}
|
||||
|
||||
/// Returns a \p outliner::OutlinedFunction struct containing target-specific
|
||||
/// information for a set of outlining candidates.
|
||||
virtual outliner::OutlinedFunction getOutliningCandidateInfo(
|
||||
|
@ -31,7 +31,9 @@
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SlotIndexes.h"
|
||||
#include "llvm/CodeGen/TargetInstrInfo.h"
|
||||
#include "llvm/CodeGen/TargetLowering.h"
|
||||
#include "llvm/CodeGen/TargetOpcodes.h"
|
||||
#include "llvm/CodeGen/TargetPassConfig.h"
|
||||
#include "llvm/CodeGen/TargetRegisterInfo.h"
|
||||
#include "llvm/CodeGen/TargetSubtargetInfo.h"
|
||||
#include "llvm/Pass.h"
|
||||
@ -252,11 +254,12 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
|
||||
// Insert a register to register copy at the top of the current block (but
|
||||
// after any remaining phi nodes) which copies the new incoming register
|
||||
// into the phi node destination.
|
||||
MachineInstr *PHICopy = nullptr;
|
||||
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
|
||||
if (allPhiOperandsUndefined(*MPhi, *MRI))
|
||||
// If all sources of a PHI node are implicit_def or undef uses, just emit an
|
||||
// implicit_def instead of a copy.
|
||||
BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
|
||||
PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
|
||||
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
|
||||
else {
|
||||
// Can we reuse an earlier PHI node? This only happens for critical edges,
|
||||
@ -273,15 +276,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
|
||||
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
|
||||
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
|
||||
}
|
||||
BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), DestReg)
|
||||
.addReg(IncomingReg);
|
||||
// Give the target a chance to handle special cases; otherwise fall through
// to the generic COPY.
|
||||
PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
|
||||
IncomingReg, DestReg);
|
||||
}
|
||||
|
||||
// Update live variable information if there is any.
|
||||
if (LV) {
|
||||
MachineInstr &PHICopy = *std::prev(AfterPHIsIt);
|
||||
|
||||
if (IncomingReg) {
|
||||
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
|
||||
|
||||
@ -302,7 +303,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
|
||||
// killed. Note that because the value is defined in several places (once
|
||||
// each for each incoming block), the "def" block and instruction fields
|
||||
// for the VarInfo is not filled in.
|
||||
LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
|
||||
LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
|
||||
}
|
||||
|
||||
// Since we are going to be deleting the PHI node, if it is the last use of
|
||||
@ -312,15 +313,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
|
||||
|
||||
// If the result is dead, update LV.
|
||||
if (isDead) {
|
||||
LV->addVirtualRegisterDead(DestReg, PHICopy);
|
||||
LV->addVirtualRegisterDead(DestReg, *PHICopy);
|
||||
LV->removeVirtualRegisterDead(DestReg, *MPhi);
|
||||
}
|
||||
}
|
||||
|
||||
// Update LiveIntervals for the new copy or implicit def.
|
||||
if (LIS) {
|
||||
SlotIndex DestCopyIndex =
|
||||
LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt));
|
||||
SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy);
|
||||
|
||||
SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
|
||||
if (IncomingReg) {
|
||||
@ -406,9 +406,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
|
||||
if (DefMI->isImplicitDef())
|
||||
ImpDefs.insert(DefMI);
|
||||
} else {
|
||||
NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), IncomingReg)
|
||||
.addReg(SrcReg, 0, SrcSubReg);
|
||||
NewSrcInstr =
|
||||
TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(),
|
||||
SrcReg, SrcSubReg, IncomingReg);
|
||||
}
|
||||
}
|
||||
|
||||
@ -457,7 +457,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
|
||||
}
|
||||
} else {
|
||||
// We just inserted this copy.
|
||||
KillInst = std::prev(InsertPos);
|
||||
KillInst = NewSrcInstr;
|
||||
}
|
||||
}
|
||||
assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
|
||||
|
@ -6410,3 +6410,40 @@ bool llvm::execMayBeModifiedBeforeAnyUse(const MachineRegisterInfo &MRI,
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
/// AMDGPU override of the PHI-elimination destination-copy hook.
///
/// Walks the instructions at the top of \p MBB, stopping at \p LastPHIIt. If
/// a non-PHI instruction in that range already reads \p Dst, the COPY from
/// \p Src is inserted immediately before that reader so the value is defined
/// ahead of its use. Otherwise the generic TargetInstrInfo implementation
/// inserts the copy at \p LastPHIIt.
///
/// \returns the newly created instruction.
MachineInstr *SIInstrInfo::createPHIDestinationCopy(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator LastPHIIt,
    const DebugLoc &DL, Register Src, Register Dst) const {
  const MachineBasicBlock::iterator BlockEnd = MBB.end();

  // The first instruction is always inspected, even when it is LastPHIIt
  // itself; the LastPHIIt bound is only tested after advancing.
  for (MachineBasicBlock::iterator I = MBB.begin(); I != BlockEnd;) {
    const bool ReadsDst = !I->isPHI() && I->readsRegister(Dst);
    if (ReadsDst)
      return BuildMI(MBB, I, DL, get(TargetOpcode::COPY), Dst).addReg(Src);

    ++I;
    if (I == LastPHIIt)
      break;
  }

  return TargetInstrInfo::createPHIDestinationCopy(MBB, LastPHIIt, DL, Src,
                                                   Dst);
}
|
||||
|
||||
/// AMDGPU override of the PHI-elimination source-copy hook.
///
/// When \p InsPt is an SI_IF, SI_ELSE or SI_IF_BREAK that itself defines
/// \p Src, a copy placed in front of \p InsPt would read \p Src before it is
/// written. In that case the copy is emitted directly after \p InsPt as an
/// S_MOV_B32_term / S_MOV_B64_term (selected by wave size) that also carries
/// an implicit use of EXEC. Every other case is forwarded to the generic
/// TargetInstrInfo implementation.
///
/// \param MBB       Block that receives the copy.
/// \param InsPt     Requested insertion point; may be MBB.end().
/// \param DL        Debug location attached to the new instruction.
/// \param Src       Register being copied from.
/// \param SrcSubReg Sub-register index applied to \p Src.
/// \param Dst       Register being defined by the copy.
/// \returns the newly created instruction.
MachineInstr *SIInstrInfo::createPHISourceCopy(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsPt,
    const DebugLoc &DL, Register Src, Register SrcSubReg, Register Dst) const {
  if (InsPt != MBB.end() &&
      (InsPt->getOpcode() == AMDGPU::SI_IF ||
       InsPt->getOpcode() == AMDGPU::SI_ELSE ||
       InsPt->getOpcode() == AMDGPU::SI_IF_BREAK) &&
      InsPt->definesRegister(Src)) {
    // Insert after the control-flow pseudo that produces Src.
    ++InsPt;
    // Use the caller-supplied DL: after the increment InsPt may be MBB.end(),
    // which must not be dereferenced to query a debug location.
    return BuildMI(MBB, InsPt, DL,
                   get(ST.isWave32() ? AMDGPU::S_MOV_B32_term
                                     : AMDGPU::S_MOV_B64_term),
                   Dst)
        .addReg(Src, 0, SrcSubReg)
        .addReg(AMDGPU::EXEC, RegState::Implicit);
  }
  return TargetInstrInfo::createPHISourceCopy(MBB, InsPt, DL, Src, SrcSubReg,
                                              Dst);
}
|
||||
|
||||
/// Forwards the wave-size query to the subtarget this instruction info was
/// created for.
bool llvm::SIInstrInfo::isWave32() const {
  return ST.isWave32();
}
|
||||
|
@ -954,6 +954,19 @@ public:
|
||||
|
||||
bool isBasicBlockPrologue(const MachineInstr &MI) const override;
|
||||
|
||||
/// PHI-elimination hook: emits the copy of \p Src into the PHI destination
/// \p Dst. The AMDGPU implementation places the copy before an earlier
/// non-PHI reader of \p Dst when one precedes \p InsPt, and otherwise
/// defers to the generic TargetInstrInfo behavior.
MachineInstr *createPHIDestinationCopy(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator InsPt,
|
||||
const DebugLoc &DL, Register Src,
|
||||
Register Dst) const override;
|
||||
|
||||
/// PHI-elimination hook: emits the copy of \p Src (sub-register index
/// \p SrcSubReg) into \p Dst. The AMDGPU implementation emits a terminator
/// S_MOV after \p InsPt when \p InsPt is an SI_IF / SI_ELSE / SI_IF_BREAK
/// defining \p Src, and otherwise defers to the generic TargetInstrInfo
/// behavior.
MachineInstr *createPHISourceCopy(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator InsPt,
|
||||
const DebugLoc &DL, Register Src,
|
||||
Register SrcSubReg,
|
||||
Register Dst) const override;
|
||||
|
||||
/// Returns the subtarget's isWave32() result.
bool isWave32() const;
|
||||
|
||||
/// Return a partially built integer add instruction without carry.
|
||||
/// Caller must add source operands.
|
||||
/// For pre-GFX9 it will generate unused carry destination operand.
|
||||
|
@ -98,6 +98,8 @@ private:
|
||||
void emitLoop(MachineInstr &MI);
|
||||
void emitEndCf(MachineInstr &MI);
|
||||
|
||||
Register getSaveExec(MachineInstr* MI);
|
||||
|
||||
void findMaskOperands(MachineInstr &MI, unsigned OpNo,
|
||||
SmallVectorImpl<MachineOperand> &Src) const;
|
||||
|
||||
@ -175,17 +177,31 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Returns the register that should receive the saved exec mask for \p MI.
///
/// By default this is the register defined by operand 0 of \p MI. If the
/// instruction immediately following \p MI is an S_MOV_B32_term /
/// S_MOV_B64_term (selected by wave size) whose source operand is that same
/// register, the move is erased and its destination register is returned
/// instead.
Register SILowerControlFlow::getSaveExec(MachineInstr *MI) {
  MachineOperand &SaveExec = MI->getOperand(0);
  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister);
  Register SaveExecReg = SaveExec.getReg();

  MachineBasicBlock *Parent = MI->getParent();
  MachineBasicBlock::iterator Next =
      std::next(MachineBasicBlock::iterator(MI));
  if (Next == Parent->end())
    return SaveExecReg;

  const unsigned TermMovOpc =
      TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
  if (Next->getOpcode() != TermMovOpc)
    return SaveExecReg;

  const MachineOperand &MovSrc = Next->getOperand(1);
  if (!MovSrc.isReg() || MovSrc.getReg() != SaveExecReg)
    return SaveExecReg;

  // Fold the trailing terminator move: hand back its destination and drop it.
  // NOTE(review): the erased instruction is not removed from any
  // LiveIntervals maps here -- confirm that is not required by callers.
  Register ForwardedReg = Next->getOperand(0).getReg();
  Next->eraseFromParent();
  return ForwardedReg;
}
|
||||
|
||||
void SILowerControlFlow::emitIf(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
MachineBasicBlock::iterator I(&MI);
|
||||
|
||||
MachineOperand &SaveExec = MI.getOperand(0);
|
||||
MachineOperand &Cond = MI.getOperand(1);
|
||||
assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
|
||||
Cond.getSubReg() == AMDGPU::NoSubRegister);
|
||||
|
||||
Register SaveExecReg = SaveExec.getReg();
|
||||
Register SaveExecReg = getSaveExec(&MI);
|
||||
MachineOperand& Cond = MI.getOperand(1);
|
||||
assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
|
||||
|
||||
MachineOperand &ImpDefSCC = MI.getOperand(4);
|
||||
assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
|
||||
@ -266,8 +282,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
|
||||
Register DstReg = MI.getOperand(0).getReg();
|
||||
assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
|
||||
Register DstReg = getSaveExec(&MI);
|
||||
|
||||
bool ExecModified = MI.getOperand(3).getImm() != 0;
|
||||
MachineBasicBlock::iterator Start = MBB.begin();
|
||||
@ -339,7 +354,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
|
||||
void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
auto Dst = MI.getOperand(0).getReg();
|
||||
auto Dst = getSaveExec(&MI);
|
||||
|
||||
// Skip ANDing with exec if the break condition is already masked by exec
|
||||
// because it is a V_CMP in the same basic block. (We know the break
|
||||
@ -400,13 +415,17 @@ void SILowerControlFlow::emitLoop(MachineInstr &MI) {
|
||||
|
||||
void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
|
||||
MachineBasicBlock &MBB = *MI.getParent();
|
||||
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
unsigned CFMask = MI.getOperand(0).getReg();
|
||||
MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
|
||||
const DebugLoc &DL = MI.getDebugLoc();
|
||||
|
||||
MachineBasicBlock::iterator InsPt = MBB.begin();
|
||||
MachineInstr *NewMI =
|
||||
BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
|
||||
.addReg(Exec)
|
||||
.add(MI.getOperand(0));
|
||||
MachineBasicBlock::iterator InsPt =
|
||||
Def && Def->getParent() == &MBB ? std::next(MachineBasicBlock::iterator(Def))
|
||||
: MBB.begin();
|
||||
MachineInstr *NewMI = BuildMI(MBB, InsPt, DL, TII->get(OrOpc), Exec)
|
||||
.addReg(Exec)
|
||||
.add(MI.getOperand(0));
|
||||
|
||||
if (LIS)
|
||||
LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
|
||||
|
@ -26,8 +26,8 @@ body: |
|
||||
|
||||
# CHECK-LABEL: name: foo
|
||||
# CHECK: bb.3:
|
||||
# CHECK-NEXT: %3:sreg_32_xm0 = COPY killed %4
|
||||
# CHECK-NEXT: dead %2:sreg_32_xm0 = IMPLICIT_DEF
|
||||
# CHECK-NEXT: %3:sreg_32_xm0 = COPY killed %4
|
||||
# CHECK-NEXT: S_NOP 0, implicit killed %3
|
||||
|
||||
|
||||
|
54
test/CodeGen/AMDGPU/phi-elimination-end-cf.mir
Normal file
54
test/CodeGen/AMDGPU/phi-elimination-end-cf.mir
Normal file
@ -0,0 +1,54 @@
|
||||
# RUN: llc -mtriple amdgcn -run-pass livevars -run-pass phi-node-elimination -verify-machineinstrs -o - %s | FileCheck %s
|
||||
|
||||
# CHECK-LABEL: phi-cf-test
|
||||
# CHECK: bb.0:
|
||||
# CHECK: [[COND:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64
|
||||
# CHECK: [[IF_SOURCE0:%[0-9]+]]:sreg_64 = SI_IF [[COND]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
# CHECK: [[IF_INPUT_REG:%[0-9]+]]:sreg_64 = S_MOV_B64_term killed [[IF_SOURCE0]], implicit $exec
|
||||
|
||||
# CHECK: bb.1:
|
||||
# CHECK: [[END_CF_ARG:%[0-9]+]]:sreg_64 = COPY killed [[IF_INPUT_REG]]
|
||||
# CHECK: SI_END_CF killed [[END_CF_ARG]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
|
||||
# CHECK: bb.2:
|
||||
# CHECK: [[IF_SOURCE1:%[0-9]+]]:sreg_64 = SI_IF [[COND]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
# CHECK: [[IF_INPUT_REG]]:sreg_64 = S_MOV_B64_term killed [[IF_SOURCE1]], implicit $exec
|
||||
|
||||
|
||||
...
|
||||
---
|
||||
name: phi-cf-test
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
|
||||
bb.0:
|
||||
successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
liveins: $vgpr0
|
||||
|
||||
%5:vgpr_32(s32) = COPY $vgpr0
|
||||
%0:sreg_64 = V_CMP_EQ_U32_e64 0, %5(s32), implicit $exec
|
||||
%18:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%22:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3(0x80000000)
|
||||
|
||||
%24:sreg_64 = PHI %20, %bb.3, %22, %bb.0
|
||||
%23:vgpr_32 = PHI %19, %bb.3, %18, %bb.0
|
||||
SI_END_CF %24, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
%3:vgpr_32, dead %10:sreg_64 = nsw V_ADD_I32_e64 1, %23, 0, implicit $exec
|
||||
|
||||
bb.3:
|
||||
successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
|
||||
%4:vgpr_32 = PHI %19, %bb.3, %3, %bb.2, %18, %bb.0
|
||||
%15:sreg_32_xm0 = S_MOV_B32 61440
|
||||
%16:sreg_32_xm0 = S_MOV_B32 -1
|
||||
%17:sreg_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3
|
||||
BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
%19:vgpr_32 = COPY %4
|
||||
%20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
...
|
Loading…
Reference in New Issue
Block a user