1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-24 03:33:20 +01:00

[Tail duplication] Handle source registers with subregisters

When a block is tail-duplicated, the PHI nodes from that block are
replaced with appropriate COPY instructions. When those PHI nodes
contained use operands with subregisters, the subregisters were
dropped from the COPY instructions, resulting in incorrect code.

Keep track of the subregister information and use this information
when remapping instructions from the duplicated block.

Differential Revision: http://reviews.llvm.org/D19337

llvm-svn: 267583
This commit is contained in:
Krzysztof Parzyszek 2016-04-26 18:36:34 +00:00
parent c247e8effd
commit 8d29c2a6a5
3 changed files with 155 additions and 37 deletions

View File

@ -56,16 +56,18 @@ public:
MachineBasicBlock *MBB);
private:
typedef TargetInstrInfo::RegSubRegPair RegSubRegPair;
void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
MachineBasicBlock *BB);
void processPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB,
DenseMap<unsigned, unsigned> &LocalVRMap,
SmallVectorImpl<std::pair<unsigned, unsigned>> &Copies,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
const DenseSet<unsigned> &UsedByPhi, bool Remove);
void duplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB,
MachineBasicBlock *PredBB, MachineFunction &MF,
DenseMap<unsigned, unsigned> &LocalVRMap,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
const DenseSet<unsigned> &UsedByPhi);
void updateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
@ -79,6 +81,9 @@ private:
MachineBasicBlock *TailBB,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies);
void appendCopies(MachineBasicBlock *MBB,
SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
SmallVectorImpl<MachineInstr *> &Copies);
void removeDeadBlock(MachineBasicBlock *MBB);
};

View File

@ -303,20 +303,21 @@ void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
/// source register that's contributed by PredBB and update SSA update map.
void TailDuplicator::processPHI(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
DenseMap<unsigned, unsigned> &LocalVRMap,
SmallVectorImpl<std::pair<unsigned, unsigned>> &Copies,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
unsigned DefReg = MI->getOperand(0).getReg();
unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
assert(SrcOpIdx && "Unable to find matching PHI source?");
unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
// Insert a copy from source to the end of the block. The def register is the
// available value liveout of the block.
unsigned NewDef = MRI->createVirtualRegister(RC);
Copies.push_back(std::make_pair(NewDef, SrcReg));
Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
addSSAUpdateEntry(DefReg, NewDef, PredBB);
@ -334,7 +335,8 @@ void TailDuplicator::processPHI(
/// the source operands due to earlier PHI translation.
void TailDuplicator::duplicateInstruction(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
MachineFunction &MF, DenseMap<unsigned, unsigned> &LocalVRMap,
MachineFunction &MF,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
const DenseSet<unsigned> &UsedByPhi) {
MachineInstr *NewMI = TII->duplicate(MI, MF);
if (PreRegAlloc) {
@ -349,17 +351,63 @@ void TailDuplicator::duplicateInstruction(
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
unsigned NewReg = MRI->createVirtualRegister(RC);
MO.setReg(NewReg);
LocalVRMap.insert(std::make_pair(Reg, NewReg));
LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
addSSAUpdateEntry(Reg, NewReg, PredBB);
} else {
DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
auto VI = LocalVRMap.find(Reg);
if (VI != LocalVRMap.end()) {
MO.setReg(VI->second);
// Need to make sure that the register class of the mapped register
// will satisfy the constraints of the class of the register being
// replaced.
auto *OrigRC = MRI->getRegClass(Reg);
auto *MappedRC = MRI->getRegClass(VI->second.Reg);
const TargetRegisterClass *ConstrRC;
if (VI->second.SubReg != 0) {
ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
VI->second.SubReg);
if (ConstrRC) {
// The actual constraining (as in "find appropriate new class")
// is done by getMatchingSuperRegClass, so now we only need to
// change the class of the mapped register.
MRI->setRegClass(VI->second.Reg, ConstrRC);
}
} else {
// For mapped registers that do not have sub-registers, simply
// restrict their class to match the original one.
ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
}
if (ConstrRC) {
// If the class constraining succeeded, we can simply replace
// the old register with the mapped one.
MO.setReg(VI->second.Reg);
// We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
// sub-register, we need to compose the sub-register indices.
MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
VI->second.SubReg));
} else {
// The direct replacement is not possible, due to failing register
// class constraints. An explicit COPY is necessary. Create one
// that can be reused
auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
if (NewRC == nullptr)
NewRC = OrigRC;
unsigned NewReg = MRI->createVirtualRegister(NewRC);
BuildMI(*PredBB, MI, MI->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewReg)
.addReg(VI->second.Reg, 0, VI->second.SubReg);
LocalVRMap.erase(VI);
LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
MO.setReg(NewReg);
// The composed VI.Reg:VI.SubReg is replaced with NewReg, which
// is equivalent to the whole register Reg. Hence, Reg:subreg
// is same as NewReg:subreg, so keep the sub-register index
// unchanged.
}
// Clear any kill flags from this operand. The new register could
// have uses after this one, so kills are not valid here.
MO.setIsKill(false);
MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
}
}
}
@ -734,8 +782,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
}
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, unsigned> LocalVRMap;
SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
// Use instr_iterator here to properly handle bundles, e.g.
// ARM Thumb2 IT block.
MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
@ -752,12 +800,7 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
}
}
MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
TII->get(TargetOpcode::COPY), CopyInfos[i].first)
.addReg(CopyInfos[i].second));
}
appendCopies(PredBB, CopyInfos, Copies);
// Simplify
TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
@ -792,8 +835,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
<< "From MBB: " << *TailBB);
if (PreRegAlloc) {
DenseMap<unsigned, unsigned> LocalVRMap;
SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
MachineBasicBlock::iterator I = TailBB->begin();
// Process PHI instructions first.
while (I != TailBB->end() && I->isPHI()) {
@ -812,13 +855,7 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
MI->eraseFromParent();
}
MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
TII->get(TargetOpcode::COPY),
CopyInfos[i].first)
.addReg(CopyInfos[i].second));
}
appendCopies(PrevBB, CopyInfos, Copies);
} else {
// No PHIs to worry about, just splice the instructions over.
PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
@ -863,8 +900,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
if (PredBB->succ_size() != 1)
continue;
DenseMap<unsigned, unsigned> LocalVRMap;
SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
MachineBasicBlock::iterator I = TailBB->begin();
// Process PHI instructions first.
while (I != TailBB->end() && I->isPHI()) {
@ -873,17 +910,26 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
MachineInstr *MI = &*I++;
processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
}
MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
TII->get(TargetOpcode::COPY), CopyInfos[i].first)
.addReg(CopyInfos[i].second));
}
appendCopies(PredBB, CopyInfos, Copies);
}
return Changed;
}
/// At the end of the block \p MBB generate COPY instructions between registers
/// described by \p CopyInfos. Append resulting instructions to \p Copies.
void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
SmallVectorImpl<MachineInstr*> &Copies) {
MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
for (auto &CI : CopyInfos) {
auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
.addReg(CI.second.Reg, 0, CI.second.SubReg);
Copies.push_back(C);
}
}
/// Remove the specified dead machine basic block from the function, updating
/// the CFG.
void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {

View File

@ -0,0 +1,67 @@
; RUN: llc -march=hexagon < %s | FileCheck %s
; REQUIRES: asserts
; When tail-duplicating a block with PHI nodes that use subregisters, the
; subregisters were dropped by the tail duplicator, resulting in invalid
; COPY instructions being generated.
; CHECK: = extractu(r{{[0-9]+}}, #15, #17)
target triple = "hexagon"
%struct.0 = type { i64, i16 }
%struct.1 = type { i64, i64 }
declare hidden fastcc void @foo(%struct.0* noalias nocapture, i8 signext, i8 zeroext, i32, i64, i64) unnamed_addr #0
define void @fred(%struct.0* noalias nocapture sret %agg.result, %struct.1* byval nocapture readonly align 8 %a) #1 {
entry:
%0 = load i64, i64* undef, align 8
switch i32 undef, label %if.else [
i32 32767, label %if.then
i32 0, label %if.then7
]
if.then: ; preds = %entry
ret void
if.then7: ; preds = %entry
br i1 undef, label %if.then.i, label %if.else16.i
if.then.i: ; preds = %if.then7
br i1 undef, label %if.then5.i, label %if.else.i
if.then5.i: ; preds = %if.then.i
%shl.i21 = shl i64 %0, 0
br label %if.end.i
if.else.i: ; preds = %if.then.i
%shl12.i = shl i64 %0, undef
br label %if.end.i
if.end.i: ; preds = %if.else.i, %if.then5.i
%aSig0.0 = phi i64 [ undef, %if.then5.i ], [ %shl12.i, %if.else.i ]
%storemerge43.i = phi i64 [ %shl.i21, %if.then5.i ], [ 0, %if.else.i ]
%sub15.i = sub nsw i32 -63, undef
br label %if.end13
if.else16.i: ; preds = %if.then7
br label %if.end13
if.else: ; preds = %entry
%or12 = or i64 undef, 281474976710656
br label %if.end13
if.end13: ; preds = %if.else, %if.else16.i, %if.end.i
%aSig1.1 = phi i64 [ %0, %if.else ], [ %storemerge43.i, %if.end.i ], [ undef, %if.else16.i ]
%aSig0.2 = phi i64 [ %or12, %if.else ], [ %aSig0.0, %if.end.i ], [ undef, %if.else16.i ]
%aExp.0 = phi i32 [ undef, %if.else ], [ %sub15.i, %if.end.i ], [ undef, %if.else16.i ]
%shl2.i = shl i64 %aSig0.2, 15
%shr.i = lshr i64 %aSig1.1, 49
%or.i = or i64 %shl2.i, %shr.i
tail call fastcc void @foo(%struct.0* noalias %agg.result, i8 signext 80, i8 zeroext undef, i32 %aExp.0, i64 %or.i, i64 undef)
unreachable
}
attributes #0 = { norecurse nounwind }
attributes #1 = { nounwind }