mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 03:33:20 +01:00
[Tail duplication] Handle source registers with subregisters
When a block is tail-duplicated, the PHI nodes from that block are replaced with appropriate COPY instructions. When those PHI nodes contained use operands with subregisters, the subregisters were dropped from the COPY instructions, resulting in incorrect code. Keep track of the subregister information and use this information when remapping instructions from the duplicated block. Differential Revision: http://reviews.llvm.org/D19337 llvm-svn: 267583
This commit is contained in:
parent
c247e8effd
commit
8d29c2a6a5
@ -56,16 +56,18 @@ public:
|
||||
MachineBasicBlock *MBB);
|
||||
|
||||
private:
|
||||
typedef TargetInstrInfo::RegSubRegPair RegSubRegPair;
|
||||
|
||||
void addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
|
||||
MachineBasicBlock *BB);
|
||||
void processPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
|
||||
MachineBasicBlock *PredBB,
|
||||
DenseMap<unsigned, unsigned> &LocalVRMap,
|
||||
SmallVectorImpl<std::pair<unsigned, unsigned>> &Copies,
|
||||
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
|
||||
SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
|
||||
const DenseSet<unsigned> &UsedByPhi, bool Remove);
|
||||
void duplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB,
|
||||
MachineBasicBlock *PredBB, MachineFunction &MF,
|
||||
DenseMap<unsigned, unsigned> &LocalVRMap,
|
||||
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
|
||||
const DenseSet<unsigned> &UsedByPhi);
|
||||
void updateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
|
||||
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
|
||||
@ -79,6 +81,9 @@ private:
|
||||
MachineBasicBlock *TailBB,
|
||||
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
|
||||
SmallVectorImpl<MachineInstr *> &Copies);
|
||||
void appendCopies(MachineBasicBlock *MBB,
|
||||
SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
|
||||
SmallVectorImpl<MachineInstr *> &Copies);
|
||||
|
||||
void removeDeadBlock(MachineBasicBlock *MBB);
|
||||
};
|
||||
|
@ -303,20 +303,21 @@ void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
|
||||
/// source register that's contributed by PredBB and update SSA update map.
|
||||
void TailDuplicator::processPHI(
|
||||
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
|
||||
DenseMap<unsigned, unsigned> &LocalVRMap,
|
||||
SmallVectorImpl<std::pair<unsigned, unsigned>> &Copies,
|
||||
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
|
||||
SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
|
||||
const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
|
||||
unsigned DefReg = MI->getOperand(0).getReg();
|
||||
unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
|
||||
assert(SrcOpIdx && "Unable to find matching PHI source?");
|
||||
unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
|
||||
unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
|
||||
LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
|
||||
LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
|
||||
|
||||
// Insert a copy from source to the end of the block. The def register is the
|
||||
// available value liveout of the block.
|
||||
unsigned NewDef = MRI->createVirtualRegister(RC);
|
||||
Copies.push_back(std::make_pair(NewDef, SrcReg));
|
||||
Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
|
||||
if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
|
||||
addSSAUpdateEntry(DefReg, NewDef, PredBB);
|
||||
|
||||
@ -334,7 +335,8 @@ void TailDuplicator::processPHI(
|
||||
/// the source operands due to earlier PHI translation.
|
||||
void TailDuplicator::duplicateInstruction(
|
||||
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
|
||||
MachineFunction &MF, DenseMap<unsigned, unsigned> &LocalVRMap,
|
||||
MachineFunction &MF,
|
||||
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
|
||||
const DenseSet<unsigned> &UsedByPhi) {
|
||||
MachineInstr *NewMI = TII->duplicate(MI, MF);
|
||||
if (PreRegAlloc) {
|
||||
@ -349,17 +351,63 @@ void TailDuplicator::duplicateInstruction(
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
|
||||
unsigned NewReg = MRI->createVirtualRegister(RC);
|
||||
MO.setReg(NewReg);
|
||||
LocalVRMap.insert(std::make_pair(Reg, NewReg));
|
||||
LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
|
||||
if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
|
||||
addSSAUpdateEntry(Reg, NewReg, PredBB);
|
||||
} else {
|
||||
DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
|
||||
auto VI = LocalVRMap.find(Reg);
|
||||
if (VI != LocalVRMap.end()) {
|
||||
MO.setReg(VI->second);
|
||||
// Need to make sure that the register class of the mapped register
|
||||
// will satisfy the constraints of the class of the register being
|
||||
// replaced.
|
||||
auto *OrigRC = MRI->getRegClass(Reg);
|
||||
auto *MappedRC = MRI->getRegClass(VI->second.Reg);
|
||||
const TargetRegisterClass *ConstrRC;
|
||||
if (VI->second.SubReg != 0) {
|
||||
ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
|
||||
VI->second.SubReg);
|
||||
if (ConstrRC) {
|
||||
// The actual constraining (as in "find appropriate new class")
|
||||
// is done by getMatchingSuperRegClass, so now we only need to
|
||||
// change the class of the mapped register.
|
||||
MRI->setRegClass(VI->second.Reg, ConstrRC);
|
||||
}
|
||||
} else {
|
||||
// For mapped registers that do not have sub-registers, simply
|
||||
// restrict their class to match the original one.
|
||||
ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
|
||||
}
|
||||
|
||||
if (ConstrRC) {
|
||||
// If the class constraining succeeded, we can simply replace
|
||||
// the old register with the mapped one.
|
||||
MO.setReg(VI->second.Reg);
|
||||
// We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
|
||||
// sub-register, we need to compose the sub-register indices.
|
||||
MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
|
||||
VI->second.SubReg));
|
||||
} else {
|
||||
// The direct replacement is not possible, due to failing register
|
||||
// class constraints. An explicit COPY is necessary. Create one
|
||||
// that can be reused
|
||||
auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
|
||||
if (NewRC == nullptr)
|
||||
NewRC = OrigRC;
|
||||
unsigned NewReg = MRI->createVirtualRegister(NewRC);
|
||||
BuildMI(*PredBB, MI, MI->getDebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), NewReg)
|
||||
.addReg(VI->second.Reg, 0, VI->second.SubReg);
|
||||
LocalVRMap.erase(VI);
|
||||
LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
|
||||
MO.setReg(NewReg);
|
||||
// The composed VI.Reg:VI.SubReg is replaced with NewReg, which
|
||||
// is equivalent to the whole register Reg. Hence, Reg:subreg
|
||||
// is same as NewReg:subreg, so keep the sub-register index
|
||||
// unchanged.
|
||||
}
|
||||
// Clear any kill flags from this operand. The new register could
|
||||
// have uses after this one, so kills are not valid here.
|
||||
MO.setIsKill(false);
|
||||
MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -734,8 +782,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
|
||||
}
|
||||
|
||||
// Clone the contents of TailBB into PredBB.
|
||||
DenseMap<unsigned, unsigned> LocalVRMap;
|
||||
SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
|
||||
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
|
||||
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
|
||||
// Use instr_iterator here to properly handle bundles, e.g.
|
||||
// ARM Thumb2 IT block.
|
||||
MachineBasicBlock::instr_iterator I = TailBB->instr_begin();
|
||||
@ -752,12 +800,7 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
|
||||
duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
|
||||
}
|
||||
}
|
||||
MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
|
||||
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
|
||||
Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), CopyInfos[i].first)
|
||||
.addReg(CopyInfos[i].second));
|
||||
}
|
||||
appendCopies(PredBB, CopyInfos, Copies);
|
||||
|
||||
// Simplify
|
||||
TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
|
||||
@ -792,8 +835,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
|
||||
DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
|
||||
<< "From MBB: " << *TailBB);
|
||||
if (PreRegAlloc) {
|
||||
DenseMap<unsigned, unsigned> LocalVRMap;
|
||||
SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
|
||||
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
|
||||
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
|
||||
MachineBasicBlock::iterator I = TailBB->begin();
|
||||
// Process PHI instructions first.
|
||||
while (I != TailBB->end() && I->isPHI()) {
|
||||
@ -812,13 +855,7 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
|
||||
duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
|
||||
MI->eraseFromParent();
|
||||
}
|
||||
MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
|
||||
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
|
||||
Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(),
|
||||
TII->get(TargetOpcode::COPY),
|
||||
CopyInfos[i].first)
|
||||
.addReg(CopyInfos[i].second));
|
||||
}
|
||||
appendCopies(PrevBB, CopyInfos, Copies);
|
||||
} else {
|
||||
// No PHIs to worry about, just splice the instructions over.
|
||||
PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
|
||||
@ -863,8 +900,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
|
||||
if (PredBB->succ_size() != 1)
|
||||
continue;
|
||||
|
||||
DenseMap<unsigned, unsigned> LocalVRMap;
|
||||
SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos;
|
||||
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
|
||||
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
|
||||
MachineBasicBlock::iterator I = TailBB->begin();
|
||||
// Process PHI instructions first.
|
||||
while (I != TailBB->end() && I->isPHI()) {
|
||||
@ -873,17 +910,26 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
|
||||
MachineInstr *MI = &*I++;
|
||||
processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
|
||||
}
|
||||
MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
|
||||
for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
|
||||
Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(),
|
||||
TII->get(TargetOpcode::COPY), CopyInfos[i].first)
|
||||
.addReg(CopyInfos[i].second));
|
||||
}
|
||||
appendCopies(PredBB, CopyInfos, Copies);
|
||||
}
|
||||
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// At the end of the block \p MBB generate COPY instructions between registers
|
||||
/// described by \p CopyInfos. Append resulting instructions to \p Copies.
|
||||
void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
|
||||
SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
|
||||
SmallVectorImpl<MachineInstr*> &Copies) {
|
||||
MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
|
||||
const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
|
||||
for (auto &CI : CopyInfos) {
|
||||
auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
|
||||
.addReg(CI.second.Reg, 0, CI.second.SubReg);
|
||||
Copies.push_back(C);
|
||||
}
|
||||
}
|
||||
|
||||
/// Remove the specified dead machine basic block from the function, updating
|
||||
/// the CFG.
|
||||
void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
|
||||
|
67
test/CodeGen/Hexagon/tail-dup-subreg-map.ll
Normal file
67
test/CodeGen/Hexagon/tail-dup-subreg-map.ll
Normal file
@ -0,0 +1,67 @@
|
||||
; RUN: llc -march=hexagon < %s | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
|
||||
; When tail-duplicating a block with PHI nodes that use subregisters, the
|
||||
; subregisters were dropped by the tail duplicator, resulting in invalid
|
||||
; COPY instructions being generated.
|
||||
|
||||
; CHECK: = extractu(r{{[0-9]+}}, #15, #17)
|
||||
|
||||
target triple = "hexagon"
|
||||
|
||||
%struct.0 = type { i64, i16 }
|
||||
%struct.1 = type { i64, i64 }
|
||||
|
||||
declare hidden fastcc void @foo(%struct.0* noalias nocapture, i8 signext, i8 zeroext, i32, i64, i64) unnamed_addr #0
|
||||
|
||||
define void @fred(%struct.0* noalias nocapture sret %agg.result, %struct.1* byval nocapture readonly align 8 %a) #1 {
|
||||
entry:
|
||||
%0 = load i64, i64* undef, align 8
|
||||
switch i32 undef, label %if.else [
|
||||
i32 32767, label %if.then
|
||||
i32 0, label %if.then7
|
||||
]
|
||||
|
||||
if.then: ; preds = %entry
|
||||
ret void
|
||||
|
||||
if.then7: ; preds = %entry
|
||||
br i1 undef, label %if.then.i, label %if.else16.i
|
||||
|
||||
if.then.i: ; preds = %if.then7
|
||||
br i1 undef, label %if.then5.i, label %if.else.i
|
||||
|
||||
if.then5.i: ; preds = %if.then.i
|
||||
%shl.i21 = shl i64 %0, 0
|
||||
br label %if.end.i
|
||||
|
||||
if.else.i: ; preds = %if.then.i
|
||||
%shl12.i = shl i64 %0, undef
|
||||
br label %if.end.i
|
||||
|
||||
if.end.i: ; preds = %if.else.i, %if.then5.i
|
||||
%aSig0.0 = phi i64 [ undef, %if.then5.i ], [ %shl12.i, %if.else.i ]
|
||||
%storemerge43.i = phi i64 [ %shl.i21, %if.then5.i ], [ 0, %if.else.i ]
|
||||
%sub15.i = sub nsw i32 -63, undef
|
||||
br label %if.end13
|
||||
|
||||
if.else16.i: ; preds = %if.then7
|
||||
br label %if.end13
|
||||
|
||||
if.else: ; preds = %entry
|
||||
%or12 = or i64 undef, 281474976710656
|
||||
br label %if.end13
|
||||
|
||||
if.end13: ; preds = %if.else, %if.else16.i, %if.end.i
|
||||
%aSig1.1 = phi i64 [ %0, %if.else ], [ %storemerge43.i, %if.end.i ], [ undef, %if.else16.i ]
|
||||
%aSig0.2 = phi i64 [ %or12, %if.else ], [ %aSig0.0, %if.end.i ], [ undef, %if.else16.i ]
|
||||
%aExp.0 = phi i32 [ undef, %if.else ], [ %sub15.i, %if.end.i ], [ undef, %if.else16.i ]
|
||||
%shl2.i = shl i64 %aSig0.2, 15
|
||||
%shr.i = lshr i64 %aSig1.1, 49
|
||||
%or.i = or i64 %shl2.i, %shr.i
|
||||
tail call fastcc void @foo(%struct.0* noalias %agg.result, i8 signext 80, i8 zeroext undef, i32 %aExp.0, i64 %or.i, i64 undef)
|
||||
unreachable
|
||||
}
|
||||
|
||||
attributes #0 = { norecurse nounwind }
|
||||
attributes #1 = { nounwind }
|
Loading…
Reference in New Issue
Block a user