1
0
mirror of https://github.com/RPCS3/llvm-mirror.git synced 2024-11-23 11:13:28 +01:00

[Hexagon] Mark dead defs as <dead> in expand-condsets

The code in updateDeadFlags removed unnecessary <dead> flags, but there
can be cases where such a flag is not set, and yet a register has become
dead. For example, if a mux with identical inputs is replaced with a COPY,
the predicate register may no longer be used after that.

llvm-svn: 297032
This commit is contained in:
Krzysztof Parzyszek 2017-03-06 17:09:06 +00:00
parent fbbcfd906f
commit 90d780b0a6
3 changed files with 127 additions and 12 deletions

View File

@ -362,14 +362,16 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
if (Range.empty())
return;
auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> bool {
// Return two booleans: { def-modifies-reg, def-covers-reg }.
auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> {
if (!Op.isReg() || !Op.isDef())
return false;
return { false, false };
unsigned DR = Op.getReg(), DSR = Op.getSubReg();
if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg)
return false;
return { false, false };
LaneBitmask SLM = getLaneMask(DR, DSR);
return (SLM & LM).any();
LaneBitmask A = SLM & LM;
return { A.any(), A == SLM };
};
// The splitting step will create pairs of predicated definitions without
@ -453,20 +455,27 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
// Remove <dead> flags from all defs that are not dead after live range
// extension, and collect all def operands. They will be used to generate
// the necessary implicit uses.
// At the same time, add <dead> flag to all defs that are actually dead.
// This can happen, for example, when a mux with identical inputs is
// replaced with a COPY: the use of the predicate register disappears and
// the def can become dead.
std::set<RegisterRef> DefRegs;
for (auto &Seg : Range) {
if (!Seg.start.isRegister())
continue;
MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
for (auto &Op : DefI->operands()) {
if (Seg.start.isDead() || !IsRegDef(Op))
continue;
DefRegs.insert(Op);
Op.setIsDead(false);
auto P = IsRegDef(Op);
if (P.second && Seg.end.isDead()) {
Op.setIsDead(true);
} else if (P.first) {
DefRegs.insert(Op);
Op.setIsDead(false);
}
}
}
// Finally, add implicit uses to each predicated def that is reached
// Now, add implicit uses to each predicated def that is reached
// by other defs.
for (auto &Seg : Range) {
if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot()))
@ -486,6 +495,7 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
for (RegisterRef R : ImpUses)
MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub);
}
}
void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
@ -622,6 +632,12 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
bool ReadUndef = MD.isUndef();
MachineBasicBlock::iterator At = MI;
auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void {
for (auto &Op : MI.operands())
if (Op.isReg())
UpdRegs.insert(Op.getReg());
};
// If this is a mux of the same register, just replace it with COPY.
// Ideally, this would happen earlier, so that register coalescing would
// see it.
@ -630,6 +646,8 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
if (ST.isReg() && SF.isReg()) {
RegisterRef RT(ST);
if (RT == RegisterRef(SF)) {
// Copy regs to update first.
updateRegs(MI);
MI.setDesc(HII->get(TargetOpcode::COPY));
unsigned S = getRegState(ST);
while (MI.getNumOperands() > 1)
@ -651,9 +669,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
LIS->InsertMachineInstrInMaps(*TfrF);
// Will need to recalculate live intervals for all registers in MI.
for (auto &Op : MI.operands())
if (Op.isReg())
UpdRegs.insert(Op.getReg());
updateRegs(MI);
removeInstr(MI);
return true;

View File

@ -0,0 +1,54 @@
; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
; REQUIRES: asserts
; Check for some output other than crashing.
; CHECK: bitsset
target triple = "hexagon"
; Function Attrs: nounwind
; Reduced regression input: every pointer operand and several branch/select
; conditions are undef, so the computed values carry no meaning. The test
; only requires that llc (run with -verify-machineinstrs) produces output
; instead of crashing.
; NOTE(review): presumably this reproduces the missing <dead> flag problem in
; the Hexagon expand-condsets pass described in the commit message - confirm.
define void @fred() local_unnamed_addr #0 {
b0:
%v1 = load i32, i32* undef, align 4
%v2 = and i32 %v1, 603979776
%v3 = trunc i32 %v2 to i30
; Three-way dispatch on the masked value: b4 and b5 are the explicit cases,
; b23 is the default.
switch i30 %v3, label %b23 [
i30 -536870912, label %b4
i30 -469762048, label %b5
]
b4: ; preds = %b0
unreachable
b5: ; preds = %b0
%v6 = load i32, i32* undef, align 4
; Both successors fall straight through to b9.
br i1 undef, label %b7, label %b8
b7: ; preds = %b5
br label %b9
b8: ; preds = %b5
br label %b9
b9: ; preds = %b8, %b7
%v10 = load i32, i32* undef, align 4
%v11 = load i32, i32* undef, align 4
%v12 = mul nsw i32 %v11, %v10
%v13 = ashr i32 %v12, 13
%v14 = mul nsw i32 %v13, %v13
%v15 = zext i32 %v14 to i64
%v16 = mul nsw i32 %v6, %v6
%v17 = zext i32 %v16 to i64
%v18 = lshr i64 %v17, 5
; 64-bit select whose condition is undef.
; NOTE(review): likely the conditional-transfer candidate that the pass under
; test processes - verify against the generated machine code.
%v19 = select i1 undef, i64 %v18, i64 %v17
%v20 = mul nuw nsw i64 %v19, %v15
%v21 = trunc i64 %v20 to i32
%v22 = and i32 %v21, 2147483647
store i32 %v22, i32* undef, align 4
unreachable
b23: ; preds = %b0
ret void
}
attributes #0 = { nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-hvx-double,-long-calls" }

View File

@ -0,0 +1,45 @@
; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s
; REQUIRES: asserts
; Check for some output (as opposed to a crash).
; CHECK: loop0
target triple = "hexagon"
@x = external local_unnamed_addr global [80 x i32], align 8
; Function Attrs: nounwind
; Reduced regression input: a two-level loop nest with no exit (b20 always
; branches back to b1), whose inner loop b2 contains a long chain of selects
; with undef or constant-false conditions. The test only requires that llc
; (run with -verify-machineinstrs) produces output instead of crashing.
; NOTE(review): presumably the select chain is what exercises the Hexagon
; expand-condsets pass touched by this commit - confirm.
define void @fred() local_unnamed_addr #0 {
b0:
br label %b1
b1: ; preds = %b20, %b0
br label %b2
b2: ; preds = %b2, %b1
; Loop-carried values: %v3 advances by 8 per iteration, %v4 carries the
; result of the select chain, and %v5 is a down-counter that starts undef.
%v3 = phi i32 [ 0, %b1 ], [ %v17, %b2 ]
%v4 = phi i32 [ 0, %b1 ], [ %v16, %b2 ]
%v5 = phi i32 [ undef, %b1 ], [ %v18, %b2 ]
%v6 = load i32, i32* undef, align 8
%v7 = icmp sgt i32 %v6, undef
; Chain of selects; each one feeds its result into the false operand of the
; next, ending in %v16.
%v8 = select i1 %v7, i32 %v3, i32 %v4
%v9 = select i1 undef, i32 0, i32 %v8
%v10 = select i1 undef, i32 undef, i32 %v9
%v11 = select i1 undef, i32 0, i32 %v10
%v12 = icmp sgt i32 undef, 0
%v13 = select i1 %v12, i32 undef, i32 %v11
%v14 = select i1 false, i32 undef, i32 %v13
%v15 = select i1 false, i32 undef, i32 %v14
%v16 = select i1 false, i32 undef, i32 %v15
%v17 = add nsw i32 %v3, 8
%v18 = add i32 %v5, -8
; Leave the inner loop once the down-counter reaches zero.
%v19 = icmp eq i32 %v18, 0
br i1 %v19, label %b20, label %b2
b20: ; preds = %b2
; Store the constant -2000 into @x at the index produced by the select chain.
%v21 = getelementptr inbounds [80 x i32], [80 x i32]* @x, i32 0, i32 %v16
store i32 -2000, i32* %v21, align 4
br label %b1
}
attributes #0 = { nounwind "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" }