From 90d780b0a632cc3a96244295569f821924d0d0db Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 6 Mar 2017 17:09:06 +0000 Subject: [PATCH] [Hexagon] Mark dead defs as <dead> in expand-condsets The code in updateDeadFlags removed unnecessary <dead> flags, but there can be cases where such a flag is not set, and yet a register has become dead. For example, if a mux with identical inputs is replaced with a COPY, the predicate register may no longer be used after that. llvm-svn: 297032 --- lib/Target/Hexagon/HexagonExpandCondsets.cpp | 40 +++++++++----- .../Hexagon/expand-condsets-dead-bad.ll | 54 +++++++++++++++++++ .../Hexagon/expand-condsets-dead-pred.ll | 45 ++++++++++++++++ 3 files changed, 127 insertions(+), 12 deletions(-) create mode 100644 test/CodeGen/Hexagon/expand-condsets-dead-bad.ll create mode 100644 test/CodeGen/Hexagon/expand-condsets-dead-pred.ll diff --git a/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/lib/Target/Hexagon/HexagonExpandCondsets.cpp index 624a713a80d..d8ba5dcd35a 100644 --- a/lib/Target/Hexagon/HexagonExpandCondsets.cpp +++ b/lib/Target/Hexagon/HexagonExpandCondsets.cpp @@ -362,14 +362,16 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, if (Range.empty()) return; - auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> bool { + // Return two booleans: { def-modifies-reg, def-covers-reg }. 
+ auto IsRegDef = [this,Reg,LM] (MachineOperand &Op) -> std::pair<bool,bool> { if (!Op.isReg() || !Op.isDef()) - return false; + return { false, false }; unsigned DR = Op.getReg(), DSR = Op.getSubReg(); if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg) - return false; + return { false, false }; LaneBitmask SLM = getLaneMask(DR, DSR); - return (SLM & LM).any(); + LaneBitmask A = SLM & LM; + return { A.any(), A == SLM }; }; // The splitting step will create pairs of predicated definitions without @@ -453,20 +455,27 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, // Remove <dead> flags from all defs that are not dead after live range // extension, and collect all def operands. They will be used to generate // the necessary implicit uses. + // At the same time, add <dead> flag to all defs that are actually dead. + // This can happen, for example, when a mux with identical inputs is + // replaced with a COPY: the use of the predicate register disappears and + // the def can become dead. std::set<RegisterRef> DefRegs; for (auto &Seg : Range) { if (!Seg.start.isRegister()) continue; MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start); for (auto &Op : DefI->operands()) { - if (Seg.start.isDead() || !IsRegDef(Op)) - continue; - DefRegs.insert(Op); - Op.setIsDead(false); + auto P = IsRegDef(Op); + if (P.second && Seg.end.isDead()) { + Op.setIsDead(true); + } else if (P.first) { + DefRegs.insert(Op); + Op.setIsDead(false); + } } } - // Finally, add implicit uses to each predicated def that is reached + // Now, add implicit uses to each predicated def that is reached // by other defs. 
for (auto &Seg : Range) { if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot())) @@ -486,6 +495,7 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM, for (RegisterRef R : ImpUses) MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub); } + } void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) { @@ -622,6 +632,12 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, bool ReadUndef = MD.isUndef(); MachineBasicBlock::iterator At = MI; + auto updateRegs = [&UpdRegs] (const MachineInstr &MI) -> void { + for (auto &Op : MI.operands()) + if (Op.isReg()) + UpdRegs.insert(Op.getReg()); + }; + // If this is a mux of the same register, just replace it with COPY. // Ideally, this would happen earlier, so that register coalescing would // see it. @@ -630,6 +646,8 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, if (ST.isReg() && SF.isReg()) { RegisterRef RT(ST); if (RT == RegisterRef(SF)) { + // Copy regs to update first. + updateRegs(MI); MI.setDesc(HII->get(TargetOpcode::COPY)); unsigned S = getRegState(ST); while (MI.getNumOperands() > 1) @@ -651,9 +669,7 @@ bool HexagonExpandCondsets::split(MachineInstr &MI, LIS->InsertMachineInstrInMaps(*TfrF); // Will need to recalculate live intervals for all registers in MI. - for (auto &Op : MI.operands()) - if (Op.isReg()) - UpdRegs.insert(Op.getReg()); + updateRegs(MI); removeInstr(MI); return true; diff --git a/test/CodeGen/Hexagon/expand-condsets-dead-bad.ll b/test/CodeGen/Hexagon/expand-condsets-dead-bad.ll new file mode 100644 index 00000000000..ce7f5e0ce12 --- /dev/null +++ b/test/CodeGen/Hexagon/expand-condsets-dead-bad.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s +; REQUIRES: asserts + +; Check for some output other than crashing. 
+; CHECK: bitsset + +target triple = "hexagon" + +; Function Attrs: nounwind +define void @fred() local_unnamed_addr #0 { +b0: + %v1 = load i32, i32* undef, align 4 + %v2 = and i32 %v1, 603979776 + %v3 = trunc i32 %v2 to i30 + switch i30 %v3, label %b23 [ + i30 -536870912, label %b4 + i30 -469762048, label %b5 + ] + +b4: ; preds = %b0 + unreachable + +b5: ; preds = %b0 + %v6 = load i32, i32* undef, align 4 + br i1 undef, label %b7, label %b8 + +b7: ; preds = %b5 + br label %b9 + +b8: ; preds = %b5 + br label %b9 + +b9: ; preds = %b8, %b7 + %v10 = load i32, i32* undef, align 4 + %v11 = load i32, i32* undef, align 4 + %v12 = mul nsw i32 %v11, %v10 + %v13 = ashr i32 %v12, 13 + %v14 = mul nsw i32 %v13, %v13 + %v15 = zext i32 %v14 to i64 + %v16 = mul nsw i32 %v6, %v6 + %v17 = zext i32 %v16 to i64 + %v18 = lshr i64 %v17, 5 + %v19 = select i1 undef, i64 %v18, i64 %v17 + %v20 = mul nuw nsw i64 %v19, %v15 + %v21 = trunc i64 %v20 to i32 + %v22 = and i32 %v21, 2147483647 + store i32 %v22, i32* undef, align 4 + unreachable + +b23: ; preds = %b0 + ret void +} + +attributes #0 = { nounwind "target-cpu"="hexagonv5" "target-features"="-hvx,-hvx-double,-long-calls" } diff --git a/test/CodeGen/Hexagon/expand-condsets-dead-pred.ll b/test/CodeGen/Hexagon/expand-condsets-dead-pred.ll new file mode 100644 index 00000000000..ecec83625e1 --- /dev/null +++ b/test/CodeGen/Hexagon/expand-condsets-dead-pred.ll @@ -0,0 +1,45 @@ +; RUN: llc -march=hexagon -verify-machineinstrs < %s | FileCheck %s +; REQUIRES: asserts + +; Check for some output (as opposed to a crash). 
+; CHECK: loop0 + +target triple = "hexagon" + +@x = external local_unnamed_addr global [80 x i32], align 8 + +; Function Attrs: nounwind +define void @fred() local_unnamed_addr #0 { +b0: + br label %b1 + +b1: ; preds = %b20, %b0 + br label %b2 + +b2: ; preds = %b2, %b1 + %v3 = phi i32 [ 0, %b1 ], [ %v17, %b2 ] + %v4 = phi i32 [ 0, %b1 ], [ %v16, %b2 ] + %v5 = phi i32 [ undef, %b1 ], [ %v18, %b2 ] + %v6 = load i32, i32* undef, align 8 + %v7 = icmp sgt i32 %v6, undef + %v8 = select i1 %v7, i32 %v3, i32 %v4 + %v9 = select i1 undef, i32 0, i32 %v8 + %v10 = select i1 undef, i32 undef, i32 %v9 + %v11 = select i1 undef, i32 0, i32 %v10 + %v12 = icmp sgt i32 undef, 0 + %v13 = select i1 %v12, i32 undef, i32 %v11 + %v14 = select i1 false, i32 undef, i32 %v13 + %v15 = select i1 false, i32 undef, i32 %v14 + %v16 = select i1 false, i32 undef, i32 %v15 + %v17 = add nsw i32 %v3, 8 + %v18 = add i32 %v5, -8 + %v19 = icmp eq i32 %v18, 0 + br i1 %v19, label %b20, label %b2 + +b20: ; preds = %b2 + %v21 = getelementptr inbounds [80 x i32], [80 x i32]* @x, i32 0, i32 %v16 + store i32 -2000, i32* %v21, align 4 + br label %b1 +} + +attributes #0 = { nounwind "target-cpu"="hexagonv55" "target-features"="-hvx,-hvx-double,-long-calls" }