From 5e17ea36e18bfbed5322222e6677dd221ce6ec1f Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 12 Jan 2009 03:19:55 +0000 Subject: [PATCH] Fix PR3241: Currently EmitCopyFromReg emits a copy from the physical register to a virtual register unless it requires an expensive cross class copy. That means we are only treating "expensive to copy" register dependency as physical register dependency. Also future proof the scheduler to handle "normal" physical register dependencies. The code is not exercised yet. llvm-svn: 62074 --- include/llvm/CodeGen/ScheduleDAG.h | 2 +- lib/CodeGen/ScheduleDAGEmit.cpp | 7 +-- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 46 ++++++++------- .../SelectionDAG/ScheduleDAGRRList.cpp | 58 ++++++++++--------- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 30 +++++++--- .../SelectionDAG/ScheduleDAGSDNodesEmit.cpp | 11 ++-- test/CodeGen/X86/pr3244.ll | 26 +++++++++ 7 files changed, 114 insertions(+), 66 deletions(-) create mode 100644 test/CodeGen/X86/pr3244.ll diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h index 03d11e2d828..765c26a5941 100644 --- a/include/llvm/CodeGen/ScheduleDAG.h +++ b/include/llvm/CodeGen/ScheduleDAG.h @@ -485,7 +485,7 @@ namespace llvm { protected: void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO); - void EmitCrossRCCopy(SUnit *SU, DenseMap &VRBaseMap); + void EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap); /// ForceUnitLatencies - Return true if all scheduling edges should be given a /// latency value of one. 
The default is to return false; schedulers may diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index d10d670d346..1f40771e3bd 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -36,7 +36,7 @@ void ScheduleDAG::EmitNoop() { TII->insertNoop(*BB, BB->end()); } -void ScheduleDAG::EmitCrossRCCopy(SUnit *SU, +void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap) { for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { @@ -49,12 +49,11 @@ void ScheduleDAG::EmitCrossRCCopy(SUnit *SU, unsigned Reg = 0; for (SUnit::const_succ_iterator II = SU->Succs.begin(), EE = SU->Succs.end(); II != EE; ++II) { - if (I->getReg()) { - Reg = I->getReg(); + if (II->getReg()) { + Reg = II->getReg(); break; } } - assert(I->getReg() && "Unknown physical register!"); TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second, SU->CopyDstRC, SU->CopySrcRC); } else { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 113dfb1751d..b86492992c4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -28,7 +28,7 @@ using namespace llvm; STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); -STATISTIC(NumCCCopies, "Number of cross class copies"); +STATISTIC(NumPRCopies, "Number of physical copies"); static RegisterScheduler fastDAGScheduler("fast", "Fast suboptimal list scheduling", @@ -93,10 +93,10 @@ private: void ReleasePred(SUnit *SU, SDep *PredEdge); void ScheduleNodeBottomUp(SUnit*, unsigned); SUnit *CopyAndMoveSuccessors(SUnit*); - void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, - const TargetRegisterClass*, - const TargetRegisterClass*, - SmallVector&); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector&); bool DelayForLiveRegsBottomUp(SUnit*, 
SmallVector&); void ListScheduleBottomUp(); @@ -361,17 +361,16 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { DelDeps.push_back(std::make_pair(SuccSU, D)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - } ++NumDups; return NewSU; } -/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies -/// and move all scheduled successors of the given SUnit to the last copy. -void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, SmallVector &Copies) { @@ -408,7 +407,7 @@ void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, Copies.push_back(CopyFromSU); Copies.push_back(CopyToSU); - ++NumCCCopies; + ++NumPRCopies; } /// getPhysicalRegisterVT - Returns the ValueType of the physical register @@ -524,19 +523,22 @@ void ScheduleDAGFast::ListScheduleBottomUp() { assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - SUnit *NewDef = CopyAndMoveSuccessors(LRDef); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible to + // copy the value directly. Do not try to duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; if (!NewDef) { - // Issue expensive cross register class copies. 
- MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); - const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); - const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - if (!DestRC) { - assert(false && "Don't know how to copy this physical register!"); - abort(); - } + // Issue copies, these can be expensive cross register class copies. SmallVector Copies; - InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DOUT << "Adding an edge from SU # " << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"; AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 65de7a5c8f8..bc5443eaba8 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -35,7 +35,7 @@ using namespace llvm; STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumDups, "Number of duplicated nodes"); -STATISTIC(NumCCCopies, "Number of cross class copies"); +STATISTIC(NumPRCopies, "Number of physical register copies"); static RegisterScheduler burrListDAGScheduler("list-burr", @@ -121,10 +121,10 @@ private: void UnscheduleNodeBottomUp(SUnit*); void BacktrackBottomUp(SUnit*, unsigned, unsigned&); SUnit *CopyAndMoveSuccessors(SUnit*); - void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, - const TargetRegisterClass*, - const TargetRegisterClass*, - SmallVector&); + void InsertCopiesAndMoveSuccs(SUnit*, unsigned, + const TargetRegisterClass*, + const TargetRegisterClass*, + SmallVector&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); void ListScheduleTopDown(); void ListScheduleBottomUp(); @@ -517,11 +517,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { return NewSU; } -/// InsertCCCopiesAndMoveSuccs - Insert 
expensive cross register class copies -/// and move all scheduled successors of the given SUnit to the last copy. -void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, +/// InsertCopiesAndMoveSuccs - Insert register copies and move all +/// scheduled successors of the given SUnit to the last copy. +void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, SmallVector &Copies) { SUnit *CopyFromSU = CreateNewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; @@ -546,9 +546,8 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, DelDeps.push_back(std::make_pair(SuccSU, *I)); } } - for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { + for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) RemovePred(DelDeps[i].first, DelDeps[i].second); - } AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); @@ -559,7 +558,7 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, Copies.push_back(CopyFromSU); Copies.push_back(CopyToSU); - ++NumCCCopies; + ++NumPRCopies; } /// getPhysicalRegisterVT - Returns the ValueType of the physical register @@ -705,27 +704,32 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { } if (!CurSU) { - // Can't backtrack. Try duplicating the nodes that produces these - // "expensive to copy" values to break the dependency. In case even - // that doesn't work, insert cross class copies. + // Can't backtrack. If it's too expensive to copy the value, then try + // duplicating the nodes that produce these "too expensive to copy" + // values to break the dependency. In case even that doesn't work, + // insert cross class copies. + // If it's not too expensive, i.e. cost != -1, issue copies. 
SUnit *TrySU = NotReady[0]; SmallVector &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; - SUnit *NewDef = CopyAndMoveSuccessors(LRDef); + MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, VT); + const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); + + // If cross copy register class is null, then it must be possible to + // copy the value directly. Do not try to duplicate the def. + SUnit *NewDef = 0; + if (DestRC) + NewDef = CopyAndMoveSuccessors(LRDef); + else + DestRC = RC; if (!NewDef) { - // Issue expensive cross register class copies. - MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); - const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); - const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); - if (!DestRC) { - assert(false && "Don't know how to copy this physical register!"); - abort(); - } + // Issue copies; these can be expensive cross register class copies. SmallVector Copies; - InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); + InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); DOUT << "Adding an edge from SU #" << TrySU->NodeNum << " to SU #" << Copies.front()->NodeNum << "\n"; AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index a8d904c2f67..c755086a8d5 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -39,11 +39,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) { /// CheckForPhysRegDependency - Check if the dependency between def and use of /// a specified operand is a physical register dependency. If so, returns the -/// register. +/// register and the cost of copying the register. 
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, const TargetRegisterInfo *TRI, const TargetInstrInfo *TII, - unsigned &PhysReg) { + unsigned &PhysReg, int &Cost) { if (Op != 2 || User->getOpcode() != ISD::CopyToReg) return; @@ -55,8 +55,12 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, if (Def->isMachineOpcode()) { const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); if (ResNo >= II.getNumDefs() && - II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) + II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; + const TargetRegisterClass *RC = + TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + Cost = RC->getCopyCost(); + } } } @@ -179,10 +183,18 @@ void ScheduleDAGSDNodes::AddSchedEdges() { bool isChain = OpVT == MVT::Other; unsigned PhysReg = 0; + int Cost = 1; // Determine if this is a physical register dependency. - CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg); + CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost); assert((PhysReg == 0 || !isChain) && "Chain dependence via physreg data?"); + // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler + // emits a copy from the physical register to a virtual register unless + // it requires a cross class copy (cost < 0). That means we are only + // treating "expensive to copy" register dependency as physical register + // dependency. This may change in the future though. + if (Cost >= 0) + PhysReg = 0; SU->addPred(SDep(OpSU, isChain ? 
SDep::Order : SDep::Data, OpSU->Latency, PhysReg)); } @@ -252,10 +264,12 @@ unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) { void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { - if (SU->getNode()) - SU->getNode()->dump(DAG); - else - cerr << "CROSS RC COPY "; + if (!SU->getNode()) { + cerr << "PHYS REG COPY\n"; + return; + } + + SU->getNode()->dump(DAG); cerr << "\n"; SmallVector FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp index ebe084d811f..d6179651589 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp @@ -629,6 +629,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // For pre-regalloc scheduling, create instructions corresponding to the // SDNode and any flagged SDNodes and append them to the block. + if (!SU->getNode()) { + // Emit a copy. 
+ EmitPhysRegCopy(SU, CopyVRBaseMap); + continue; + } + SmallVector FlaggedNodes; for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) FlaggedNodes.push_back(N); @@ -636,10 +642,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap); FlaggedNodes.pop_back(); } - if (!SU->getNode()) - EmitCrossRCCopy(SU, CopyVRBaseMap); - else - EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap); + EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap); } return BB; diff --git a/test/CodeGen/X86/pr3244.ll b/test/CodeGen/X86/pr3244.ll new file mode 100644 index 00000000000..0765f86405c --- /dev/null +++ b/test/CodeGen/X86/pr3244.ll @@ -0,0 +1,26 @@ +; RUN: llvm-as < %s | llc -march=x86 +; PR3244 + +@g_62 = external global i16 ; [#uses=1] +@g_487 = external global i32 ; [#uses=1] + +define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind { +entry: + %0 = load i16* @g_62, align 2 ; [#uses=1] + %1 = load i32* @g_487, align 4 ; [#uses=1] + %2 = trunc i16 %0 to i8 ; [#uses=1] + %3 = trunc i32 %1 to i8 ; [#uses=1] + %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1) +nounwind ; [#uses=1] + %5 = trunc i32 %4 to i8 ; [#uses=1] + %6 = mul i8 %3, %2 ; [#uses=1] + %7 = mul i8 %6, %5 ; [#uses=1] + %8 = sext i8 %7 to i16 ; [#uses=1] + %9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind + ; [#uses=0] + ret i32 undef +} + +declare i32 @func_7(...) + +declare i32 @func_85(i16 signext, i32, i32)