diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 7b64047c39c..1306aa6c831 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -56,12 +56,12 @@ namespace { // State that is updated as we process loops bool Changed; // True if a loop is changed. + bool FirstInLoop; // True if it's the first LICM in the loop. MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. - // For each BB and opcode pair, keep a list of hoisted instructions. - DenseMap, - std::vector > CSEMap; + // For each opcode, keep a list of potential CSE instructions. + DenseMap > CSEMap; public: static char ID; // Pass identification, replacement for typeid MachineLICM() : MachineFunctionPass(&ID) {} @@ -115,6 +115,11 @@ namespace { /// that is safe to hoist, this instruction is called to do the dirty work. /// void Hoist(MachineInstr *MI); + + /// InitCSEMap - Initialize the CSE map with instructions that are in the + /// current loop preheader that may become duplicates of instructions that + /// are hoisted out of the loop. + void InitCSEMap(MachineBasicBlock *BB); }; } // end anonymous namespace @@ -140,7 +145,7 @@ static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DEBUG(errs() << "******** Machine LICM ********\n"); - Changed = false; + Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); @@ -152,8 +157,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DT = &getAnalysis(); AA = &getAnalysis(); - for (MachineLoopInfo::iterator - I = LI->begin(), E = LI->end(); I != E; ++I) { + for (MachineLoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) { CurLoop = *I; // Only visit outer-most preheader-sporting loops. 
@@ -170,7 +174,11 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { if (!CurPreheader) continue; + // CSEMap is initialized for loop header when the first instruction is + // being hoisted. + FirstInLoop = true; HoistRegion(DT->getNode(CurLoop->getHeader())); + CSEMap.clear(); } return Changed; @@ -191,10 +199,7 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end(); MII != E; ) { MachineBasicBlock::iterator NextMII = MII; ++NextMII; - MachineInstr &MI = *MII; - - Hoist(&MI); - + Hoist(&*MII); MII = NextMII; } @@ -430,6 +435,27 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { return NewMIs[0]; } +void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { + for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { + const MachineInstr *MI = &*I; + // FIXME: For now, only hoist re-materializable instructions. LICM will + // increase register pressure. We want to make sure it doesn't increase + // spilling. + if (TII->isTriviallyReMaterializable(MI, AA)) { + unsigned Opcode = MI->getOpcode(); + DenseMap >::iterator + CI = CSEMap.find(Opcode); + if (CI != CSEMap.end()) + CI->second.push_back(MI); + else { + std::vector CSEMIs; + CSEMIs.push_back(MI); + CSEMap.insert(std::make_pair(Opcode, CSEMIs)); + } + } + } +} + /// Hoist - When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. /// @@ -454,11 +480,14 @@ void MachineLICM::Hoist(MachineInstr *MI) { errs() << "\n"; }); + // If this is the first instruction being hoisted to the preheader, + // initialize the CSE map with potential common expressions. + if (FirstInLoop) { InitCSEMap(CurPreheader); FirstInLoop = false; } + // Look for opportunity to CSE the hoisted instruction. 
- std::pair BBOpcPair = - std::make_pair(CurPreheader->getNumber(), MI->getOpcode()); - DenseMap, - std::vector >::iterator CI = CSEMap.find(BBOpcPair); + unsigned Opcode = MI->getOpcode(); + DenseMap >::iterator + CI = CSEMap.find(Opcode); bool DoneCSE = false; if (CI != CSEMap.end()) { const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo); @@ -477,15 +506,15 @@ void MachineLICM::Hoist(MachineInstr *MI) { // Otherwise, splice the instruction to the preheader. if (!DoneCSE) { - CurPreheader->splice(CurPreheader->getFirstTerminator(), - MI->getParent(), MI); + CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); + // Add to the CSE map. if (CI != CSEMap.end()) CI->second.push_back(MI); else { std::vector CSEMIs; CSEMIs.push_back(MI); - CSEMap.insert(std::make_pair(BBOpcPair, CSEMIs)); + CSEMap.insert(std::make_pair(Opcode, CSEMIs)); } } diff --git a/test/CodeGen/ARM/remat.ll b/test/CodeGen/ARM/remat.ll index ba9699efd59..50da997ed46 100644 --- a/test/CodeGen/ARM/remat.ll +++ b/test/CodeGen/ARM/remat.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -mtriple=arm-apple-darwin -; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 4 +; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 5 %struct.CONTENTBOX = type { i32, i32, i32, i32, i32 } %struct.LOCBOX = type { i32, i32, i32, i32 } diff --git a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll index 0626d28eefe..721d4c945b1 100644 --- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll +++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll @@ -1,6 +1,5 @@ -; RUN: llc < %s -march=x86 -mattr=+sse2 -stats |& \ -; RUN: grep {1 .*folded into instructions} -; Increment in loop bb.128.i adjusted to 2, to prevent loop reversal from +; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s +; Increment in loop bb.i28.i adjusted to 2, to prevent 
loop reversal from ; kicking in. declare fastcc void @rdft(i32, i32, double*, i32*, double*) @@ -34,6 +33,9 @@ cond_next36.i: ; preds = %cond_next.i br label %bb.i28.i bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i +; CHECK: %bb.i28.i +; CHECK: addl $2 +; CHECK: addl $2 %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; [#uses=2] %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; [#uses=1] %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; [#uses=2]