Clean up spill weight computation. Also some changes to give loop induction

variable increment / decrement slighter high priority. This has major impact on some micro-benchmarks. On MultiSource/Applications and spec tests, it's a minor win. It also reduce 256.bzip instruction count by 8%, 55 on 164.gzip on i386 / Darwin. llvm-svn: 82485
2024-10-20 03:23:01 +02:00 · 2009-09-21 21:12:25 +00:00 · 2009-09-21 21:12:25 +00:00 · a6d602a5c1
commit a6d602a5c1
parent 0db84be1dc
9 changed files with 145 additions and 68 deletions
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@ -2535,7 +2535,8 @@ void SimpleRegisterCoalescing::releaseMemory() {
  ReMatDefs.clear();
 }

-bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const {
+/// Returns true if the given live interval is zero length.
+static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {
  for (LiveInterval::Ranges::const_iterator
         i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
    if (li_->getPrevIndex(i->end) > i->start)
@ -2543,6 +2544,97 @@ bool SimpleRegisterCoalescing::isZeroLengthInterval(LiveInterval *li) const {
  return true;
 }

+void SimpleRegisterCoalescing::CalculateSpillWeights() {
+  SmallSet<unsigned, 4> Processed;
+  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+       mbbi != mbbe; ++mbbi) {
+    MachineBasicBlock* MBB = mbbi;
+    MachineInstrIndex MBBEnd = li_->getMBBEndIdx(MBB);
+    MachineLoop* loop = loopInfo->getLoopFor(MBB);
+    unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
+    bool isExit = loop ? loop->isLoopExit(MBB) : false;
+
+    for (MachineBasicBlock::iterator mii = MBB->begin(), mie = MBB->end();
+         mii != mie; ++mii) {
+      MachineInstr *MI = mii;
+
+      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+        const MachineOperand &mopi = MI->getOperand(i);
+        if (!mopi.isReg() || mopi.getReg() == 0)
+          continue;
+        unsigned Reg = mopi.getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
+          continue;
+        // Multiple uses of reg by the same instruction. It should not
+        // contribute to spill weight again.
+        if (!Processed.insert(Reg))
+          continue;
+
+        bool HasDef = mopi.isDef();
+        bool HasUse = mopi.isUse();
+        for (unsigned j = i+1; j != e; ++j) {
+          const MachineOperand &mopj = MI->getOperand(j);
+          if (!mopj.isReg() || mopj.getReg() != Reg)
+            continue;
+          HasDef |= mopj.isDef();
+          HasUse |= mopj.isUse();
+        }
+
+        LiveInterval &RegInt = li_->getInterval(Reg);
+        float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth+1);
+        if (HasDef && isExit) {
+          // Looks like this is a loop count variable update.
+          MachineInstrIndex DefIdx =
+            li_->getDefIndex(li_->getInstructionIndex(MI));
+          const LiveRange *DLR =
+            li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
+          if (DLR->end > MBBEnd)
+            Weight *= 3.0F;
+        }
+        RegInt.weight += Weight;
+      }
+      Processed.clear();
+    }
+  }
+
+  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
+    LiveInterval &LI = *I->second;
+    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+      // If the live interval length is essentially zero, i.e. in every live
+      // range the use follows def immediately, it doesn't make sense to spill
+      // it and hope it will be easier to allocate for this li.
+      if (isZeroLengthInterval(&LI, li_)) {
+        LI.weight = HUGE_VALF;
+        continue;
+      }
+
+      bool isLoad = false;
+      SmallVector<LiveInterval*, 4> SpillIs;
+      if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
+        // If all of the definitions of the interval are re-materializable,
+        // it is a preferred candidate for spilling. If non of the defs are
+        // loads, then it's potentially very cheap to re-materialize.
+        // FIXME: this gets much more complicated once we support non-trivial
+        // re-materialization.
+        if (isLoad)
+          LI.weight *= 0.9F;
+        else
+          LI.weight *= 0.5F;
+      }
+
+      // Slightly prefer live interval that has been assigned a preferred reg.
+      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
+      if (Hint.first || Hint.second)
+        LI.weight *= 1.01F;
+
+      // Divide the weight of the interval by its size.  This encourages
+      // spilling of intervals that are large and have few uses, and
+      // discourages spilling of small intervals with many uses.
+      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
+    }
+  }
+}
+

 bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
  mf_ = &fn;
@ -2581,8 +2673,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
       mbbi != mbbe; ++mbbi) {
    MachineBasicBlock* mbb = mbbi;
-    unsigned loopDepth = loopInfo->getLoopDepth(mbb);
-
    for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
         mii != mie; ) {
      MachineInstr *MI = mii;
@ -2656,62 +2746,12 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
        mii = mbbi->erase(mii);
        ++numPeep;
      } else {
-        SmallSet<unsigned, 4> UniqueUses;
-        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-          const MachineOperand &mop = MI->getOperand(i);
-          if (mop.isReg() && mop.getReg() &&
-              TargetRegisterInfo::isVirtualRegister(mop.getReg())) {
-            unsigned reg = mop.getReg();
-            // Multiple uses of reg by the same instruction. It should not
-            // contribute to spill weight again.
-            if (UniqueUses.count(reg) != 0)
-              continue;
-            LiveInterval &RegInt = li_->getInterval(reg);
-            RegInt.weight +=
-              li_->getSpillWeight(mop.isDef(), mop.isUse(), loopDepth);
-            UniqueUses.insert(reg);
-          }
-        }
        ++mii;
      }
    }
  }

-  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
-    LiveInterval &LI = *I->second;
-    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
-      // If the live interval length is essentially zero, i.e. in every live
-      // range the use follows def immediately, it doesn't make sense to spill
-      // it and hope it will be easier to allocate for this li.
-      if (isZeroLengthInterval(&LI))
-        LI.weight = HUGE_VALF;
-      else {
-        bool isLoad = false;
-        SmallVector<LiveInterval*, 4> SpillIs;
-        if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
-          // If all of the definitions of the interval are re-materializable,
-          // it is a preferred candidate for spilling. If non of the defs are
-          // loads, then it's potentially very cheap to re-materialize.
-          // FIXME: this gets much more complicated once we support non-trivial
-          // re-materialization.
-          if (isLoad)
-            LI.weight *= 0.9F;
-          else
-            LI.weight *= 0.5F;
-        }
-      }
-
-      // Slightly prefer live interval that has been assigned a preferred reg.
-      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
-      if (Hint.first || Hint.second)
-        LI.weight *= 1.01F;
-
-      // Divide the weight of the interval by its size.  This encourages
-      // spilling of intervals that are large and have few uses, and
-      // discourages spilling of small intervals with many uses.
-      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
-    }
-  }
+  CalculateSpillWeights();

  DEBUG(dump());
  return true;
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@ -123,7 +123,6 @@ namespace llvm {
    /// classes.  The registers may be either phys or virt regs.
    bool differingRegisterClasses(unsigned RegA, unsigned RegB) const;

-
    /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
    /// the source value number is defined by a copy from the destination reg
    /// see if we can merge these two destination reg valno# into a single
@ -235,13 +234,15 @@ namespace llvm {

    /// lastRegisterUse - Returns the last use of the specific register between
    /// cycles Start and End or NULL if there are no uses.
-    MachineOperand *lastRegisterUse(MachineInstrIndex Start, MachineInstrIndex End,
-                                    unsigned Reg, MachineInstrIndex &LastUseIdx) const;
+    MachineOperand *lastRegisterUse(MachineInstrIndex Start,
+                                    MachineInstrIndex End, unsigned Reg,
+                                    MachineInstrIndex &LastUseIdx) const;
+
+    /// CalculateSpillWeights - Compute spill weights for all virtual register
+    /// live intervals.
+    void CalculateSpillWeights();

    void printRegName(unsigned reg) const;
-
-    /// Returns true if the given live interval is zero length.
-    bool isZeroLengthInterval(LiveInterval *li) const;
  };

 } // End llvm namespace
--- a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 164
+; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 161

 	%"struct.Adv5::Ekin<3>" = type <{ i8 }>
 	%"struct.Adv5::X::Energyflux<3>" = type { double }
--- a/test/CodeGen/ARM/remat.ll
+++ b/test/CodeGen/ARM/remat.ll
@ -1,5 +1,5 @@
 ; RUN: llc < %s -mtriple=arm-apple-darwin 
-; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 2
+; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 5

 	%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
 	%struct.LOCBOX = type { i32, i32, i32, i32 }
--- a/test/CodeGen/X86/2008-02-22-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-02-22-ReMatBug.ll
@ -1,5 +1,4 @@
-; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 3
-; RUN: llc < %s -march=x86 -stats |& grep {Number of dead spill slots removed}
+; RUN: llc < %s -march=x86 -stats |& grep {Number of re-materialization} | grep 2
 ; rdar://5761454

 	%struct.quad_struct = type { i32, i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* }
--- a/test/CodeGen/X86/2008-07-11-SpillerBug.ll
+++ b/test/CodeGen/X86/2008-07-11-SpillerBug.ll
@ -2,8 +2,9 @@
 ; PR2536


-; CHECK: movw %ax
+; CHECK: movw %cx
 ; CHECK-NEXT: andl    $65534, %
+; CHECK-NEXT: movl %
 ; CHECK-NEXT: movl $17

@g_5 = external global i16		; <i16*> [#uses=2]
--- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
+++ b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded}
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 84
 ; rdar://6802189

 ; Test if linearscan is unfavoring registers for allocation to allow more reuse
--- a/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
+++ b/test/CodeGen/X86/2009-09-21-NoSpillLoopCount.ll
@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -relocation-model=pic | FileCheck %s
+
+define void @dot(i16* nocapture %A, i32 %As, i16* nocapture %B, i32 %Bs, i16* nocapture %C, i32 %N) nounwind ssp {
+; CHECK: dot:
+; CHECK: decl %
+; CHECK-NEXT: jne
+entry:
+	%0 = icmp sgt i32 %N, 0		; <i1> [#uses=1]
+	br i1 %0, label %bb, label %bb2
+
+bb:		; preds = %bb, %entry
+	%i.03 = phi i32 [ 0, %entry ], [ %indvar.next, %bb ]		; <i32> [#uses=3]
+	%sum.04 = phi i32 [ 0, %entry ], [ %10, %bb ]		; <i32> [#uses=1]
+	%1 = mul i32 %i.03, %As		; <i32> [#uses=1]
+	%2 = getelementptr i16* %A, i32 %1		; <i16*> [#uses=1]
+	%3 = load i16* %2, align 2		; <i16> [#uses=1]
+	%4 = sext i16 %3 to i32		; <i32> [#uses=1]
+	%5 = mul i32 %i.03, %Bs		; <i32> [#uses=1]
+	%6 = getelementptr i16* %B, i32 %5		; <i16*> [#uses=1]
+	%7 = load i16* %6, align 2		; <i16> [#uses=1]
+	%8 = sext i16 %7 to i32		; <i32> [#uses=1]
+	%9 = mul i32 %8, %4		; <i32> [#uses=1]
+	%10 = add i32 %9, %sum.04		; <i32> [#uses=2]
+	%indvar.next = add i32 %i.03, 1		; <i32> [#uses=2]
+	%exitcond = icmp eq i32 %indvar.next, %N		; <i1> [#uses=1]
+	br i1 %exitcond, label %bb1.bb2_crit_edge, label %bb
+
+bb1.bb2_crit_edge:		; preds = %bb
+	%phitmp = trunc i32 %10 to i16		; <i16> [#uses=1]
+	br label %bb2
+
+bb2:		; preds = %entry, %bb1.bb2_crit_edge
+	%sum.0.lcssa = phi i16 [ %phitmp, %bb1.bb2_crit_edge ], [ 0, %entry ]		; <i16> [#uses=1]
+	store i16 %sum.0.lcssa, i16* %C, align 2
+	ret void
+}
--- a/test/CodeGen/X86/stack-color-with-reg.ll
+++ b/test/CodeGen/X86/stack-color-with-reg.ll
@ -1,6 +1,6 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
-; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 8
-; RUN:   grep asm-printer %t   | grep 182
+; RUN:   grep stackcoloring %t | grep "stack slot refs replaced with reg refs"  | grep 5
+; RUN:   grep asm-printer %t   | grep 179

 	type { [62 x %struct.Bitvec*] }		; type %0
 	type { i8* }		; type %1