Do not fold reload into an instruction with multiple uses. It issues one extra load.

llvm-svn: 44467
2024-11-23 19:23:23 +01:00 · 2007-11-30 21:23:43 +00:00 · 2007-11-30 21:23:43 +00:00 · 90c548af8e
commit 90c548af8e
parent c75545d4a8
3 changed files with 170 additions and 77 deletions
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@ -274,8 +274,9 @@ namespace llvm {
    /// MI. If it is successul, MI is updated with the newly created MI and
    /// returns true.
    bool tryFoldMemoryOperand(MachineInstr* &MI, VirtRegMap &vrm,
-                              MachineInstr *DefMI, unsigned index, unsigned i,
-                              bool isSS, int slot, unsigned reg);
+                              MachineInstr *DefMI, unsigned InstrIdx,
+                              unsigned OpIdx, unsigned NumUses,
+                              bool isSS, int Slot, unsigned Reg);

    /// anyKillInMBBAfterIdx - Returns true if there is a kill of the specified
    /// VNInfo that's after the specified index but is within the basic block.
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@ -642,13 +642,22 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
 /// MI. If it is successul, MI is updated with the newly created MI and
 /// returns true.
 bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
-                                         VirtRegMap &vrm,
-                                         MachineInstr *DefMI,
-                                         unsigned index, unsigned i,
-                                         bool isSS, int slot, unsigned reg) {
+                                         VirtRegMap &vrm, MachineInstr *DefMI,
+                                         unsigned InstrIdx, unsigned OpIdx,
+                                         unsigned NumUses,
+                                         bool isSS, int Slot, unsigned Reg) {
+  // FIXME: fold subreg use
+  if (MI->getOperand(OpIdx).getSubReg())
+    return false;
+
+  // FIXME: It may be possible to fold load when there are multiple uses.
+  // e.g. On x86, TEST32rr r, r -> CMP32rm [mem], 0
+  if (NumUses > 1)
+    return false;
+
  MachineInstr *fmi = isSS
-    ? mri_->foldMemoryOperand(MI, i, slot)
-    : mri_->foldMemoryOperand(MI, i, DefMI);
+    ? mri_->foldMemoryOperand(MI, OpIdx, Slot)
+    : mri_->foldMemoryOperand(MI, OpIdx, DefMI);
  if (fmi) {
    // Attempt to fold the memory reference into the instruction. If
    // we can do this, we don't need to insert spill code.
@ -657,15 +666,13 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI,
    else
      LiveVariables::transferKillDeadInfo(MI, fmi, mri_);
    MachineBasicBlock &MBB = *MI->getParent();
-    if (isSS) {
-      if (!mf_->getFrameInfo()->isFixedObjectIndex(slot))
-        vrm.virtFolded(reg, MI, i, fmi);
-    }
+    if (isSS && !mf_->getFrameInfo()->isFixedObjectIndex(Slot))
+      vrm.virtFolded(Reg, MI, OpIdx, fmi);
    vrm.transferSpillPts(MI, fmi);
    vrm.transferRestorePts(MI, fmi);
    mi2iMap_.erase(MI);
-    i2miMap_[index/InstrSlots::NUM] = fmi;
-    mi2iMap_[fmi] = index;
+    i2miMap_[InstrIdx /InstrSlots::NUM] = fmi;
+    mi2iMap_[fmi] = InstrIdx;
    MI = MBB.insert(MBB.erase(MI), fmi);
    ++numFolds;
    return true;
@ -714,8 +721,6 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
    unsigned RegI = Reg;
    if (Reg == 0 || MRegisterInfo::isPhysicalRegister(Reg))
      continue;
-    unsigned SubIdx = mop.getSubReg();
-    bool isSubReg = SubIdx != 0;
    if (Reg != li.reg)
      continue;

@ -726,6 +731,8 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
      // If this is the rematerializable definition MI itself and
      // all of its uses are rematerialized, simply delete it.
      if (MI == ReMatOrigDefMI && CanDelete) {
+        DOUT << "\t\t\t\tErasing re-materlizable def: ";
+        DOUT << MI << '\n';
        RemoveMachineInstrFromMaps(MI);
        vrm.RemoveMachineInstrFromMaps(MI);
        MI->eraseFromParent();
@ -747,23 +754,6 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
    if (TryFold)
      TryFold = !TrySplit && NewVReg == 0;

-    // FIXME: fold subreg use
-    if (!isSubReg && TryFold &&
-        tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, i, FoldSS, FoldSlot,
-                             Reg))
-      // Folding the load/store can completely change the instruction in
-      // unpredictable ways, rescan it from the beginning.
-      goto RestartInstruction;
-
-    // Create a new virtual register for the spill interval.
-    bool CreatedNewVReg = false;
-    if (NewVReg == 0) {
-      NewVReg = RegMap->createVirtualRegister(rc);
-      vrm.grow();
-      CreatedNewVReg = true;
-    }
-    mop.setReg(NewVReg);
-            
    // Scan all of the operands of this instruction rewriting operands
    // to use NewVReg instead of li.reg as appropriate.  We do this for
    // two reasons:
@ -775,9 +765,11 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
    //
    // Keep track of whether we replace a use and/or def so that we can
    // create the spill interval with the appropriate range. 
-            
+
    HasUse = mop.isUse();
    HasDef = mop.isDef();
+    unsigned NumUses = HasUse;
+    std::vector<unsigned> UpdateOps;
    for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) {
      if (!MI->getOperand(j).isRegister())
        continue;
@ -785,12 +777,37 @@ rewriteInstructionForSpills(const LiveInterval &li, bool TrySplit,
      if (RegJ == 0 || MRegisterInfo::isPhysicalRegister(RegJ))
        continue;
      if (RegJ == RegI) {
-        MI->getOperand(j).setReg(NewVReg);
+        UpdateOps.push_back(j);
+        if (MI->getOperand(j).isUse())
+          ++NumUses;
        HasUse |= MI->getOperand(j).isUse();
        HasDef |= MI->getOperand(j).isDef();
      }
    }

+    if (TryFold &&
+        tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, i,
+                             NumUses, FoldSS, FoldSlot, Reg)) {
+      // Folding the load/store can completely change the instruction in
+      // unpredictable ways, rescan it from the beginning.
+      HasUse = false;
+      HasDef = false;
+      goto RestartInstruction;
+    }
+
+    // Create a new virtual register for the spill interval.
+    bool CreatedNewVReg = false;
+    if (NewVReg == 0) {
+      NewVReg = RegMap->createVirtualRegister(rc);
+      vrm.grow();
+      CreatedNewVReg = true;
+    }
+    mop.setReg(NewVReg);
+
+    // Reuse NewVReg for other reads.
+    for (unsigned j = 0, e = UpdateOps.size(); j != e; ++j)
+      MI->getOperand(UpdateOps[j]).setReg(NewVReg);
+            
    if (CreatedNewVReg) {
      if (DefIsReMat) {
        vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI/*, CanDelete*/);
@ -1197,46 +1214,44 @@ addIntervalsForSpills(const LiveInterval &li,
      for (unsigned i = 0, e = spills.size(); i != e; ++i) {
        int index = spills[i].index;
        unsigned VReg = spills[i].vreg;
-        bool DoFold = spills[i].canFold;
        bool isReMat = vrm.isReMaterialized(VReg);
        MachineInstr *MI = getInstructionFromIndex(index);
        int OpIdx = -1;
-        bool FoldedLoad = false;
-        if (DoFold) {
+        unsigned NumUses = 0;
+        if (spills[i].canFold) {
          for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
            MachineOperand &MO = MI->getOperand(j);
            if (!MO.isRegister() || MO.getReg() != VReg)
              continue;
-            if (MO.isUse()) {
-              // Can't fold if it's two-address code and the use isn't the
-              // first and only use.
-              // If there are more than one uses, a load is still needed.
-              if (!isReMat && !FoldedLoad &&
-                  alsoFoldARestore(Id, index,VReg,RestoreMBBs,RestoreIdxes)) {
-                FoldedLoad = true;
-                continue;
-              } else {
-                OpIdx = -1;
-                break;
-              }
+            if (MO.isDef()) {
+              OpIdx = (int)j;
+              continue;
            }
-            OpIdx = (int)j;
+            // Can't fold if it's two-address code and the use isn't the
+            // first and only use.
+            if (isReMat ||
+                (NumUses == 0 && !alsoFoldARestore(Id, index, VReg, RestoreMBBs,
+                                                   RestoreIdxes))) {
+              OpIdx = -1;
+              break;
+            }
+            ++NumUses;
          }
        }
        // Fold the store into the def if possible.
-        if (OpIdx == -1)
-          DoFold = false;
-        if (DoFold) {
-          if (tryFoldMemoryOperand(MI, vrm, NULL, index,OpIdx,true,Slot,VReg)) {
-            if (FoldedLoad)
+        bool Folded = false;
+        if (OpIdx != -1) {
+          if (tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, NumUses,
+                                   true, Slot, VReg)) {
+            if (NumUses)
              // Folded a two-address instruction, do not issue a load.
              eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes);
-          } else
-            DoFold = false;
+            Folded = true;
+          }
        }

        // Else tell the spiller to issue a store for us.
-        if (!DoFold)
+        if (!Folded)
          vrm.addSpillPoint(VReg, MI);
      }
      Id = SpillMBBs.find_next(Id);
@ -1251,32 +1266,30 @@ addIntervalsForSpills(const LiveInterval &li,
      if (index == -1)
        continue;
      unsigned VReg = restores[i].vreg;
-      bool DoFold = restores[i].canFold;
      MachineInstr *MI = getInstructionFromIndex(index);
+      unsigned NumUses = 0;
      int OpIdx = -1;
-      if (DoFold) {
+      if (restores[i].canFold) {
        for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
          MachineOperand &MO = MI->getOperand(j);
          if (!MO.isRegister() || MO.getReg() != VReg)
            continue;
          if (MO.isDef()) {
-            // Can't fold if it's two-address code.            
+            // Can't fold if it's two-address code and it hasn't already
+            // been folded.
            OpIdx = -1;
            break;
          }
-          if (OpIdx != -1) {
-            // Multiple uses, do not fold!
-            OpIdx = -1;
-            break;
-          }
-          OpIdx = (int)j;
+          if (NumUses == 0)
+            // Use the first use index.
+            OpIdx = (int)j;
+          ++NumUses;
        }
      }

      // Fold the load into the use if possible.
-      if (OpIdx == -1)
-        DoFold = false;
-      if (DoFold) {
+      bool Folded = false;
+      if (OpIdx != -1) {
        if (vrm.isReMaterialized(VReg)) {
          MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg);
          int LdSlot = 0;
@ -1284,17 +1297,15 @@ addIntervalsForSpills(const LiveInterval &li,
          // If the rematerializable def is a load, also try to fold it.
          if (isLoadSS ||
              (ReMatDefMI->getInstrDescriptor()->Flags & M_LOAD_FLAG))
-            DoFold = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, OpIdx,
-                                          isLoadSS, LdSlot, VReg);
-          else
-            DoFold = false;
+            Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, OpIdx,
+                                          NumUses, isLoadSS, LdSlot, VReg);
        } else
-          DoFold = tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx,
+          Folded = tryFoldMemoryOperand(MI, vrm, NULL, index, OpIdx, NumUses,
                                        true, Slot, VReg);
      }
      // If folding is not possible / failed, then tell the spiller to issue a
      // load / rematerialization for us.
-      if (!DoFold)
+      if (!Folded)
        vrm.addRestorePoint(VReg, MI);
    }
    Id = RestoreMBBs.find_next(Id);
--- a/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
+++ b/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
@ -0,0 +1,81 @@
+; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
+; RUN:   grep {1 .*folded into instructions}
+
+declare fastcc void @rdft(i32, i32, double*, i32*, double*)
+
+define fastcc void @mp_sqrt(i32 %n, i32 %radix, i32* %in, i32* %out, i32* %tmp1, i32* %tmp2, i32 %nfft, double* %tmp1fft, double* %tmp2fft, i32* %ip, double* %w) {
+entry:
+	br label %bb.i5
+
+bb.i5:		; preds = %bb.i5, %entry
+	%nfft_init.0.i = phi i32 [ 1, %entry ], [ %tmp7.i3, %bb.i5 ]		; <i32> [#uses=1]
+	%tmp7.i3 = shl i32 %nfft_init.0.i, 1		; <i32> [#uses=2]
+	br i1 false, label %bb.i5, label %mp_unexp_mp2d.exit.i
+
+mp_unexp_mp2d.exit.i:		; preds = %bb.i5
+	br i1 false, label %cond_next.i, label %cond_true.i
+
+cond_true.i:		; preds = %mp_unexp_mp2d.exit.i
+	ret void
+
+cond_next.i:		; preds = %mp_unexp_mp2d.exit.i
+	%tmp22.i = sdiv i32 0, 2		; <i32> [#uses=2]
+	br i1 false, label %cond_true29.i, label %cond_next36.i
+
+cond_true29.i:		; preds = %cond_next.i
+	ret void
+
+cond_next36.i:		; preds = %cond_next.i
+	store i32 %tmp22.i, i32* null, align 4
+	%tmp8.i14.i = select i1 false, i32 1, i32 0		; <i32> [#uses=1]
+	br label %bb.i28.i
+
+bb.i28.i:		; preds = %bb.i28.i, %cond_next36.i
+	%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ]		; <i32> [#uses=2]
+	%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ]		; <double> [#uses=1]
+	%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32		; <i32> [#uses=2]
+	%tmp4.i19.i = icmp slt i32 %tmp1.i18.i, %radix		; <i1> [#uses=1]
+	%x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0		; <i32> [#uses=1]
+	%tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2		; <i32> [#uses=0]
+	%tmp1213.i23.i = sitofp i32 %x.0.i21.i to double		; <double> [#uses=1]
+	%tmp15.i24.i = sub double 0.000000e+00, %tmp1213.i23.i		; <double> [#uses=1]
+	%tmp16.i25.i = mul double 0.000000e+00, %tmp15.i24.i		; <double> [#uses=1]
+	%indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 1		; <i32> [#uses=2]
+	%exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i		; <i1> [#uses=1]
+	br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i
+
+mp_unexp_d2mp.exit29.i:		; preds = %bb.i28.i
+	%tmp46.i = sub i32 0, %tmp22.i		; <i32> [#uses=1]
+	store i32 %tmp46.i, i32* null, align 4
+	br i1 false, label %bb.i.i, label %mp_sqrt_init.exit
+
+bb.i.i:		; preds = %bb.i.i, %mp_unexp_d2mp.exit29.i
+	br label %bb.i.i
+
+mp_sqrt_init.exit:		; preds = %mp_unexp_d2mp.exit29.i
+	tail call fastcc void @mp_mul_csqu( i32 0, double* %tmp1fft )
+	tail call fastcc void @rdft( i32 0, i32 -1, double* null, i32* %ip, double* %w )
+	tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 0, double* %tmp1fft, i32* %tmp2 )
+	br i1 false, label %cond_false.i, label %cond_true36.i
+
+cond_true36.i:		; preds = %mp_sqrt_init.exit
+	ret void
+
+cond_false.i:		; preds = %mp_sqrt_init.exit
+	tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %out )
+	tail call fastcc void @mp_add( i32 0, i32 %radix, i32* %tmp1, i32* %tmp2, i32* %tmp1 )
+	tail call fastcc void @mp_sub( i32 0, i32 %radix, i32* %in, i32* %tmp2, i32* %tmp2 )
+	tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, i32* %tmp1 )
+	tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 %tmp7.i3, double* %tmp2fft, i32* %tmp2 )
+	ret void
+}
+
+declare fastcc void @mp_add(i32, i32, i32*, i32*, i32*)
+
+declare fastcc void @mp_sub(i32, i32, i32*, i32*, i32*)
+
+declare fastcc void @mp_round(i32, i32, i32, i32*)
+
+declare fastcc void @mp_mul_csqu(i32, double*)
+
+declare fastcc void @mp_mul_d2i(i32, i32, i32, double*, i32*)