From 6f84fd77635bcd04954554952ef5b78b5ad8e65f Mon Sep 17 00:00:00 2001
From: Vedant Kumar <vsk@apple.com>
Date: Wed, 27 May 2020 18:19:54 -0700
Subject: [PATCH] [LiveDebugValues] Speed up removeEntryValue, NFC

Summary:
Instead of iterating over all VarLoc IDs in removeEntryValue(), just
iterate over the interval reserved for entry value VarLocs. This changes
the iteration order, hence the test update -- otherwise this is NFC.

This appears to give an ~8.5x wall time speed-up for LiveDebugValues when
compiling sqlite3.c 3.30.1 with a Release clang (on my machine):

```
          ---User Time---   --System Time--   --User+System--   ---Wall Time--- --- Name ---
  Before: 2.5402 ( 18.8%)   0.0050 (  0.4%)   2.5452 ( 17.3%)   2.5452 ( 17.3%) Live DEBUG_VALUE analysis
   After: 0.2364 (  2.1%)   0.0034 (  0.3%)   0.2399 (  2.0%)   0.2398 (  2.0%) Live DEBUG_VALUE analysis
```

The change in removeEntryValue() is the only one that appears to affect
wall time, but for consistency (and to resolve a pending TODO), I made
the analogous changes for iterating over SpillLocKind VarLocs.

Reviewers: nikic, aprantl, jmorse, djtodoro

Subscribers: hiraditya, dexonsmith, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80684
---
 include/llvm/ADT/CoalescingBitVector.h        |  14 ++
 lib/CodeGen/LiveDebugValues.cpp               | 124 ++++++++++++++----
 .../MIR/X86/entry-values-diamond-bbs.mir      |   6 +-
 unittests/ADT/CoalescingBitVectorTest.cpp     |  55 ++++++++
 4 files changed, 170 insertions(+), 29 deletions(-)

diff --git a/include/llvm/ADT/CoalescingBitVector.h b/include/llvm/ADT/CoalescingBitVector.h
index 647857435b1..f8c8fec0ec9 100644
--- a/include/llvm/ADT/CoalescingBitVector.h
+++ b/include/llvm/ADT/CoalescingBitVector.h
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/IntervalMap.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/iterator_range.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 
@@ -352,6 +353,19 @@ public:
     return It;
   }
 
+  /// Return a range iterator which iterates over all of the set bits in the
+  /// half-open range [Start, End).
+  iterator_range<const_iterator> half_open_range(IndexT Start,
+                                                 IndexT End) const {
+    assert(Start < End && "Not a valid range");
+    auto StartIt = find(Start);
+    if (StartIt == end() || *StartIt >= End)
+      return {end(), end()};
+    auto EndIt = StartIt;
+    EndIt.advanceToLowerBound(End);
+    return {StartIt, EndIt};
+  }
+
   void print(raw_ostream &OS) const {
     OS << "{";
     for (auto It = Intervals.begin(), End = Intervals.end(); It != End;
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index 2d11a23e9ed..abda001fa4a 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -137,16 +137,24 @@ using VarLocSet = CoalescingBitVector<uint64_t>;
 /// Why encode a location /into/ the VarLocMap index? This makes it possible
 /// to find the open VarLocs killed by a register def very quickly. This is a
 /// performance-critical operation for LiveDebugValues.
-///
-/// TODO: Consider adding reserved intervals for kinds of VarLocs other than
-/// RegisterKind, like SpillLocKind or EntryValueKind, to optimize iteration
-/// over open locations.
 struct LocIndex {
   uint32_t Location; // Physical registers live in the range [1;2^30) (see
                      // \ref MCRegister), so we have plenty of range left here
                      // to encode non-register locations.
   uint32_t Index;
 
+  /// The first location greater than 0 that is not reserved for VarLocs of
+  /// kind RegisterKind.
+  static constexpr uint32_t kFirstInvalidRegLocation = 1 << 30;
+
+  /// A special location reserved for VarLocs of kind SpillLocKind.
+  static constexpr uint32_t kSpillLocation = kFirstInvalidRegLocation;
+
+  /// A special location reserved for VarLocs of kind EntryValueBackupKind and
+  /// EntryValueCopyBackupKind.
+  static constexpr uint32_t kEntryValueBackupLocation =
+      kFirstInvalidRegLocation + 1;
+
   LocIndex(uint32_t Location, uint32_t Index)
       : Location(Location), Index(Index) {}
 
@@ -166,6 +174,14 @@ struct LocIndex {
   static uint64_t rawIndexForReg(uint32_t Reg) {
     return LocIndex(Reg, 0).getAsRawInteger();
   }
+
+  /// Return a range covering all set indices in the interval reserved for
+  /// \p Location in \p Set.
+  static auto indexRangeForLocation(const VarLocSet &Set, uint32_t Location) {
+    uint64_t Start = LocIndex(Location, 0).getAsRawInteger();
+    uint64_t End = LocIndex(Location + 1, 0).getAsRawInteger();
+    return Set.half_open_range(Start, End);
+  }
 };
 
 class LiveDebugValues : public MachineFunctionPass {
@@ -211,6 +227,9 @@ private:
       bool operator==(const SpillLoc &Other) const {
         return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset;
       }
+      bool operator!=(const SpillLoc &Other) const {
+        return !(*this == Other);
+      }
     };
 
     /// Identity of the variable at this location.
@@ -477,10 +496,27 @@ private:
     /// location.
     SmallDenseMap<uint32_t, std::vector<VarLoc>> Loc2Vars;
 
+    /// Determine the 32-bit location reserved for \p VL, based on its kind.
+    static uint32_t getLocationForVar(const VarLoc &VL) {
+      switch (VL.Kind) {
+      case VarLoc::RegisterKind:
+        assert((VL.Loc.RegNo < LocIndex::kFirstInvalidRegLocation) &&
+               "Physreg out of range?");
+        return VL.Loc.RegNo;
+      case VarLoc::SpillLocKind:
+        return LocIndex::kSpillLocation;
+      case VarLoc::EntryValueBackupKind:
+      case VarLoc::EntryValueCopyBackupKind:
+        return LocIndex::kEntryValueBackupLocation;
+      default:
+        return 0;
+      }
+    }
+
   public:
     /// Retrieve a unique LocIndex for \p VL.
     LocIndex insert(const VarLoc &VL) {
-      uint32_t Location = VL.isDescribedByReg();
+      uint32_t Location = getLocationForVar(VL);
       uint32_t &Index = Var2Index[VL];
       if (!Index) {
         auto &Vars = Loc2Vars[Location];
@@ -577,6 +613,30 @@ private:
              "open ranges are inconsistent");
       return VarLocs.empty();
     }
+
+    /// Get an empty range of VarLoc IDs.
+    auto getEmptyVarLocRange() const {
+      return iterator_range<VarLocSet::const_iterator>(getVarLocs().end(),
+                                                       getVarLocs().end());
+    }
+
+    /// Get all set IDs for VarLocs of kind RegisterKind in \p Reg.
+    auto getRegisterVarLocs(Register Reg) const {
+      return LocIndex::indexRangeForLocation(getVarLocs(), Reg);
+    }
+
+    /// Get all set IDs for VarLocs of kind SpillLocKind.
+    auto getSpillVarLocs() const {
+      return LocIndex::indexRangeForLocation(getVarLocs(),
+                                             LocIndex::kSpillLocation);
+    }
+
+    /// Get all set IDs for VarLocs of kind EntryValueBackupKind or
+    /// EntryValueCopyBackupKind.
+    auto getEntryValueBackupVarLocs() const {
+      return LocIndex::indexRangeForLocation(
+          getVarLocs(), LocIndex::kEntryValueBackupLocation);
+    }
   };
 
   /// Collect all VarLoc IDs from \p CollectFrom for VarLocs of kind
@@ -821,7 +881,10 @@ void LiveDebugValues::getUsedRegs(const VarLocSet &CollectFrom,
   // All register-based VarLocs are assigned indices greater than or equal to
   // FirstRegIndex.
   uint64_t FirstRegIndex = LocIndex::rawIndexForReg(1);
-  for (auto It = CollectFrom.find(FirstRegIndex), End = CollectFrom.end();
+  uint64_t FirstInvalidIndex =
+      LocIndex::rawIndexForReg(LocIndex::kFirstInvalidRegLocation);
+  for (auto It = CollectFrom.find(FirstRegIndex),
+            End = CollectFrom.find(FirstInvalidIndex);
        It != End;) {
     // We found a VarLoc ID for a VarLoc that lives in a register. Figure out
     // which register and add it to UsedRegs.
@@ -924,11 +987,8 @@ bool LiveDebugValues::removeEntryValue(const MachineInstr &MI,
   }
 
   if (TrySalvageEntryValue) {
-    for (uint64_t ID : OpenRanges.getVarLocs()) {
+    for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
       const VarLoc &VL = VarLocIDs[LocIndex::fromRawInteger(ID)];
-      if (!VL.isEntryBackupLoc())
-        continue;
-
       if (VL.getEntryValueCopyBackupReg() == Reg &&
           VL.MI.getOperand(0).getReg() == SrcRegOp->getReg())
         return false;
@@ -1259,10 +1319,11 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
   VarLocSet KillSet(Alloc);
   if (isSpillInstruction(MI, MF)) {
     Loc = extractSpillBaseRegAndOffset(MI);
-    for (uint64_t ID : OpenRanges.getVarLocs()) {
+    for (uint64_t ID : OpenRanges.getSpillVarLocs()) {
       LocIndex Idx = LocIndex::fromRawInteger(ID);
       const VarLoc &VL = VarLocIDs[Idx];
-      if (VL.Kind == VarLoc::SpillLocKind && VL.Loc.SpillLocation == *Loc) {
+      assert(VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?");
+      if (VL.Loc.SpillLocation == *Loc) {
         // This location is overwritten by the current instruction -- terminate
         // the open range, and insert an explicit DBG_VALUE $noreg.
         //
@@ -1298,21 +1359,31 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
                       << "\n");
   }
   // Check if the register or spill location is the location of a debug value.
-  for (uint64_t ID : OpenRanges.getVarLocs()) {
+  auto TransferCandidates = OpenRanges.getEmptyVarLocRange();
+  if (TKind == TransferKind::TransferSpill)
+    TransferCandidates = OpenRanges.getRegisterVarLocs(Reg);
+  else if (TKind == TransferKind::TransferRestore)
+    TransferCandidates = OpenRanges.getSpillVarLocs();
+  for (uint64_t ID : TransferCandidates) {
     LocIndex Idx = LocIndex::fromRawInteger(ID);
     const VarLoc &VL = VarLocIDs[Idx];
-    if (TKind == TransferKind::TransferSpill && VL.isDescribedByReg() == Reg) {
+    if (TKind == TransferKind::TransferSpill) {
+      assert(VL.isDescribedByReg() == Reg && "Broken VarLocSet?");
       LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
                         << VL.Var.getVariable()->getName() << ")\n");
-    } else if (TKind == TransferKind::TransferRestore &&
-               VL.Kind == VarLoc::SpillLocKind &&
-               VL.Loc.SpillLocation == *Loc) {
+    } else {
+      assert(TKind == TransferKind::TransferRestore &&
+             VL.Kind == VarLoc::SpillLocKind && "Broken VarLocSet?");
+      if (VL.Loc.SpillLocation != *Loc)
+        // The spill location is not the location of a debug value.
+        continue;
       LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
                         << VL.Var.getVariable()->getName() << ")\n");
-    } else
-      continue;
+    }
     insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx, TKind,
                             Reg);
+    // FIXME: A comment should explain why it's correct to return early here,
+    // if that is in fact correct.
     return;
   }
 }
@@ -1356,7 +1427,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
   // a parameter describing only a moving of the value around, rather then
   // modifying it, we are still able to use the entry value if needed.
   if (isRegOtherThanSPAndFP(*DestRegOp, MI, TRI)) {
-    for (uint64_t ID : OpenRanges.getVarLocs()) {
+    for (uint64_t ID : OpenRanges.getEntryValueBackupVarLocs()) {
       LocIndex Idx = LocIndex::fromRawInteger(ID);
       const VarLoc &VL = VarLocIDs[Idx];
       if (VL.getEntryValueBackupReg() == SrcReg) {
@@ -1378,13 +1449,14 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
   if (!SrcRegOp->isKill())
     return;
 
-  for (uint64_t ID : OpenRanges.getVarLocs()) {
+  for (uint64_t ID : OpenRanges.getRegisterVarLocs(SrcReg)) {
     LocIndex Idx = LocIndex::fromRawInteger(ID);
-    if (VarLocIDs[Idx].isDescribedByReg() == SrcReg) {
-      insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx,
-                              TransferKind::TransferCopy, DestReg);
-      return;
-    }
+    assert(VarLocIDs[Idx].isDescribedByReg() == SrcReg && "Broken VarLocSet?");
+    insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, Idx,
+                            TransferKind::TransferCopy, DestReg);
+    // FIXME: A comment should explain why it's correct to return early here,
+    // if that is in fact correct.
+    return;
   }
 }
 
diff --git a/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir b/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
index fc7bd93d022..734ae7127a8 100644
--- a/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
+++ b/test/DebugInfo/MIR/X86/entry-values-diamond-bbs.mir
@@ -23,9 +23,9 @@
 # CHECK-NEXT: $ebx = MOV32ri 2
 # CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1)
 # CHECK: bb.3.if.end
-# CHECK-NEXT: DBG_VALUE $edx, $noreg, ![[ARG_Q]], !DIExpression()
-# CHECK-NEXT: DBG_VALUE $ebp, $noreg, ![[ARG_C]], !DIExpression()
-# CHECK-NEXT: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1)
+# CHECK-DAG: DBG_VALUE $edx, $noreg, ![[ARG_Q]], !DIExpression()
+# CHECK-DAG: DBG_VALUE $ebp, $noreg, ![[ARG_C]], !DIExpression()
+# CHECK-DAG: DBG_VALUE $esi, $noreg, ![[ARG_B]], !DIExpression(DW_OP_LLVM_entry_value, 1)
 --- |
   ; ModuleID = 'test.c'
   source_filename = "test.c"
diff --git a/unittests/ADT/CoalescingBitVectorTest.cpp b/unittests/ADT/CoalescingBitVectorTest.cpp
index 4f87bf415be..355426c4d84 100644
--- a/unittests/ADT/CoalescingBitVectorTest.cpp
+++ b/unittests/ADT/CoalescingBitVectorTest.cpp
@@ -31,6 +31,11 @@ bool elementsMatch(const UBitVec &BV, std::initializer_list<unsigned> List) {
   return true;
 }
 
+bool rangesMatch(iterator_range<UBitVec::const_iterator> R,
+                 std::initializer_list<unsigned> List) {
+  return std::equal(R.begin(), R.end(), List.begin(), List.end());
+}
+
 TEST(CoalescingBitVectorTest, Set) {
   UBitVec::Allocator Alloc;
   UBitVec BV1(Alloc);
@@ -486,6 +491,56 @@ TEST(CoalescingBitVectorTest, AdvanceToLowerBound) {
   EXPECT_TRUE(It == BV.end());
 }
 
+TEST(CoalescingBitVectorTest, HalfOpenRange) {
+  UBitVec::Allocator Alloc;
+
+  {
+    UBitVec BV(Alloc);
+    BV.set({1, 2, 3});
+
+    EXPECT_EQ(*BV.find(0), 1U); // find(Start) > Start
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(0, 5), {1, 2, 3}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(1, 4), {1, 2, 3}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(1, 3), {1, 2}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(2, 3), {2}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(2, 4), {2, 3}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(4, 5), {}));
+  }
+
+  {
+    UBitVec BV(Alloc);
+    BV.set({1, 2, 11, 12});
+
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(0, 15), {1, 2, 11, 12}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(1, 13), {1, 2, 11, 12}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(1, 12), {1, 2, 11}));
+
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(0, 5), {1, 2}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(1, 5), {1, 2}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(2, 5), {2}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(1, 2), {1}));
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(13, 14), {}));
+
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(2, 10), {2}));
+  }
+
+  {
+    UBitVec BV(Alloc);
+    BV.set({1});
+
+    EXPECT_EQ(*BV.find(0), 1U); // find(Start) == End
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(0, 1), {}));
+  }
+
+  {
+    UBitVec BV(Alloc);
+    BV.set({5});
+
+    EXPECT_EQ(*BV.find(3), 5U); // find(Start) > End
+    EXPECT_TRUE(rangesMatch(BV.half_open_range(3, 4), {}));
+  }
+}
+
 TEST(CoalescingBitVectorTest, Print) {
   std::string S;
   {