Revert rL349136: [llvm-exegesis] Optimize ToProcess in dbScan

Summary: Use `vector<char> Added + vector<size_t> ToProcess` to replace `SetVector ToProcess` We also check `Added[P]` to enqueueing a point more than once, which also saves us a `ClusterIdForPoint_[Q].isUndef()` check. Reviewers: courbet, RKSimon, gchatelet, john.brawn, lebedev.ri Subscribers: tschuett, llvm-commits Differential Revision: https://reviews.llvm.org/D54442 ........ Patch wasn't approved and breaks buildbots llvm-svn: 349139
2024-10-19 11:02:59 +02:00 · 2018-12-14 09:25:08 +00:00 · 2018-12-14 09:25:08 +00:00 · 0777513533
commit 0777513533
parent 486c1ccebe
1 changed files with 14 additions and 24 deletions
--- a/tools/llvm-exegesis/lib/Clustering.cpp
+++ b/tools/llvm-exegesis/lib/Clustering.cpp
@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 #include "Clustering.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include <string>
@ -91,14 +92,8 @@ llvm::Error InstructionBenchmarkClustering::validateAndSetup() {
 }
 void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
-  const size_t NumPoints = Points_.size();
+  std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs.
-
+  for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
  // Persistent buffers to avoid allocs.
  std::vector<size_t> Neighbors;
  std::vector<size_t> ToProcess(NumPoints);
  std::vector<char> Added(NumPoints);
  for (size_t P = 0; P < NumPoints; ++P) {
    if (!ClusterIdForPoint_[P].isUndef())
      continue; // Previously processed in inner loop.
    rangeQuery(P, Neighbors);
@ -114,18 +109,14 @@ void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
    Cluster &CurrentCluster = Clusters_.back();
    ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */
    CurrentCluster.PointIndices.push_back(P);
    Added[P] = 1;
    // Process P's neighbors.
-    size_t Tail = 0;
+    llvm::SetVector<size_t, std::deque<size_t>> ToProcess;
-    for (size_t Q : Neighbors)
+    ToProcess.insert(Neighbors.begin(), Neighbors.end());
-      if (!Added[Q]) {
+    while (!ToProcess.empty()) {
        ToProcess[Tail++] = P;
        Added[Q] = 1;
      }
    for (size_t Head = 0; Head < Tail; ++Head) {
      // Retrieve a point from the set.
-      size_t Q = ToProcess[Head];
+      const size_t Q = *ToProcess.begin();
      ToProcess.erase(ToProcess.begin());
      if (ClusterIdForPoint_[Q].isNoise()) {
        // Change noise point to border point.
@ -133,18 +124,17 @@ void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
        CurrentCluster.PointIndices.push_back(Q);
        continue;
      }
-      assert(ClusterIdForPoint_[Q].isUndef());
+      if (!ClusterIdForPoint_[Q].isUndef()) {
        continue; // Previously processed.
      }
      // Add Q to the current custer.
      ClusterIdForPoint_[Q] = CurrentCluster.Id;
      CurrentCluster.PointIndices.push_back(Q);
      // And extend to the neighbors of Q if the region is dense enough.
      rangeQuery(Q, Neighbors);
-      if (Neighbors.size() + 1 >= MinPts)
+      if (Neighbors.size() + 1 >= MinPts) {
-        for (size_t P : Neighbors)
+        ToProcess.insert(Neighbors.begin(), Neighbors.end());
-          if (!Added[P]) {
+      }
            ToProcess[Tail++] = P;
            Added[P] = 1;
          }
    }
  }
  // assert(Neighbors.capacity() == (Points_.size() - 1));