mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-23 19:23:23 +01:00
32e21be127
The majority of the running time of this script tends to be spent in running git blame on source files touched by patches under review. By introducing a git blame output cache, some of the git blame commands don't have to re-run, and the blame information can be retrieved from a cache. I've observed that in a typical run matching patches available for review with potential reviewers, this speeds up the script's running time by a factor of about 2.5x.
681 lines
25 KiB
Python
681 lines
25 KiB
Python
#!/usr/bin/env python
|
|
|
|
from __future__ import print_function
|
|
|
|
import argparse
|
|
import email.mime.multipart
|
|
import email.mime.text
|
|
import logging
|
|
import os.path
|
|
import pickle
|
|
import re
|
|
import smtplib
|
|
import subprocess
|
|
import sys
|
|
from datetime import datetime, timedelta
|
|
from phabricator import Phabricator
|
|
|
|
# Setting up a virtualenv to run this script can be done by running the
|
|
# following commands:
|
|
# $ virtualenv venv
|
|
# $ . ./venv/bin/activate
|
|
# $ pip install Phabricator
|
|
|
|
# Each entry is a (local checkout name, clone URL) pair; the checkout lives in
# a directory of that name below "git_repos".
GIT_REPO_METADATA = (
    ("llvm-monorepo", "https://github.com/llvm/llvm-project"),
)
|
|
|
|
# The below PhabXXX classes represent objects as modelled by Phabricator.
|
|
# The classes can be serialized to disk, to try and make sure that we don't
|
|
# needlessly have to re-fetch lots of data from Phabricator, as that would
|
|
# make this script unusably slow.
|
|
|
|
|
|
class PhabObject:
    """Common base for the locally cached Phabricator object classes.

    Subclasses override OBJECT_KIND with a short string naming the kind of
    object they represent.
    """

    OBJECT_KIND = None

    def __init__(self, id):
        # `id` is the identifier the object is stored under in a cache.
        self.id = id
|
|
|
|
|
|
class PhabObjectCache:
    """Cache of PhabObject instances, keyed by id, persisted with pickle.

    The cache also tracks `most_recent_info` and `oldest_info`, the
    timestamps bounding the period for which the cache holds data (both are
    None until set by the code that fills the cache).
    """

    DEFAULT_DIRECTORY = "PhabObjectCache"

    def __init__(self, PhabObjectClass):
        """Create an empty cache holding objects of class PhabObjectClass."""
        self.PhabObjectClass = PhabObjectClass
        self.most_recent_info = None
        self.oldest_info = None
        self.id2PhabObjects = {}

    def get_name(self):
        """Return a human-readable name for this cache, e.g. "ReviewsCache"."""
        return self.PhabObjectClass.OBJECT_KIND + "sCache"

    def get(self, id):
        """Return the cached object for `id`, creating it when missing."""
        if id not in self.id2PhabObjects:
            self.id2PhabObjects[id] = self.PhabObjectClass(id)
        return self.id2PhabObjects[id]

    def get_ids_in_cache(self):
        """Return a list with the ids of all cached objects."""
        return list(self.id2PhabObjects.keys())

    def get_objects(self):
        """Return a list with all cached objects."""
        return list(self.id2PhabObjects.values())

    def _get_pickle_name(self, directory):
        """Return the path of this cache's pickle file inside `directory`."""
        file_name = "Phab" + self.PhabObjectClass.OBJECT_KIND + "s.pickle"
        return os.path.join(directory, file_name)

    def populate_cache_from_disk(self, directory=DEFAULT_DIRECTORY):
        """Load the cache state from its pickle file, if usable.

        A missing or unreadable file, or a file that cannot be unpickled into
        a dict, is reported on stdout and otherwise ignored; the in-memory
        state is left untouched in that case.

        FIXME: consider if serializing to JSON would bring interoperability
        advantages over serializing to pickle.
        """
        try:
            f = open(self._get_pickle_name(directory), "rb")
        except IOError as err:
            print("Could not find cache. Error message: {0}. Continuing..."
                  .format(err))
            return
        with f:
            try:
                # NOTE: unpickling can execute arbitrary code; the cache file
                # must only ever come from a trusted, local source.
                d = pickle.load(f)
            except (EOFError, pickle.UnpicklingError, AttributeError,
                    ImportError, IndexError) as err:
                # These are the exceptions the pickle documentation lists for
                # truncated or otherwise damaged input.
                print("Cache seems to be corrupt. " +
                      "Not using cache. Error message: {0}".format(err))
                return
        if not isinstance(d, dict):
            print("Cache seems to be corrupt. " +
                  "Not using cache. Error message: {0}".format(
                      "unexpected pickle contents"))
            return
        self.__dict__.update(d)

    def write_cache_to_disk(self, directory=DEFAULT_DIRECTORY):
        """Pickle the complete cache state into `directory`.

        The directory is created when it does not exist yet.
        """
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(self._get_pickle_name(directory), "wb") as f:
            pickle.dump(self.__dict__, f)
        print("wrote cache to disk, most_recent_info= {0}".format(
            datetime.fromtimestamp(self.most_recent_info)
            if self.most_recent_info is not None else None))
|
|
|
|
|
|
class PhabReview(PhabObject):
    """A review, as stored in the reviews cache."""

    OBJECT_KIND = "Review"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, title, dateCreated, dateModified, author):
        """Store the review's title, creation/modification times and author."""
        self.title = title
        self.dateCreated = dateCreated
        self.dateModified = dateModified
        self.author = author

    def setPhabDiffs(self, phabDiffs):
        """Attach the list of diffs belonging to this review."""
        self.phabDiffs = phabDiffs
|
|
|
|
|
|
class PhabUser(PhabObject):
    """A user, as stored in the users cache."""

    OBJECT_KIND = "User"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, phid, realName):
        """Store the user's PHID and real name."""
        self.phid = phid
        self.realName = realName
|
|
|
|
|
|
class PhabHunk:
    """One hunk of a change, built from its REST API dictionary.

    After construction, `actual_lines_changed_offset` holds a list of
    (start, end) offset pairs in the old file: one pair per run of changed
    lines, widened by a few lines of context, with touching or overlapping
    pairs merged.
    """

    def __init__(self, rest_api_hunk):
        self.oldOffset = int(rest_api_hunk["oldOffset"])
        self.oldLength = int(rest_api_hunk["oldLength"])

        context = 3
        ranges = []
        position = self.oldOffset
        # Start of the run of changed lines currently being scanned, or None
        # while scanning context lines.
        run_start = None
        for text in rest_api_hunk["corpus"].split("\n"):
            added = text.startswith("+")
            removed = not added and text.startswith("-")
            if added or removed:
                if run_start is None:
                    run_start = max(self.oldOffset, position - context)
                # Only removed lines exist in the old file, so only they
                # advance the old-file position; added lines do not.
                if removed:
                    position += 1
                continue
            # A context line ends any run in progress.
            if run_start is not None:
                ranges.append((run_start, position + context))
                run_start = None
            position += 1
        if run_start is not None:
            ranges.append((run_start, position + context))

        # Widening by the context can make consecutive ranges touch or
        # overlap; fold those into a single range.
        merged = []
        for start, end in ranges:
            if merged and merged[-1][1] >= start:
                merged[-1] = (merged[-1][0], end)
            else:
                merged.append((start, end))
        self.actual_lines_changed_offset = merged
|
|
|
|
|
|
class PhabChange:
    """The change made to one file, built from its REST API dictionary."""

    def __init__(self, rest_api_change):
        self.oldPath = rest_api_change["oldPath"]
        hunks = []
        for hunk_info in rest_api_change["hunks"]:
            hunks.append(PhabHunk(hunk_info))
        self.hunks = hunks
|
|
|
|
|
|
class PhabDiff(PhabObject):
    """One diff of a review."""

    OBJECT_KIND = "Diff"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, rest_api_results):
        """Fill in this diff from its REST API result dictionary."""
        self.revisionID = rest_api_results["revisionID"]
        self.dateModified = int(rest_api_results["dateModified"])
        self.dateCreated = int(rest_api_results["dateCreated"])
        changes = []
        for change_info in rest_api_results["changes"]:
            changes.append(PhabChange(change_info))
        self.changes = changes
|
|
|
|
|
|
class ReviewsCache(PhabObjectCache):
    """A PhabObjectCache whose entries are PhabReview objects."""

    def __init__(self):
        PhabObjectCache.__init__(self, PhabReview)
|
|
|
|
|
|
class UsersCache(PhabObjectCache):
    """A PhabObjectCache whose entries are PhabUser objects."""

    def __init__(self):
        PhabObjectCache.__init__(self, PhabUser)
|
|
|
|
|
|
# Module-level cache instances; they start out empty and are filled from disk
# and/or from Phabricator by the functions below.
reviews_cache = ReviewsCache()
users_cache = UsersCache()
|
|
|
|
|
|
def init_phab_connection():
    """Create a Phabricator client, refresh its interfaces and return it."""
    connection = Phabricator()
    connection.update_interfaces()
    return connection
|
|
|
|
|
|
def update_cached_info(phab, cache, phab_query, order, record_results,
                       max_nr_entries_per_fetch, max_nr_days_to_cache):
    """Fetch objects from Phabricator page by page and record them in `cache`.

    Args:
        phab: object on which the query method is looked up.
        cache: cache to update; its `most_recent_info` and `oldest_info`
            attributes are read and written, and `write_cache_to_disk()` is
            called after each recorded page.
        phab_query: sequence of attribute names followed from `phab` to reach
            the query callable.
        order: value passed as `order=` to the query.
        record_results: callable `(cache, results, phab)` that stores one page
            of results and returns `(most_recent_info, oldest_info)`, both
            None when the page contained nothing to record.
        max_nr_entries_per_fetch: value passed as `limit=` to the query.
        max_nr_days_to_cache: how many days, counted back from the most recent
            record, to keep fetching older pages for.

    If the first page records nothing there is nothing to base the time
    window on, so the cache is left untouched.
    """
    q = phab
    for query_step in phab_query:
        q = getattr(q, query_step)
    limit = max_nr_entries_per_fetch

    results = q(order=order, limit=limit)
    most_recent_info, oldest_info = record_results(cache, results, phab)
    if most_recent_info is None or oldest_info is None:
        # Without any recorded entry the timestamps below cannot be computed
        # (datetime.fromtimestamp(None) raises TypeError).
        print("No records found; cache left unchanged.")
        return
    oldest_info_to_fetch = datetime.fromtimestamp(most_recent_info) - \
        timedelta(days=max_nr_days_to_cache)
    most_recent_info_overall = most_recent_info
    cache.write_cache_to_disk()
    after = results["cursor"]["after"]
    print("after: {0!r}".format(after))
    print("most_recent_info: {0}".format(
        datetime.fromtimestamp(most_recent_info)))
    while (after is not None
           and datetime.fromtimestamp(oldest_info) > oldest_info_to_fetch):
        need_more_older_data = \
            (cache.oldest_info is None or
             datetime.fromtimestamp(cache.oldest_info) > oldest_info_to_fetch)
        print(("need_more_older_data={0} cache.oldest_info={1} " +
               "oldest_info_to_fetch={2}").format(
                   need_more_older_data,
                   datetime.fromtimestamp(cache.oldest_info)
                   if cache.oldest_info is not None else None,
                   oldest_info_to_fetch))
        need_more_newer_data = \
            (cache.most_recent_info is None or
             cache.most_recent_info < most_recent_info)
        print(("need_more_newer_data={0} cache.most_recent_info={1} " +
               "most_recent_info={2}")
              .format(need_more_newer_data, cache.most_recent_info,
                      most_recent_info))
        if not need_more_older_data and not need_more_newer_data:
            break
        results = q(order=order, after=after, limit=limit)
        page_most_recent, page_oldest = record_results(cache, results, phab)
        after = results["cursor"]["after"]
        print("after: {0!r}".format(after))
        if page_most_recent is not None and page_oldest is not None:
            # Only a page that recorded something moves the time window; an
            # empty page keeps the values of the previous page.
            most_recent_info, oldest_info = page_most_recent, page_oldest
            print("most_recent_info: {0}".format(
                datetime.fromtimestamp(most_recent_info)))
        cache.write_cache_to_disk()
    cache.most_recent_info = most_recent_info_overall
    if after is None:
        # We did fetch all records. Mark the cache to contain all info since
        # the start of time.
        oldest_info = 0
    cache.oldest_info = oldest_info
    cache.write_cache_to_disk()
|
|
|
|
|
|
def record_reviews(cache, reviews, phab):
    """Store one page of review search results in `cache`.

    Entries whose type is not "DREV" are skipped. A review's diffs are only
    re-queried when the cached review has no modification date yet or the
    fetched one is newer.

    Returns a (most recent, oldest) pair of modification timestamps over the
    recorded entries; both are None when nothing was recorded.
    """
    newest = None
    oldest = None
    for entry in reviews["data"]:
        if entry["type"] != "DREV":
            continue
        review_id = entry["id"]
        fields = entry["fields"]
        modified = int(fields["dateModified"])
        created = int(fields["dateCreated"])
        title = fields["title"]
        author = fields["authorPHID"]

        review = cache.get(review_id)
        is_stale = ("dateModified" not in review.__dict__
                    or modified > review.dateModified)
        if is_stale:
            diff_results = phab.differential.querydiffs(
                revisionIDs=[review_id])
            diffs = []
            for diff_id in sorted(diff_results.keys()):
                diff = PhabDiff(diff_id)
                diff.update(diff_results[diff_id])
                diffs.append(diff)
            review.update(title, created, modified, author)
            review.setPhabDiffs(diffs)
            print("Updated D{0} modified on {1} ({2} diffs)".format(
                review_id, datetime.fromtimestamp(modified), len(diffs)))

        newest = modified if newest is None else max(newest, modified)
        oldest = modified if oldest is None else min(oldest, modified)
    return newest, oldest
|
|
|
|
|
|
def record_users(cache, users, phab):
    """Store one page of user search results in `cache`.

    Entries whose type is not "USER" are skipped. `phab` is accepted so that
    the signature matches record_reviews; it is not used here.

    Returns a (most recent, oldest) pair of modification timestamps over the
    recorded entries; both are None when nothing was recorded.
    """
    newest = None
    oldest = None
    for entry in users["data"]:
        if entry["type"] != "USER":
            continue
        user_id = entry["id"]
        phid = entry["phid"]
        modified = int(entry["fields"]["dateModified"])
        real_name = entry["fields"]["realName"]
        cache.get(user_id).update(phid, real_name)

        newest = modified if newest is None else max(newest, modified)
        oldest = modified if oldest is None else min(oldest, modified)
    return newest, oldest
|
|
|
|
|
|
# One entry per cache to maintain. Each entry holds, in order: the cache
# object, the attribute names to follow on the Phabricator connection to reach
# the query method, the ordering to request, the function that records query
# results in the cache, the maximum number of entries per fetch, and the
# maximum number of days of history to cache.
PHABCACHESINFO = ((reviews_cache, ("differential", "revision", "search"),
                   "updated", record_reviews, 5, 7),
                  (users_cache, ("user", "search"), "newest", record_users,
                   100, 1000))
|
|
|
|
|
|
def load_cache():
    """Load every cache listed in PHABCACHESINFO from disk and report on it."""
    for cache, _query, _order, _recorder, _, _ in PHABCACHESINFO:
        cache.populate_cache_from_disk()
        nr_entries = len(cache.get_ids_in_cache())
        print("Loaded {0} nr entries: {1}".format(
            cache.get_name(), nr_entries))
        if cache.most_recent_info is None:
            most_recent = None
        else:
            most_recent = datetime.fromtimestamp(cache.most_recent_info)
        print("Loaded {0} has most recent info: {1}".format(
            cache.get_name(), most_recent))
|
|
|
|
|
|
def update_cache(phab):
    """Load all caches from disk, then refresh each one from Phabricator."""
    load_cache()
    for entry in PHABCACHESINFO:
        (cache, phab_query, order, record_results,
         max_nr_entries_per_fetch, max_nr_days_to_cache) = entry
        update_cached_info(phab, cache, phab_query, order, record_results,
                           max_nr_entries_per_fetch, max_nr_days_to_cache)
        nr_objects = len(cache.get_ids_in_cache())
        print("{0} objects in {1}".format(nr_objects, cache.get_name()))
        cache.write_cache_to_disk()
|
|
|
|
|
|
def get_most_recent_reviews(days):
    """Return the cached reviews modified in the last `days` days.

    The window is measured back from the most recently modified review in the
    cache, not from the current time. The result is ordered newest first.
    """
    by_recency = sorted(
        reviews_cache.get_objects(), key=lambda r: -r.dateModified)
    if not by_recency:
        return by_recency
    newest_time = datetime.fromtimestamp(by_recency[0].dateModified)
    cut_off_date = newest_time - timedelta(days=days)
    recent = []
    for review in by_recency:
        # The list is sorted, so the first review that is too old ends the
        # scan.
        if datetime.fromtimestamp(review.dateModified) < cut_off_date:
            break
        recent.append(review)
    return recent
|
|
|
|
|
|
# All of the above code is about fetching data from Phabricator and caching it
|
|
# on local disk. The below code contains the actual "business logic" for this
|
|
# script.
|
|
|
|
# Lazily built mapping from user PHID to real name; None until first use.
_userphid2realname = None


def get_real_name_from_author(user_phid):
    """Return the real name of the user with PHID `user_phid`.

    Returns "unknown" when the users cache has no such user. The lookup table
    is built from the users cache on the first call and reused afterwards.
    """
    global _userphid2realname
    lookup = _userphid2realname
    if lookup is None:
        lookup = _userphid2realname = {}
        for user in users_cache.get_objects():
            lookup[user.phid] = user.realName
    return lookup.get(user_phid, "unknown")
|
|
|
|
|
|
def print_most_recent_reviews(phab, days, filter_reviewers):
    """Print and return a report matching recent reviews to reviewers.

    Args:
        phab: not used by this function.
        days: size of the time window passed to get_most_recent_reviews.
        filter_reviewers: callable taking the list of (reviewer, scores)
            pairs found for a review and returning the pairs to report.

    Returns:
        The complete report as one string; every message is also printed as
        it is produced.
    """
    msgs = []

    def add_msg(msg):
        msgs.append(msg)
        if sys.version_info[0] < 3:
            # Python 2: encode explicitly so that printing non-ASCII text
            # does not depend on the terminal's encoding.
            print(msg.encode('utf-8'))
        else:
            # Python 3: printing the encoded bytes would show their repr
            # (b'...') instead of the text.
            print(msg)

    newest_reviews = get_most_recent_reviews(days)
    add_msg(u"These are the reviews that look interesting to be reviewed. " +
            u"The report below has 2 sections. The first " +
            u"section is organized per review; the second section is organized "
            + u"per potential reviewer.\n")
    oldest_review = newest_reviews[-1] if len(newest_reviews) > 0 else None
    oldest_datetime = \
        datetime.fromtimestamp(oldest_review.dateModified) \
        if oldest_review else None
    add_msg((u"The report below is based on analyzing the reviews that got " +
             u"touched in the past {0} days (since {1}). " +
             u"The script found {2} such reviews.\n").format(
                 days, oldest_datetime, len(newest_reviews)))
    reviewer2reviews_and_scores = {}
    for i, review in enumerate(newest_reviews):
        # A review without diffs may yield None rather than a list; treat
        # that as "no reviewers found" instead of failing in the filter.
        matched_reviewers = find_reviewers_for_review(review) or []
        matched_reviewers = filter_reviewers(matched_reviewers)
        if len(matched_reviewers) == 0:
            continue
        add_msg((u"{0:>3}. https://reviews.llvm.org/D{1} by {2}\n {3}\n" +
                 u" Last updated on {4}").format(
                     i, review.id,
                     get_real_name_from_author(review.author), review.title,
                     datetime.fromtimestamp(review.dateModified)))
        for reviewer, scores in matched_reviewers:
            add_msg(u" potential reviewer {0}, score {1}".format(
                reviewer,
                "(" + "/".join(["{0:.1f}%".format(s) for s in scores]) + ")"))
            if reviewer not in reviewer2reviews_and_scores:
                reviewer2reviews_and_scores[reviewer] = []
            reviewer2reviews_and_scores[reviewer].append((review, scores))

    # Print out a summary per reviewer.
    for reviewer in sorted(reviewer2reviews_and_scores.keys()):
        reviews_and_scores = reviewer2reviews_and_scores[reviewer]
        reviews_and_scores.sort(key=lambda rs: rs[1], reverse=True)
        add_msg(u"\n\nSUMMARY FOR {0} (found {1} reviews):".format(
            reviewer, len(reviews_and_scores)))
        for review, scores in reviews_and_scores:
            add_msg(u"[{0}] https://reviews.llvm.org/D{1} '{2}' by {3}".format(
                "/".join(["{0:.1f}%".format(s) for s in scores]), review.id,
                review.title, get_real_name_from_author(review.author)))
    return "\n".join(msgs)
|
|
|
|
|
|
def get_git_cmd_output(cmd):
    """Run the shell command `cmd` and return its output as text.

    stderr is folded into the returned output. The output is decoded as
    UTF-8, dropping undecodable bytes. Returns None when the command exits
    with a non-zero status; the failure is only logged at debug level.
    """
    logging.debug(cmd)
    try:
        raw_output = subprocess.check_output(
            cmd, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logging.debug(str(e))
        return None
    return raw_output.decode("utf-8", errors='ignore')
|
|
|
|
|
|
# Matches the "author-mail <address>" lines of blame output and captures the
# address between the angle brackets.
reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")


def parse_blame_output_line_porcelain(blame_output_lines):
    """Count how often each author e-mail address occurs in blame output.

    `blame_output_lines` is an iterable of output lines, or None. Returns a
    dict mapping address to number of "author-mail" lines; the dict is empty
    for None input.
    """
    counts = {}
    if blame_output_lines is not None:
        for line in blame_output_lines:
            found = reAuthorMail.match(line)
            if not found:
                continue
            address = found.group(1)
            counts[address] = counts.get(address, 0) + 1
    return counts
|
|
|
|
|
|
class BlameOutputCache:
    """Cache of `git blame --line-porcelain` output per (repo, revision, path).

    Running git blame dominates the script's run time, so the output for a
    file at a given revision is fetched once and line ranges are served from
    the cached copy.
    """

    def __init__(self):
        # cache_key -> list of raw output lines, or None if blame failed.
        self.cache = {}
        # cache_key -> the same output, split into one chunk per source line.
        self._line_groups = {}

    def _populate_cache_for(self, cache_key):
        """Run git blame for `cache_key` and store its output lines."""
        assert cache_key not in self.cache
        git_repo, base_revision, path = cache_key
        if path is None:
            # No path to blame; record the same "no output" result a failing
            # git command produces.
            self.cache[cache_key] = None
            return
        try:
            from shlex import quote
        except ImportError:  # Python 2
            from pipes import quote
        # The command goes through a shell and `path` comes from diff data,
        # so every argument is quoted. An empty revision is passed through
        # unquoted so that it still vanishes from the command line.
        cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e -w " +
               "--line-porcelain {1} -- {2}").format(
                   quote(git_repo),
                   quote(base_revision) if base_revision else "",
                   quote(path))
        blame_output = get_git_cmd_output(cmd)
        self.cache[cache_key] = \
            blame_output.split('\n') if blame_output is not None else None
        # FIXME: the blame cache could probably be made more effective still if
        # instead of storing the requested base_revision in the cache, the last
        # revision before the base revision this file/path got changed in gets
        # stored. That way multiple project revisions for which this specific
        # file/patch hasn't changed would get cache hits (instead of misses in
        # the current implementation).

    def _group_by_source_line(self, blame_lines):
        """Split raw porcelain output into one list of lines per source line.

        In --line-porcelain output each source line is described by several
        header lines followed by the line's content prefixed with a tab, so a
        tab-prefixed line closes a group. Lines after the last complete group
        are dropped.
        """
        groups = []
        current = []
        for line in blame_lines:
            current.append(line)
            if line.startswith("\t"):
                groups.append(current)
                current = []
        return groups

    def get_blame_output_for(self, git_repo, base_revision, path, start_line=-1,
                             end_line=-1):
        """Return blame output lines for `path` at `base_revision`.

        With the default start_line/end_line of -1 the complete raw output is
        returned. Otherwise only the output describing source lines
        start_line..end_line is returned; the range is treated as 1-based and
        inclusive (as `git blame -L` does -- NOTE(review): confirm this
        matches how PhabHunk computes its ranges) and is clamped to the
        file's length.

        Returns None when git blame produced no output for this file.
        Raises ValueError for a negative or reversed range.
        """
        cache_key = (git_repo, base_revision, path)
        if cache_key not in self.cache:
            self._populate_cache_for(cache_key)
        all_blame_lines = self.cache[cache_key]
        if all_blame_lines is None:
            return None
        if start_line == -1 and end_line == -1:
            return all_blame_lines
        if start_line < 0 or end_line < 0 or start_line > end_line:
            raise ValueError(
                "invalid line range {0}-{1}".format(start_line, end_line))
        # Source line numbers cannot index the raw output directly because
        # every source line takes up several output lines.
        if cache_key not in self._line_groups:
            self._line_groups[cache_key] = \
                self._group_by_source_line(all_blame_lines)
        groups = self._line_groups[cache_key]
        first = max(start_line - 1, 0)
        return [line for group in groups[first:end_line] for line in group]

    def get_parsed_git_blame_for(self, git_repo, base_revision, path,
                                 start_line=-1, end_line=-1):
        """Return a dict of author e-mail address -> number of blamed lines."""
        return parse_blame_output_line_porcelain(
            self.get_blame_output_for(git_repo, base_revision, path, start_line,
                                      end_line))
|
|
|
|
|
|
# Single module-level blame cache shared by all reviewer lookups in one run.
blameOutputCache = BlameOutputCache()
|
|
|
|
|
|
def find_reviewers_for_diff_heuristic(diff):
    """Rank potential reviewers for `diff` using git blame.

    Heuristic 1: assume good reviewers are the ones that touched the same
    lines before as this patch is touching.
    Heuristic 2: assume good reviewers are the ones that touched the same
    files before as this patch is touching.

    Returns a list of (reviewer e-mail, (lines score, files score)) pairs,
    both scores being percentages, sorted by descending score. The list is
    empty when the base revision of the diff cannot be determined.
    """
    reviewers2nr_lines_touched = {}
    reviewers2nr_files_touched = {}
    # Assume last revision before diff was modified is the revision the diff
    # applies to.
    assert len(GIT_REPO_METADATA) == 1
    git_repo = os.path.join("git_repos", GIT_REPO_METADATA[0][0])
    # "%S" is seconds (00-59); lowercase "%s" is not a portable strftime code
    # and where supported expands to the epoch time, producing a date git
    # cannot be expected to parse.
    # NOTE(review): the branch name "master" is hard-coded; confirm it still
    # exists in the local checkout of the repository.
    cmd = 'git -C {0} rev-list -n 1 --before="{1}" master'.format(
        git_repo,
        datetime.fromtimestamp(
            diff.dateModified).strftime("%Y-%m-%d %H:%M:%S"))
    rev_list_output = get_git_cmd_output(cmd)
    if rev_list_output is None:
        # The git command failed (e.g. missing checkout or branch); without a
        # base revision there is nothing to blame against.
        logging.debug("Could not determine base revision")
        return []
    base_revision = rev_list_output.strip()
    logging.debug("Base revision={0}".format(base_revision))
    for change in diff.changes:
        path = change.oldPath
        # Compute heuristic 1: look at context of patch lines.
        for hunk in change.hunks:
            for start_line, end_line in hunk.actual_lines_changed_offset:
                # Collect git blame results for authors in those ranges.
                blamed = blameOutputCache.get_parsed_git_blame_for(
                    git_repo, base_revision, path, start_line, end_line)
                for reviewer, nr_occurences in blamed.items():
                    reviewers2nr_lines_touched[reviewer] = \
                        reviewers2nr_lines_touched.get(reviewer, 0) + \
                        nr_occurences
        # Compute heuristic 2: don't look at context, just at files touched.
        # Every author appearing anywhere in the file's blame output counts
        # as having touched that file once.
        blamed = blameOutputCache.get_parsed_git_blame_for(
            git_repo, base_revision, path)
        for reviewer in blamed:
            reviewers2nr_files_touched[reviewer] = \
                reviewers2nr_files_touched.get(reviewer, 0) + 1

    # Compute "match scores"
    total_nr_lines = sum(reviewers2nr_lines_touched.values())
    total_nr_files = len(diff.changes)
    reviewers_matchscores = \
        [(reviewer,
          (reviewers2nr_lines_touched.get(reviewer, 0)*100.0/total_nr_lines
           if total_nr_lines != 0 else 0,
           nr_files*100.0/total_nr_files
           if total_nr_files != 0 else 0))
         for reviewer, nr_files
         in reviewers2nr_files_touched.items()]
    reviewers_matchscores.sort(key=lambda i: i[1], reverse=True)
    return reviewers_matchscores
|
|
|
|
|
|
def find_reviewers_for_review(review):
    """Return the potential reviewers for the newest diff of `review`.

    The result is the list produced by find_reviewers_for_diff_heuristic for
    the most recently modified diff, or an empty list when the review has no
    diffs (so callers can always iterate over the result).
    """
    if len(review.phabDiffs) == 0:
        return []
    # Only the newest diff is analysed.
    diff = max(review.phabDiffs, key=lambda d: d.dateModified)
    matched_reviewers = find_reviewers_for_diff_heuristic(diff)
    # Show progress, as this is a slow operation:
    sys.stdout.write('.')
    sys.stdout.flush()
    logging.debug(u"matched_reviewers: {0}".format(matched_reviewers))
    return matched_reviewers
|
|
|
|
|
|
def update_git_repos():
    """Clone any missing repository from GIT_REPO_METADATA, then pull each.

    Checkouts live below the "git_repos" directory. The output of the git
    commands is not inspected.
    """
    repos_root = "git_repos"
    for name, url in GIT_REPO_METADATA:
        checkout = os.path.join(repos_root, name)
        if not os.path.exists(checkout):
            get_git_cmd_output("git clone {0} {1}".format(url, checkout))
        get_git_cmd_output("git -C {0} pull --rebase".format(checkout))
|
|
|
|
|
|
def send_emails(email_addresses, sender, msg):
    """E-mail the report text `msg` to each address in `email_addresses`.

    Args:
        email_addresses: iterable of recipient addresses; when empty, no SMTP
            connection is made at all.
        sender: address used in the 'From' header.
        msg: report body as text.
    """
    if not email_addresses:
        return
    s = smtplib.SMTP()
    s.connect()
    try:
        for email_address in email_addresses:
            email_msg = email.mime.multipart.MIMEMultipart()
            email_msg['From'] = sender
            email_msg['To'] = email_address
            email_msg['Subject'] = 'LLVM patches you may be able to review.'
            # Pass the text together with its charset; handing MIMEText
            # pre-encoded bytes fails on Python 3.
            email_msg.attach(
                email.mime.text.MIMEText(msg, 'plain', 'utf-8'))
            # python 3.x: s.send_message(email_msg)
            s.sendmail(email_msg['From'], email_msg['To'],
                       email_msg.as_string())
    finally:
        # Close the connection even when sending one of the messages fails.
        s.quit()
|
|
|
|
|
|
def filter_reviewers_to_report_for(people_to_look_for):
    """Return a filter keeping only reviewers listed in `people_to_look_for`.

    The returned callable takes a list of (reviewer, scores) pairs and returns
    the pairs whose reviewer is in `people_to_look_for`, in their original
    order.
    """
    # The below is just an example filter, to only report potential reviews
    # to do for the people that will receive the report email.
    def keep_requested(potential_reviewers):
        kept = []
        for candidate in potential_reviewers:
            if candidate[0] in people_to_look_for:
                kept.append(candidate)
        return kept

    return keep_requested
|
|
|
|
|
|
def main():
    """Parse the command line, refresh the caches and produce the report.

    The report is printed and, when --email-report lists at least one
    address, also e-mailed to those addresses.
    """
    parser = argparse.ArgumentParser(
        description='Match open reviews to potential reviewers.')
    parser.add_argument(
        '--no-update-cache',
        dest='update_cache',
        action='store_false',
        default=True,
        help='Do not update cached Phabricator objects')
    parser.add_argument(
        '--email-report',
        dest='email_report',
        nargs='*',
        # A list default keeps the "send e-mail?" check below meaningful when
        # the option is not given.
        default=[],
        help="A email addresses to send the report to.")
    parser.add_argument(
        '--sender',
        dest='sender',
        default="",
        help="The email address to use in 'From' on messages emailed out.")
    parser.add_argument(
        '--email-addresses',
        dest='email_addresses',
        nargs='*',
        # Without a default this would be None and could not be iterated.
        default=[],
        help="The email addresses (as known by LLVM git) of " +
        "the people to look for reviews for.")
    # action='count' yields None unless a default is given, and None cannot
    # be compared with an int on Python 3.
    parser.add_argument('--verbose', '-v', action='count', default=0)

    args = parser.parse_args()

    if args.verbose >= 1:
        logging.basicConfig(level=logging.DEBUG)

    # Command-line arguments are byte strings on Python 2 and already text on
    # Python 3; only the former need decoding.
    people_to_look_for = [
        e.decode('utf-8') if isinstance(e, bytes) else e
        for e in args.email_addresses]
    logging.debug("Will look for reviews that following contributors could " +
                  "review: {}".format(people_to_look_for))
    logging.debug("Will email a report to: {}".format(args.email_report))

    phab = init_phab_connection()

    if args.update_cache:
        update_cache(phab)

    load_cache()
    update_git_repos()
    msg = print_most_recent_reviews(
        phab,
        days=1,
        filter_reviewers=filter_reviewers_to_report_for(people_to_look_for))

    if args.email_report:
        send_emails(args.email_report, args.sender, msg)
|
|
|
|
|
|
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|