mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-24 19:52:54 +01:00
[find_interesting_reviews.py] Add git blame output cache
The majority of the running time of this script tends to be spent in running git blame on source files touched by patches under review. By introducing a git blame output cache, some of the git blame commands don't have to re-run, and the blame information can be retrieved from a cache. I've observed that in a typical run matching patches available for review with potential reviewers, this speeds up the script's running time by a factor of about 2.5x.
This commit is contained in:
parent
a6c442cb15
commit
32e21be127
@ -458,11 +458,11 @@ def get_git_cmd_output(cmd):
|
|||||||
reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")
|
reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")
|
||||||
|
|
||||||
|
|
||||||
def parse_blame_output_line_porcelain(blame_output):
|
def parse_blame_output_line_porcelain(blame_output_lines):
|
||||||
email2nr_occurences = {}
|
email2nr_occurences = {}
|
||||||
if blame_output is None:
|
if blame_output_lines is None:
|
||||||
return email2nr_occurences
|
return email2nr_occurences
|
||||||
for line in blame_output.split('\n'):
|
for line in blame_output_lines:
|
||||||
m = reAuthorMail.match(line)
|
m = reAuthorMail.match(line)
|
||||||
if m:
|
if m:
|
||||||
author_email_address = m.group(1)
|
author_email_address = m.group(1)
|
||||||
@ -473,6 +473,54 @@ def parse_blame_output_line_porcelain(blame_output):
|
|||||||
return email2nr_occurences
|
return email2nr_occurences
|
||||||
|
|
||||||
|
|
||||||
|
class BlameOutputCache:
|
||||||
|
def __init__(self):
|
||||||
|
self.cache = {}
|
||||||
|
|
||||||
|
def _populate_cache_for(self, cache_key):
|
||||||
|
assert cache_key not in self.cache
|
||||||
|
git_repo, base_revision, path = cache_key
|
||||||
|
cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e -w " +
|
||||||
|
"--line-porcelain {1} -- {2}").format(git_repo, base_revision,
|
||||||
|
path)
|
||||||
|
blame_output = get_git_cmd_output(cmd)
|
||||||
|
self.cache[cache_key] = \
|
||||||
|
blame_output.split('\n') if blame_output is not None else None
|
||||||
|
# FIXME: the blame cache could probably be made more effective still if
|
||||||
|
# instead of storing the requested base_revision in the cache, the last
|
||||||
|
# revision before the base revision this file/path got changed in gets
|
||||||
|
# stored. That way multiple project revisions for which this specific
|
||||||
|
# file/patch hasn't changed would get cache hits (instead of misses in
|
||||||
|
# the current implementation).
|
||||||
|
|
||||||
|
def get_blame_output_for(self, git_repo, base_revision, path, start_line=-1,
|
||||||
|
end_line=-1):
|
||||||
|
cache_key = (git_repo, base_revision, path)
|
||||||
|
if cache_key not in self.cache:
|
||||||
|
self._populate_cache_for(cache_key)
|
||||||
|
assert cache_key in self.cache
|
||||||
|
all_blame_lines = self.cache[cache_key]
|
||||||
|
if all_blame_lines is None:
|
||||||
|
return None
|
||||||
|
if start_line == -1 and end_line == -1:
|
||||||
|
return all_blame_lines
|
||||||
|
assert start_line >= 0
|
||||||
|
assert end_line >= 0
|
||||||
|
assert end_line <= len(all_blame_lines)
|
||||||
|
assert start_line <= len(all_blame_lines)
|
||||||
|
assert start_line <= end_line
|
||||||
|
return all_blame_lines[start_line:end_line]
|
||||||
|
|
||||||
|
def get_parsed_git_blame_for(self, git_repo, base_revision, path,
|
||||||
|
start_line=-1, end_line=-1):
|
||||||
|
return parse_blame_output_line_porcelain(
|
||||||
|
self.get_blame_output_for(git_repo, base_revision, path, start_line,
|
||||||
|
end_line))
|
||||||
|
|
||||||
|
|
||||||
|
blameOutputCache = BlameOutputCache()
|
||||||
|
|
||||||
|
|
||||||
def find_reviewers_for_diff_heuristic(diff):
|
def find_reviewers_for_diff_heuristic(diff):
|
||||||
# Heuristic 1: assume good reviewers are the ones that touched the same
|
# Heuristic 1: assume good reviewers are the ones that touched the same
|
||||||
# lines before as this patch is touching.
|
# lines before as this patch is touching.
|
||||||
@ -496,23 +544,18 @@ def find_reviewers_for_diff_heuristic(diff):
|
|||||||
for hunk in change.hunks:
|
for hunk in change.hunks:
|
||||||
for start_line, end_line in hunk.actual_lines_changed_offset:
|
for start_line, end_line in hunk.actual_lines_changed_offset:
|
||||||
# Collect git blame results for authors in those ranges.
|
# Collect git blame results for authors in those ranges.
|
||||||
cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e " +
|
|
||||||
"-w --line-porcelain -L {1},{2} {3} -- {4}").format(
|
|
||||||
git_repo, start_line, end_line, base_revision, path)
|
|
||||||
blame_output = get_git_cmd_output(cmd)
|
|
||||||
for reviewer, nr_occurences in \
|
for reviewer, nr_occurences in \
|
||||||
parse_blame_output_line_porcelain(blame_output).items():
|
blameOutputCache.get_parsed_git_blame_for(
|
||||||
|
git_repo, base_revision, path, start_line, end_line
|
||||||
|
).items():
|
||||||
if reviewer not in reviewers2nr_lines_touched:
|
if reviewer not in reviewers2nr_lines_touched:
|
||||||
reviewers2nr_lines_touched[reviewer] = 0
|
reviewers2nr_lines_touched[reviewer] = 0
|
||||||
reviewers2nr_lines_touched[reviewer] += nr_occurences
|
reviewers2nr_lines_touched[reviewer] += nr_occurences
|
||||||
# Compute heuristic 2: don't look at context, just at files touched.
|
# Compute heuristic 2: don't look at context, just at files touched.
|
||||||
# Collect git blame results for authors in those ranges.
|
# Collect git blame results for authors in those ranges.
|
||||||
cmd = ("git -C {0} blame --encoding=utf-8 --date iso -f -e -w " +
|
for reviewer, nr_occurences in \
|
||||||
"--line-porcelain {1} -- {2}").format(git_repo, base_revision,
|
blameOutputCache.get_parsed_git_blame_for(
|
||||||
path)
|
git_repo, base_revision, path).items():
|
||||||
blame_output = get_git_cmd_output(cmd)
|
|
||||||
for reviewer, nr_occurences in parse_blame_output_line_porcelain(
|
|
||||||
blame_output).items():
|
|
||||||
if reviewer not in reviewers2nr_files_touched:
|
if reviewer not in reviewers2nr_files_touched:
|
||||||
reviewers2nr_files_touched[reviewer] = 0
|
reviewers2nr_files_touched[reviewer] = 0
|
||||||
reviewers2nr_files_touched[reviewer] += 1
|
reviewers2nr_files_touched[reviewer] += 1
|
||||||
|
Loading…
Reference in New Issue
Block a user