much faster (#781)

Co-authored-by: HailSanta <Hail2Santa@gmail.com>
This commit is contained in:
HailSanta 2022-09-19 12:07:31 -04:00 committed by GitHub
parent 68d77e7f9d
commit 2e37730dd0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 32 additions and 12 deletions

View File

@ -5,3 +5,4 @@ stringcase
watchdog watchdog
gitpython gitpython
colour colour
ahocorasick-rs

View File

@ -1,9 +1,11 @@
#!/usr/bin/python3 #!/usr/bin/python3
import os import os
import re
from tqdm import tqdm from tqdm import tqdm
import ahocorasick_rs
from ahocorasick_rs import *
script_dir = os.path.dirname(os.path.realpath(__file__)) script_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = os.path.join(script_dir, "..") root_dir = os.path.join(script_dir, "..")
src_dir = os.path.join(root_dir, "src") src_dir = os.path.join(root_dir, "src")
@ -11,6 +13,7 @@ include_dir = os.path.join(root_dir, "include")
asm_dir = os.path.join(root_dir, "ver", "us", "asm") asm_dir = os.path.join(root_dir, "ver", "us", "asm")
renames = {} renames = {}
patterns = []
deletes = [] deletes = []
def handle_file(f_path, try_rename_file=False): def handle_file(f_path, try_rename_file=False):
@ -23,25 +26,41 @@ def handle_file(f_path, try_rename_file=False):
deletes.append(f_path) deletes.append(f_path)
f_path = f_path.replace(extless, renames[extless]) f_path = f_path.replace(extless, renames[extless])
# find all matches in one pass with aho-corasick algorithm
f_text = f_text_orig f_text = f_text_orig
for rename in renames: matches = ac.find_matches_as_indexes(f_text)
if "(" in rename or "," in rename: if matches:
f_text = f_text.replace(rename, renames[rename]) to_join = []
else: pos = 0
f_text = re.sub(r"(?:\b)" + re.escape(rename) + r"(?:\b)", renames[rename], f_text) # replace all matches
for match in matches:
# head part
to_join.append(f_text[pos:match[1]])
to_replace = patterns[match[0]]
to_join.append(renames[to_replace])
pos = match[2]
# tail part
to_join.append(f_text[pos:])
f_text = ''.join(to_join);
# save changes
with open(f_path, "w", newline="\n") as f: with open(f_path, "w", newline="\n") as f:
f.write(f_text) f.write(f_text)
# Read input file
# Read Star Rod's output file (one rename per line, old and new, delimited by a space) # One valid whitespace-separated find-replace pair is given per line
with open(os.path.join(script_dir, "to_rename.txt")) as f: with open(os.path.join(script_dir, "to_rename.txt")) as f:
renames_text = f.readlines() renames_text = f.readlines()
# Create dict of old -> new names # Create dict of old -> new names
for line in renames_text: for line in renames_text:
split = line.split() split = line.split()
if len(split) == 2:
renames[split[0]] = split[1] renames[split[0]] = split[1]
patterns.append(split[0])
elif len(split) != 0:
raise Exception("input contains invalid rename pattern: \n\"" + line.strip() + "\"")
ac = ahocorasick_rs.AhoCorasick(patterns, matchkind=MATCHKIND_LEFTMOST_LONGEST)
# Walk through asm files and rename stuff # Walk through asm files and rename stuff
print("Walking through asm files") print("Walking through asm files")