Mirror of https://github.com/pmret/papermario.git (synced 2024-11-08 12:02:30 +01:00)

Yaml fixes + find_similar_areas update

This commit is contained in:
parent 9e8f3eb36b
commit f59f495a36
@@ -1,36 +1,41 @@
 #!/usr/bin/python3
 
 import argparse
 import os
 import re
+import subprocess
 import sys
 from collections import OrderedDict
 from dataclasses import dataclass
 from pathlib import Path
-import re
-from typing import Optional
+from typing import Dict, List, Optional, Tuple
 
-import os
-import sys
+from sty import fg
 
 script_dir = Path(os.path.dirname(os.path.realpath(__file__)))
 root_dir = script_dir / ".."
 asm_dir = root_dir / "ver/current/asm/nonmatchings/"
-map_file_path = root_dir / "ver/current/build/papermario.map"
+build_dir = root_dir / "ver/current/build/"
+map_file_path = build_dir / "papermario.map"
 rom_path = root_dir / "ver/current/baserom.z64"
 
+OBJDUMP = "mips-linux-gnu-objdump"
+
 @dataclass
 class Symbol:
     name: str
     rom_start: int
     ram: int
-    current_file: str
+    current_file: Path
     prev_sym: str
     is_decompiled: bool
     rom_end: Optional[int] = None
 
     def size(self):
-        assert(self.rom_end is not None)
+        assert self.rom_end is not None
         return self.rom_end - self.rom_start
 
 
 @dataclass
 class Bytes:
     offset: int
@@ -42,6 +47,7 @@ def read_rom() -> bytes:
     with open(rom_path, "rb") as f:
         return f.read()
 
+
 def get_all_unmatched_functions():
     ret = set()
     for root, dirs, files in os.walk(asm_dir):
@@ -55,7 +61,7 @@ def get_symbol_bytes(func: str) -> Optional[Bytes]:
     if func not in syms or syms[func].rom_end is None:
         return None
     sym = syms[func]
-    bs = list(rom_bytes[sym.rom_start:sym.rom_end])
+    bs = list(rom_bytes[sym.rom_start : sym.rom_end])
 
     while len(bs) > 0 and bs[-1] == 0:
         bs.pop()
@@ -66,7 +72,7 @@ def get_symbol_bytes(func: str) -> Optional[Bytes]:
     for ins in insns:
         ret.append(ins >> 2)
 
-    return Bytes(0, bytes(ret).decode('utf-8'), bs)
+    return Bytes(0, bytes(ret).decode("utf-8"), bs)
 
 
 def parse_map() -> OrderedDict[str, Symbol]:
@@ -116,7 +122,7 @@ def parse_map() -> OrderedDict[str, Symbol]:
                 name=fn,
                 rom_start=rom,
                 ram=ram,
-                current_file=cur_file,
+                current_file=Path(cur_file),
                 prev_sym=prev_sym,
                 is_decompiled=not fn in unmatched_functions,
             )
@@ -150,7 +156,7 @@ class Result:
     length: int
 
 
-def get_pair_matches(query_hashes: list[int], sym_hashes: list[int]) -> list[Match]:
+def get_pair_matches(query_hashes: list[str], sym_hashes: list[str]) -> list[Match]:
     ret = []
 
     matching_hashes = set(query_hashes).intersection(sym_hashes)
@@ -159,7 +165,7 @@ def get_pair_matches(query_hashes: list[int], sym_hashes: list[int]) -> list[Mat
     return ret
 
 
-def get_hashes(bytes: Bytes, window_size: int) -> list[int]:
+def get_hashes(bytes: Bytes, window_size: int) -> list[str]:
     ret = []
     for i in range(0, len(bytes.normalized) - window_size):
         ret.append(bytes.normalized[i : i + window_size])
@@ -171,7 +177,7 @@ def group_matches(query: str, target: str, matches: list[Match]) -> list[Result]
 
     matches.sort(key=lambda m: m.query_offset)
 
-    match_groups = []
+    match_groups: List[List[Match]] = []
     last_start = matches[0].query_offset
     for match in matches:
         if match.query_offset == last_start + 1:
@@ -189,6 +195,74 @@ def group_matches(query: str, target: str, matches: list[Match]) -> list[Result]
     return ret
 
 
+def get_line_numbers(obj_file: Path) -> Dict[int, int]:
+    ret = {}
+
+    objdump_out = (
+        subprocess.run(
+            [OBJDUMP, "-WL", obj_file],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+        )
+        .stdout.decode("utf-8")
+        .split("\n")
+    )
+
+    if not objdump_out:
+        return {}
+
+    for line in objdump_out[7:]:
+        if not line:
+            continue
+        pieces = line.split()
+
+        if len(pieces) < 3:
+            continue
+
+        fn = pieces[0]
+
+        if fn == OBJDUMP or fn[0] == "<":
+            continue
+
+        starting_addr = int(pieces[2], 0)
+        try:
+            line_num = int(pieces[1])
+            ret[starting_addr] = line_num
+        except ValueError:
+            continue
+    return ret
+
+
+def get_tu_offset(obj_file: Path, symbol: str) -> Optional[int]:
+    objdump = "mips-linux-gnu-objdump"
+
+    objdump_out = (
+        subprocess.run([objdump, "-t", obj_file], stdout=subprocess.PIPE)
+        .stdout.decode("utf-8")
+        .split("\n")
+    )
+
+    if not objdump_out:
+        return None
+
+    for line in objdump_out[4:]:
+        if not line:
+            continue
+        pieces = line.split()
+
+        if pieces[-1] == symbol:
+            return int(pieces[0], 16)
+    return None
+
+
+def get_c_range(
+    insn_start: int, insn_end: int, line_numbers: Dict[int, int]
+) -> Tuple[Optional[int], Optional[int]]:
+    start = line_numbers.get(insn_start)
+    end = line_numbers.get(insn_end)
+    return start, end
+
+
 def get_matches(query: str, window_size: int):
     query_bytes: Optional[Bytes] = get_symbol_bytes(query)
 
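For context, not part of the commit: the helpers added above map a match back to C source lines by scaling instruction indices by 4 (MIPS instructions are 4 bytes), adding the symbol's offset within its translation unit, and looking the resulting addresses up in the objdump-derived line table. A minimal, self-contained sketch of that lookup, using a hypothetical helper name and made-up table values:

    from typing import Dict, Optional, Tuple

    def c_range_for_match(
        tu_offset: int, insn_start: int, insn_end: int, line_numbers: Dict[int, int]
    ) -> Tuple[Optional[int], Optional[int]]:
        # Same arithmetic get_matches performs before calling get_c_range:
        # byte address = TU offset + instruction index * 4.
        return (
            line_numbers.get(tu_offset + insn_start * 4),
            line_numbers.get(tu_offset + insn_end * 4),
        )

    # Made-up data: a symbol at offset 0x120 in its .o with two known line entries.
    example_table = {0x120: 40, 0x1A0: 62}
    print(c_range_for_match(0x120, 0, 32, example_table))  # -> (40, 62)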
@@ -214,17 +288,46 @@ def get_matches(query: str, window_size: int):
         matches: list[Match] = get_pair_matches(query_hashes, sym_hashes)
         if matches:
             results = group_matches(query, symbol, matches)
+            obj_file = syms[symbol].current_file
 
-            decompiled_str = ""
+            line_numbers = {}
+            tu_offset = None
+            decompiled_str = ":"
             if syms[symbol].is_decompiled:
-                decompiled_str = " (decompiled)"
-            print(symbol + ":" + decompiled_str)
+                line_numbers = get_line_numbers(obj_file)
+                tu_offset = get_tu_offset(obj_file, symbol)
+                decompiled_str = fg.green + " (decompiled)" + fg.rs + ":"
+
+            print(symbol + decompiled_str)
 
             for result in results:
                 total_len = result.length + window_size
-                query_str = f"{query} [{result.query_start}-{result.query_start + total_len}]"
-                target_str = f"{symbol} [{result.target_start}-{result.target_start + total_len}]"
-                print(f"\t{query_str} matches {target_str} ({total_len})")
+                query_end = result.query_start + total_len
+                target_end = result.target_start + total_len
+
+                c_start: Optional[int] = None
+                c_end: Optional[int] = None
+                if tu_offset is not None and len(line_numbers) > 0:
+                    c_start, c_end = get_c_range(
+                        tu_offset + (result.target_start * 4),
+                        tu_offset + (target_end * 4),
+                        line_numbers,
+                    )
+
+                target_range_str = ""
+                if c_start is not None or c_end is not None:
+                    start_str = c_start if c_start is not None else "?"
+                    end_str = c_end if c_end is not None else "?"
+
+                    target_range_str = (
+                        fg.li_cyan + f" (line {start_str}-{end_str} in {obj_file.stem})" + fg.rs
+                    )
+
+                query_str = f"{query} [{result.query_start}-{query_end}]"
+                target_str = (
+                    f"{symbol} [{result.target_start}-{target_end}]{target_range_str}"
+                )
+                print(f"\t{query_str} matches {target_str} ({total_len} total insns)")
 
     return OrderedDict(sorted(ret.items(), key=lambda kv: kv[1], reverse=True))
 
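A note on the coloring above, illustrative rather than part of the commit: sty builds plain ANSI escape strings, so fg.green and fg.li_cyan start a color, fg.rs resets it, and the pieces concatenate like ordinary strings, which is how decompiled_str and target_range_str are assembled. A tiny standalone example, assuming the sty package is installed and using made-up symbol and line values:

    from sty import fg

    # Mirrors how the script tags decompiled symbols and C line ranges.
    print("some_symbol" + fg.green + " (decompiled)" + fg.rs + ":")
    print("\t" + fg.li_cyan + "(line 40-62 in some_object)" + fg.rs)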
@@ -232,9 +335,19 @@ def get_matches(query: str, window_size: int):
 def do_query(query, window_size):
     get_matches(query, window_size)
 
-parser = argparse.ArgumentParser(description="Tool to find duplicate portions of code from one function in code across the codebase")
+
+parser = argparse.ArgumentParser(
+    description="Tool to find duplicate portions of code from one function in code across the codebase"
+)
 parser.add_argument("query", help="function")
-parser.add_argument("-w", "--window-size", help="number of bytes to compare", type=int, default=20, required=False)
+parser.add_argument(
+    "-w",
+    "--window-size",
+    help="number of bytes to compare",
+    type=int,
+    default=20,
+    required=False,
+)
 
 args = parser.parse_args()
 
@@ -6680,7 +6680,7 @@ segments:
       start: 0x6DDDC0
       vram: 0x80218000
       subsegments:
-      - [0x669D80, c, actor/koopa_troopa]
+      - [0x6DDDC0, c, actor/koopa_troopa]
       - [0x6DDE90, c, actor/fuzzy]
       - [0x6DE000, c, actor/pokey]
       - [0x6DE0D0, c, actor/bandit]
@@ -8917,7 +8917,7 @@ segments:
       - [0x953FC0, c, sbk_56_1_main]
       - [0x954D80, c, sbk_56_2_entity]
       - [0x954D80, c, sbk_56_3_foliage]
-      - [0x954FC00]
+      - [0x954FC0]
   - name: sbk_60
     dir: world/area_sbk/sbk_60
     type: code