From ac3797ea569334aab05a9af6ea7fb0898c4b9d2d Mon Sep 17 00:00:00 2001 From: Ethan Roseman Date: Tue, 27 Apr 2021 21:36:33 +0900 Subject: [PATCH] Splat update to 0.7.3 (#283) * change splat branch to master * git subrepo pull --force tools/splat subrepo: subdir: "tools/splat" merged: "924414a51d" upstream: origin: "https://github.com/ethteck/splat.git" branch: "master" commit: "924414a51d" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo" commit: "2f68596" --- tools/splat/.github/workflows/mypy.yml | 15 +++++ tools/splat/.gitrepo | 6 +- tools/splat/CHANGELOG.md | 6 ++ tools/splat/segtypes/n64/data.py | 92 +++++++++++++++++++++----- tools/splat/segtypes/n64/rodata.py | 13 ---- 5 files changed, 101 insertions(+), 31 deletions(-) create mode 100644 tools/splat/.github/workflows/mypy.yml diff --git a/tools/splat/.github/workflows/mypy.yml b/tools/splat/.github/workflows/mypy.yml new file mode 100644 index 0000000000..b842ebd82f --- /dev/null +++ b/tools/splat/.github/workflows/mypy.yml @@ -0,0 +1,15 @@ +name: mypy + +on: + push: + branches: [master] + pull_request: + branches: [master] + +jobs: + mypy: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - uses: jpetrucciani/mypy-check@master diff --git a/tools/splat/.gitrepo b/tools/splat/.gitrepo index 784828a6ee..daa84bdda6 100644 --- a/tools/splat/.gitrepo +++ b/tools/splat/.gitrepo @@ -5,8 +5,8 @@ ; [subrepo] remote = https://github.com/ethteck/splat.git - branch = imgflip - commit = 4e012eaad6bffc4da7aed13d9a7f86bbfddf9150 - parent = 8acbae1ea523f217fc6f2780ee83b37f5f2ac05b + branch = master + commit = 924414a51d0bcc52076b6ee7147b1bb1d20e804a + parent = 7515d21506205b43cccd28875f0d2765addb36ad method = merge cmdver = 0.4.3 diff --git a/tools/splat/CHANGELOG.md b/tools/splat/CHANGELOG.md index 259410a5e8..68bd282b73 100644 --- a/tools/splat/CHANGELOG.md +++ b/tools/splat/CHANGELOG.md @@ -1,5 +1,11 @@ # splat Release Notes +### 0.7.2 + +* Data disassembly changes: + * String detection has been improved. Please send me false positives / negatives as you see them and I can try to improve it further! + * Symbols in a data segment pointed to by other symbols will now properly be split out as their own symbols + ### 0.7.1 * Image segment changes: diff --git a/tools/splat/segtypes/n64/data.py b/tools/splat/segtypes/n64/data.py index 542e4c2b96..2c2a874e19 100644 --- a/tools/splat/segtypes/n64/data.py +++ b/tools/splat/segtypes/n64/data.py @@ -1,6 +1,6 @@ from segtypes.n64.codesubsegment import N64SegCodeSubsegment from pathlib import Path -from typing import Optional +from typing import Dict, Optional from util.symbols import Symbol from util import floats, options @@ -33,14 +33,21 @@ class N64SegData(N64SegCodeSubsegment): def get_linker_section(self) -> str: return ".data" - def get_symbols(self): - ret = [] + def get_symbols(self, rom_bytes): + symset = set() + + # Find inter-data symbols + for i in range(self.rom_start, self.rom_end, 4): + bits = int.from_bytes(rom_bytes[i : i + 4], "big") + if self.contains_vram(bits): + symset.add(self.parent.get_symbol(bits, create=True, define=True, local_only=True)) for symbol_addr in self.seg_symbols: for symbol in self.seg_symbols[symbol_addr]: if not symbol.dead and self.contains_vram(symbol.vram_start): - ret.append(symbol) + symset.add(symbol) + ret = list(symset) ret.sort(key=lambda s:s.vram_start) # Ensure we start at the beginning @@ -52,21 +59,75 @@ class N64SegData(N64SegCodeSubsegment): return ret + def are_null(chars): + for b in chars: + if b != '\x00': + return False + return True + @staticmethod def is_valid_ascii(bytes): - if len(bytes) < 8: + null_char = '\x00' + valid_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890[]():%!#=-_ " + invalid_chars = "" + duplicate_limit = 10 + + last_char = 0 + true_end = None + consecutive_duplicates = 0 + valid_count = 0 + + if len(bytes) <= 4 or bytes[0] == 0: return False - num_empty_bytes = 0 - for b in bytes: - if b == 0: - num_empty_bytes += 1 - - empty_ratio = num_empty_bytes / len(bytes) - if empty_ratio > 0.2: + try: + chars = bytes.decode("EUC-JP") + except: return False - return True + if len(chars) <= 4: + return False + + for i, c in enumerate(chars): + # Ensure null bytes are only at the end of ascii strings + # TODO: if we find null bytes in the middle, break this into multiple strings ? + if c == null_char: + if true_end is None: + if N64SegData.are_null(chars[i:]): + true_end = i + else: + pass + #return False + + # Ensure we're not seeing a ton of the same character in a row + if last_char == c: + consecutive_duplicates += 1 + if consecutive_duplicates >= duplicate_limit and last_char != null_char: + return False + else: + consecutive_duplicates = 0 + + if c in valid_chars: + valid_count += 1 + elif c in invalid_chars: + return False + + last_char = c + + # Ensure the number of valid characters is sufficient + if true_end is not None: + # If there are more than 16 null chars at the end, something is afoot + if len(chars) - true_end > 16: + return False + end = true_end + else: + end = len(chars) + + valid_ratio = valid_count / end + if valid_ratio >= 0.75: + return True + + return False def disassemble_symbol(self, sym_bytes, sym_type): if sym_type == "jtbl": @@ -86,9 +147,10 @@ class N64SegData(N64SegCodeSubsegment): if sym_type == "ascii": try: ascii_str = sym_bytes.decode("EUC-JP") - ascii_str = ascii_str.replace("\\", "\\\\") + # ascii_str = ascii_str.rstrip("\x00") ascii_str = ascii_str.replace("\x00", "\\0") ascii_str = ascii_str.replace("\n", "\\n") + sym_str += f'"{ascii_str}"' return sym_str except: @@ -148,7 +210,7 @@ class N64SegData(N64SegCodeSubsegment): if self.size == 0: return None - syms = self.get_symbols() + syms = self.get_symbols(rom_bytes) for i in range(len(syms) - 1): mnemonic = syms[i].access_mnemonic diff --git a/tools/splat/segtypes/n64/rodata.py b/tools/splat/segtypes/n64/rodata.py index 5cd4dbaee2..64c5832724 100644 --- a/tools/splat/segtypes/n64/rodata.py +++ b/tools/splat/segtypes/n64/rodata.py @@ -3,16 +3,3 @@ from segtypes.n64.data import N64SegData class N64SegRodata(N64SegData): def get_linker_section(self) -> str: return ".rodata" - - def scan(self, rom_bytes: bytes): - self.file_text = self.disassemble_data(rom_bytes) - - def split(self, rom_bytes: bytes): - if self.file_text: - path = self.out_path() - - if path: - path.parent.mkdir(parents=True, exist_ok=True) - - with open(path, "w", newline="\n") as f: - f.write(self.file_text)