Splat update to 0.7.3 (#283)

* change splat branch to master

* git subrepo pull --force tools/splat

subrepo:
  subdir:   "tools/splat"
  merged:   "924414a51d"
upstream:
  origin:   "https://github.com/ethteck/splat.git"
  branch:   "master"
  commit:   "924414a51d"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo"
  commit:   "2f68596"
This commit is contained in:
Ethan Roseman 2021-04-27 21:36:33 +09:00 committed by GitHub
parent 1c0d26e6c6
commit ac3797ea56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 101 additions and 31 deletions

15
tools/splat/.github/workflows/mypy.yml vendored Normal file
View File

@@ -0,0 +1,15 @@
# GitHub Actions workflow that type-checks the repository with mypy.
# The nesting below is reconstructed from the flattened listing according to
# the GitHub Actions workflow schema; every key and value is unchanged.
name: mypy

# Run on pushes to master and on pull requests that target master.
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]

jobs:
  mypy:
    runs-on: ubuntu-latest
    steps:
      # Fetch the repository contents so there is source for the checker to read.
      - uses: actions/checkout@v2
      # Third-party action that performs the mypy run.
      # NOTE(review): this tracks the action's mutable `master` branch; consider
      # pinning to a tag or commit once a suitable release is confirmed.
      - uses: jpetrucciani/mypy-check@master

View File

@@ -5,8 +5,8 @@
;
[subrepo]
remote = https://github.com/ethteck/splat.git
branch = imgflip
commit = 4e012eaad6bffc4da7aed13d9a7f86bbfddf9150
parent = 8acbae1ea523f217fc6f2780ee83b37f5f2ac05b
branch = master
commit = 924414a51d0bcc52076b6ee7147b1bb1d20e804a
parent = 7515d21506205b43cccd28875f0d2765addb36ad
method = merge
cmdver = 0.4.3

View File

@@ -1,5 +1,11 @@
# splat Release Notes
### 0.7.2
* Data disassembly changes:
* String detection has been improved. Please send me false positives / negatives as you see them and I can try to improve it further!
* Symbols in a data segment pointed to by other symbols will now properly be split out as their own symbols
### 0.7.1
* Image segment changes:

View File

@@ -1,6 +1,6 @@
from segtypes.n64.codesubsegment import N64SegCodeSubsegment
from pathlib import Path
from typing import Optional
from typing import Dict, Optional
from util.symbols import Symbol
from util import floats, options
@@ -33,14 +33,21 @@ class N64SegData(N64SegCodeSubsegment):
def get_linker_section(self) -> str:
return ".data"
def get_symbols(self):
ret = []
def get_symbols(self, rom_bytes):
symset = set()
# Find inter-data symbols
for i in range(self.rom_start, self.rom_end, 4):
bits = int.from_bytes(rom_bytes[i : i + 4], "big")
if self.contains_vram(bits):
symset.add(self.parent.get_symbol(bits, create=True, define=True, local_only=True))
for symbol_addr in self.seg_symbols:
for symbol in self.seg_symbols[symbol_addr]:
if not symbol.dead and self.contains_vram(symbol.vram_start):
ret.append(symbol)
symset.add(symbol)
ret = list(symset)
ret.sort(key=lambda s:s.vram_start)
# Ensure we start at the beginning
@@ -52,21 +59,75 @@ class N64SegData(N64SegCodeSubsegment):
return ret
def are_null(chars):
    """Return True when every element of ``chars`` is the NUL character.

    ``chars`` is expected to be a ``str`` (for example the tail of a decoded
    string). Each element is compared against the one-character string NUL,
    so iterating a ``bytes`` object, whose elements are ints, never matches
    and yields False for any non-empty input. An empty sequence yields True.
    """
    return all(ch == '\x00' for ch in chars)
@staticmethod
def is_valid_ascii(bytes):
if len(bytes) < 8:
null_char = '\x00'
valid_chars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890[]():%!#=-_ "
invalid_chars = ""
duplicate_limit = 10
last_char = 0
true_end = None
consecutive_duplicates = 0
valid_count = 0
if len(bytes) <= 4 or bytes[0] == 0:
return False
num_empty_bytes = 0
for b in bytes:
if b == 0:
num_empty_bytes += 1
empty_ratio = num_empty_bytes / len(bytes)
if empty_ratio > 0.2:
try:
chars = bytes.decode("EUC-JP")
except:
return False
return True
if len(chars) <= 4:
return False
for i, c in enumerate(chars):
# Ensure null bytes are only at the end of ascii strings
# TODO: if we find null bytes in the middle, break this into multiple strings ?
if c == null_char:
if true_end is None:
if N64SegData.are_null(chars[i:]):
true_end = i
else:
pass
#return False
# Ensure we're not seeing a ton of the same character in a row
if last_char == c:
consecutive_duplicates += 1
if consecutive_duplicates >= duplicate_limit and last_char != null_char:
return False
else:
consecutive_duplicates = 0
if c in valid_chars:
valid_count += 1
elif c in invalid_chars:
return False
last_char = c
# Ensure the number of valid characters is sufficient
if true_end is not None:
# If there are more than 16 null chars at the end, something is afoot
if len(chars) - true_end > 16:
return False
end = true_end
else:
end = len(chars)
valid_ratio = valid_count / end
if valid_ratio >= 0.75:
return True
return False
def disassemble_symbol(self, sym_bytes, sym_type):
if sym_type == "jtbl":
@@ -86,9 +147,10 @@ class N64SegData(N64SegCodeSubsegment):
if sym_type == "ascii":
try:
ascii_str = sym_bytes.decode("EUC-JP")
ascii_str = ascii_str.replace("\\", "\\\\")
# ascii_str = ascii_str.rstrip("\x00")
ascii_str = ascii_str.replace("\x00", "\\0")
ascii_str = ascii_str.replace("\n", "\\n")
sym_str += f'"{ascii_str}"'
return sym_str
except:
@@ -148,7 +210,7 @@ class N64SegData(N64SegCodeSubsegment):
if self.size == 0:
return None
syms = self.get_symbols()
syms = self.get_symbols(rom_bytes)
for i in range(len(syms) - 1):
mnemonic = syms[i].access_mnemonic

View File

@@ -3,16 +3,3 @@ from segtypes.n64.data import N64SegData
class N64SegRodata(N64SegData):
def get_linker_section(self) -> str:
return ".rodata"
def scan(self, rom_bytes: bytes):
self.file_text = self.disassemble_data(rom_bytes)
def split(self, rom_bytes: bytes):
if self.file_text:
path = self.out_path()
if path:
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "w", newline="\n") as f:
f.write(self.file_text)