git subrepo pull tools/splat

subrepo:
  subdir:   "tools/splat"
  merged:   "b426daf02"
upstream:
  origin:   "https://github.com/ethteck/splat.git"
  branch:   "master"
  commit:   "b426daf02"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo"
  commit:   "2f68596"
This commit is contained in:
Ethan Roseman 2021-02-04 00:09:01 +09:00
parent 43f3a6fa5f
commit f8a344e713
8 changed files with 393 additions and 242 deletions

View File

@ -6,7 +6,7 @@
[subrepo] [subrepo]
remote = https://github.com/ethteck/splat.git remote = https://github.com/ethteck/splat.git
branch = master branch = master
commit = e2b731ab198c1d8400412ffd09252deca65253d2 commit = b426daf02b5b5549a390d8212f601920de35ce7e
parent = 4201a08a28b6d6f070b35f97e3ba726b5448893b parent = 43f3a6fa5f49424940912f3f37dc018d8231ddba
method = merge method = merge
cmdver = 0.4.3 cmdver = 0.4.3

View File

@ -10,3 +10,17 @@
* New `undefined_funcs_auto_path` option * New `undefined_funcs_auto_path` option
* New `cache_path` option * New `cache_path` option
* (All path-like options' names now end with `_path`) * (All path-like options' names now end with `_path`)
## 0.6: The Symbol Update
Internally, there's a new `Symbol` class that holds information about a symbol; instances of it are stored in a couple of places during disassembly. Many things should be improved as a result, such as reconciling symbols within overlays, distinguishing things named as functions from data symbols, and more.
**Breaking change**: The format of `symbol_addrs.txt` has been updated. After specifying the name and address of a symbol (`symbol = addr;`), optional properties of the symbol can be set in an inline comment, space-delimited and in any order. Each property has the format `name:value`.
* `type:` mostly supports only `func` right now, but will support `label` and `data` later on. Internally, `jtbl` is also used, for jump tables. Splat uses type information during disassembly to disambiguate symbols that share the same address.
* `rom:` is for the hex rom address of the symbol, beginning with `0x`. If available, this information is extremely valuable for use in disambiguating symbols.
* `size:` specifies the size of the symbol, which splat will use to generate offsets during disassembly. It uses the same format as `rom:`.
**function example**: `FuncNameHere = 0x80023423; // type:func rom:0x10023`
**data example**: `gSomeDataVar = 0x80024233; // type:data size:0x100`
As always, feel free to reach out to me with any questions, suggestions, or feedback.

View File

@ -1,4 +1,3 @@
from re import split
from capstone import * from capstone import *
from capstone.mips import * from capstone.mips import *
@ -6,10 +5,10 @@ from collections import OrderedDict
from segtypes.n64.segment import N64Segment from segtypes.n64.segment import N64Segment
import os import os
from pathlib import Path, PurePath from pathlib import Path, PurePath
from ranges import Range, RangeDict
import re import re
import sys import sys
from util import floats from util import floats
from util.symbol import Symbol
STRIP_C_COMMENTS_RE = re.compile( STRIP_C_COMMENTS_RE = re.compile(
@ -22,6 +21,12 @@ C_FUNC_RE = re.compile(
re.MULTILINE re.MULTILINE
) )
# Access mnemonics grouped by the data width they imply for the symbol they touch.
# The raw-data fallback in the data disassembler tests membership in these lists
# (together with the symbol's byte length) to choose between double/word/float/short/byte.
double_mnemonics = ["ldc1", "sdc1"]
# NOTE(review): "jtbl" is not a MIPS instruction; it looks like a pseudo-tag for
# jump tables carried over from the older mnemonic-based tagging -- confirm it is still needed.
word_mnemonics = ["addiu", "sw", "lw", "jtbl"]
float_mnemonics = ["lwc1", "swc1"]
# "addiu" is listed for both words and shorts; the word check runs first, so an
# addiu-accessed symbol only falls through to "short" when its length is not a multiple of 4.
short_mnemonics = ["addiu", "lh", "sh", "lhu"]
byte_mnemonics = ["lb", "sb", "lbu"]
def strip_c_comments(text): def strip_c_comments(text):
def replacer(match): def replacer(match):
s = match.group(0) s = match.group(0)
@ -39,64 +44,58 @@ def get_funcs_defined_in_c(c_file):
return set(m.group(2) for m in C_FUNC_RE.finditer(text)) return set(m.group(2) for m in C_FUNC_RE.finditer(text))
def parse_segment_files(segment, segment_class, seg_start, seg_end, seg_name, seg_vram):
    """Expand a code segment's ``files`` config into a list of file descriptors.

    Each entry of ``segment["files"]`` is either a dict with ``start``, ``end``,
    ``type`` and an optional ``name``, or a sequence ``[start, type]`` /
    ``[start, type, name]`` whose end is the start of the following entry (or
    ``seg_end`` for the last entry).

    Args:
        segment: The segment's config mapping.
        segment_class: Unused; kept so existing callers keep working.
        seg_start: Rom start of the segment.
        seg_end: Rom end of the segment.
        seg_name: Name of the segment; also used to build default file names.
        seg_vram: Vram address corresponding to ``seg_start``.

    Returns:
        A list of dicts with keys ``start``, ``end``, ``name``, ``vram`` and
        ``subtype``. When the segment has no ``files`` key, a single ``asm``
        entry covering the whole segment is returned.

    Exits the process with status 1 if the files are not in ascending rom order.
    """
    def _entry_start(entry):
        # Entries may be dicts or sequences; both forms can appear in one list,
        # so the "next start" lookup must not assume the sequence form.
        return entry["start"] if isinstance(entry, dict) else entry[0]

    prefix = seg_name if seg_name.endswith("/") else f"{seg_name}_"

    if "files" not in segment:
        return [{"start": seg_start, "end": seg_end,
                 "name": seg_name, "vram": seg_vram, "subtype": "asm"}]

    files = segment["files"]
    ret = []
    prev_start = -1

    for i, split_file in enumerate(files):
        if isinstance(split_file, dict):
            start = split_file["start"]
            end = split_file["end"]
            name = split_file.get("name")
            subtype = split_file["type"]
        else:
            start = split_file[0]
            end = seg_end if i == len(files) - 1 else _entry_start(files[i + 1])
            name = None if len(split_file) < 3 else split_file[2]
            subtype = split_file[1]

        if start < prev_start:
            print(f"Error: Code segment {seg_name} has files out of ascending rom order (0x{prev_start:X} followed by 0x{start:X})")
            sys.exit(1)

        if not name:
            # A segment that itself carries the default name hands default names
            # to its files; otherwise files are named after the segment.
            name = N64SegCode.get_default_name(start) if seg_name == N64SegCode.get_default_name(seg_start) else f"{prefix}{start:X}"

        vram = seg_vram + (start - seg_start)
        ret.append({"start": start, "end": end, "name": name, "vram": vram, "subtype": subtype})
        prev_start = start

    return ret
class N64SegCode(N64Segment): class N64SegCode(N64Segment):
def parse_segment_files(self, segment, seg_start, seg_end, seg_name, seg_vram):
    """Expand this code segment's ``files`` config into a list of file descriptors.

    Each entry of ``segment["files"]`` is either a dict with ``start``, ``end``,
    ``type`` and an optional ``name``, or a sequence ``[start, type]`` /
    ``[start, type, name]`` whose end is the start of the following entry (or
    ``seg_end`` for the last entry).

    Args:
        segment: The segment's config mapping.
        seg_start: Rom start of the segment.
        seg_end: Rom end of the segment.
        seg_name: Name of the segment; also used to build default file names.
        seg_vram: Vram address corresponding to ``seg_start``.

    Returns:
        A list of dicts with keys ``start``, ``end``, ``name``, ``vram`` and
        ``subtype``. When the segment has no ``files`` key, a single ``asm``
        entry covering the whole segment is returned.

    Exits the process with status 1 if the files are not in ascending rom order.
    """
    def _entry_start(entry):
        # Entries may be dicts or sequences; both forms can appear in one list,
        # so the "next start" lookup must not assume the sequence form.
        return entry["start"] if isinstance(entry, dict) else entry[0]

    prefix = seg_name if seg_name.endswith("/") else f"{seg_name}_"

    if "files" not in segment:
        return [{"start": seg_start, "end": seg_end,
                 "name": seg_name, "vram": seg_vram, "subtype": "asm"}]

    files = segment["files"]
    ret = []
    prev_start = -1

    for i, split_file in enumerate(files):
        if isinstance(split_file, dict):
            start = split_file["start"]
            end = split_file["end"]
            name = split_file.get("name")
            subtype = split_file["type"]
        else:
            start = split_file[0]
            end = seg_end if i == len(files) - 1 else _entry_start(files[i + 1])
            name = None if len(split_file) < 3 else split_file[2]
            subtype = split_file[1]

        if start < prev_start:
            print(f"Error: Code segment {seg_name} has files out of ascending rom order (0x{prev_start:X} followed by 0x{start:X})")
            sys.exit(1)

        if not name:
            # A segment that itself carries the default name hands default names
            # to its files; otherwise files are named after the segment.
            name = self.get_default_name(start) if seg_name == self.get_default_name(seg_start) else f"{prefix}{start:X}"

        vram = seg_vram + (start - seg_start)
        ret.append({"start": start, "end": end, "name": name, "vram": vram, "subtype": subtype})
        prev_start = start

    return ret
def __init__(self, segment, next_segment, options): def __init__(self, segment, next_segment, options):
super().__init__(segment, next_segment, options) super().__init__(segment, next_segment, options)
self.files = parse_segment_files(segment, self.__class__, self.rom_start, self.rom_end, self.name, self.vram_addr) self.files = self.parse_segment_files(segment, self.rom_start, self.rom_end, self.name, self.vram_start)
self.is_overlay = segment.get("overlay", False) self.is_overlay = segment.get("overlay", False)
self.labels_to_add = set() self.all_symbols = ()
self.jtbl_glabels = set() self.seg_symbols = {} # Symbols known to be in this segment
self.glabels_to_add = set() self.ext_symbols = {} # Symbols not in this segment but also not from other overlapping ram address ranges
self.special_labels = {} self.symbol_ranges = []
self.undefined_syms_to_add = set()
self.glabels_added = {}
self.all_functions = {}
self.provided_symbols = {}
self.c_labels_to_add = set()
self.ld_section_name = "." + segment.get("ld_name", f"text_{self.rom_start:X}")
self.symbol_ranges = RangeDict()
self.detected_syms = {}
self.reported_file_split = False self.reported_file_split = False
self.labels_to_add = set()
self.jtbl_glabels_to_add = set()
self.jtbl_jumps = {} self.jtbl_jumps = {}
self.jumptables = {} self.jumptables = {}
@ -104,23 +103,79 @@ class N64SegCode(N64Segment):
def get_default_name(addr): def get_default_name(addr):
return f"code_{addr:X}" return f"code_{addr:X}"
def get_func_name(self, addr): def retrieve_symbol(self, d, k, t):
return self.provided_symbols.get(addr, f"func_{addr:X}") if k not in d:
return None
def get_unique_func_name(self, func_addr, rom_addr): if t:
func_name = self.get_func_name(func_addr) items = [s for s in d[k] if s.type == t or s.type == "unknown"]
else:
items = d[k]
if self.is_overlay and (func_addr >= self.vram_addr) and (func_addr <= self.vram_addr + self.rom_end - self.rom_start): if len(items) > 1:
return func_name + "_{:X}".format(rom_addr) pass #print(f"Trying to retrieve {k:X} from symbol dict but there are {len(items)} entries to pick from - picking the first")
return func_name if len(items) == 0:
return None
return items[0]
def add_glabel(self, ram_addr, rom_addr): def retrieve_symbol_from_ranges(self, vram, rom=None):
func = self.get_unique_func_name(ram_addr, rom_addr) rom_matches = []
self.glabels_to_add.discard(func) ram_matches = []
self.glabels_added[ram_addr] = func
if not self.is_overlay: for symbol in self.symbol_ranges:
self.all_functions[ram_addr] = func if symbol.contains_vram(vram):
return "glabel " + func if symbol.rom and rom and symbol.contains_rom(rom):
rom_matches.append(symbol)
else:
ram_matches.append(symbol)
ret = rom_matches + ram_matches
if len(ret) > 0:
return ret[0]
else:
return None
def get_symbol(self, addr, type=None, create=False, define=False, reference=False, offsets=False, local_only=False):
    """Look up (and optionally create) the symbol at vram address ``addr``.

    Addresses inside this segment are searched in ``seg_symbols``; other
    addresses are searched in ``ext_symbols`` unless ``local_only`` is set.

    Args:
        addr: Vram address of the symbol.
        type: Optional symbol type passed through to the lookup and to a newly
            created symbol.
        create: Create and register a new symbol when none is found.
        define: Mark the returned symbol as defined.
        reference: Mark the returned symbol as referenced.
        offsets: When no symbol is found at ``addr``, fall back to searching the
            symbol ranges.
        local_only: Ignore (and never register) symbols outside this segment.

    Returns:
        The matching or newly created symbol, or ``None``.
    """
    in_segment = self.contains_vram(addr)

    # The rom address can only be derived for addresses inside this segment.
    rom = self.ram_to_rom(addr) if in_segment else None

    # Pick the table this address belongs to; None means "do not look or store".
    if in_segment:
        table = self.seg_symbols
    elif local_only:
        table = None
    else:
        table = self.ext_symbols

    found = None
    if table is not None:
        found = self.retrieve_symbol(table, addr, type)

    # Search for symbol ranges
    if not found and offsets:
        found = self.retrieve_symbol_from_ranges(addr, rom)

    # Create the symbol if it doesn't exist
    if not found and create:
        found = Symbol(addr, rom=rom, type=type)
        self.all_symbols.append(found)

        if in_segment and self.is_overlay:
            found.set_in_overlay()
        if table is not None:
            table.setdefault(addr, []).append(found)

    if found:
        if define:
            found.defined = True
        if reference:
            found.referenced = True

    return found
def get_asm_header(self): def get_asm_header(self):
ret = [] ret = []
@ -192,32 +247,23 @@ class N64SegCode(N64Segment):
print("INVALID INSTRUCTION " + insn) print("INVALID INSTRUCTION " + insn)
elif mnemonic == "jal": elif mnemonic == "jal":
jal_addr = int(op_str, 0) jal_addr = int(op_str, 0)
jump_func = self.get_func_name(jal_addr) jump_func = self.get_symbol(jal_addr, type="func", create=True, reference=True)
if ( op_str = jump_func.name
jump_func.startswith("func_")
and self.is_overlay
and jal_addr >= self.vram_addr
and jal_addr <= (self.vram_addr + self.rom_end - self.rom_start)
):
func_loc = self.rom_start + jal_addr - self.vram_addr
jump_func += "_{:X}".format(func_loc)
if jump_func not in self.provided_symbols.values():
self.glabels_to_add.add(jump_func)
op_str = jump_func
elif self.is_branch_insn(insn.mnemonic): elif self.is_branch_insn(insn.mnemonic):
op_str_split = op_str.split(" ") op_str_split = op_str.split(" ")
branch_target = op_str_split[-1] branch_target = op_str_split[-1]
branch_target_int = int(branch_target, 0) branch_target_int = int(branch_target, 0)
label = "" label = ""
if branch_target_int in self.special_labels: label = self.get_symbol(branch_target_int, type="label", reference=True, local_only=True)
label = self.special_labels[branch_target_int]
if label:
label_name = label.name
else: else:
self.labels_to_add.add(branch_target_int) self.labels_to_add.add(branch_target_int)
label = ".L" + branch_target[2:].upper() label_name = f".L{branch_target[2:].upper()}"
op_str = " ".join(op_str_split[:-1] + [label]) op_str = " ".join(op_str_split[:-1] + [label_name])
elif mnemonic == "mtc0" or mnemonic == "mfc0": elif mnemonic == "mtc0" or mnemonic == "mfc0":
rd = (insn.bytes[2] & 0xF8) >> 3 rd = (insn.bytes[2] & 0xF8) >> 3
op_str = op_str.split(" ")[0] + " $" + str(rd) op_str = op_str.split(" ")[0] + " $" + str(rd)
@ -239,7 +285,7 @@ class N64SegCode(N64Segment):
end_func = True end_func = True
continue continue
if i < len(insns) - 1 and self.get_func_name(insns[i + 1].address) in self.c_labels_to_add: if i < len(insns) - 1 and self.get_symbol(insns[i + 1].address, local_only=True, type="func"):
end_func = True end_func = True
if end_func: if end_func:
@ -262,33 +308,24 @@ class N64SegCode(N64Segment):
return fl return fl
return None return None
def store_symbol_access(self, addr, mnemonic): def update_access_mnemonic(self, sym, mnemonic):
# Don't overwrite useful info with addiu if not sym.access_mnemonic:
if addr in self.detected_syms and self.detected_syms[addr] != "addiu": sym.access_mnemonic = mnemonic
elif sym.access_mnemonic == "addiu":
sym.access_mnemonic = mnemonic
elif sym.access_mnemonic in double_mnemonics:
return return
elif sym.access_mnemonic in float_mnemonics and mnemonic in double_mnemonics:
self.detected_syms[addr] = mnemonic sym.access_mnemonic = mnemonic
elif sym.access_mnemonic in short_mnemonics:
def get_symbol_name(self, addr, rom_addr, funcs=None): return
if funcs and addr in funcs: elif sym.access_mnemonic in byte_mnemonics:
return self.get_unique_func_name(addr, rom_addr) return
if addr in self.all_functions: else:
return self.all_functions[addr] # todo clean up funcs vs all_functions sym.access_mnemonic = mnemonic
if addr in self.provided_symbols:
return self.provided_symbols[addr]
if addr in self.jumptables:
return f"jtbl_{addr:X}_{rom_addr:X}"
if addr in self.symbol_ranges:
ret = self.symbol_ranges.get(addr)
offset = addr - self.symbol_ranges.getrange(addr).start
if offset != 0:
ret += f"+0x{offset:X}"
return ret
return f"D_{addr:X}"
# Determine symbols # Determine symbols
def determine_symbols(self, funcs, rom_addr): def determine_symbols(self, funcs):
ret = {} ret = {}
for func_addr in funcs: for func_addr in funcs:
@ -345,29 +382,31 @@ class N64SegCode(N64Segment):
s_str = s_op_split[-1] s_str = s_op_split[-1]
symbol_addr = (lui_val * 0x10000) + int(s_str, 0) symbol_addr = (lui_val * 0x10000) + int(s_str, 0)
symbol_name = self.get_symbol_name(symbol_addr, symbol_addr - next(iter(funcs)) + rom_addr, funcs)
symbol_tag = s_insn.mnemonic
vram_end = self.vram_addr + self.rom_end - self.rom_start sym = None
if symbol_addr > func_addr and symbol_addr < vram_end and len(possible_jtbl_jumps) > 0 and func_end_addr - s_insn.address >= 0x30: offset_str = ""
if symbol_addr > func_addr and symbol_addr < self.vram_end and len(possible_jtbl_jumps) > 0 and func_end_addr - s_insn.address >= 0x30:
for jump in possible_jtbl_jumps: for jump in possible_jtbl_jumps:
if jump[1] == s_op_split[0]: if jump[1] == s_op_split[0]:
dist_to_jump = possible_jtbl_jumps[0][0] - s_insn.address dist_to_jump = possible_jtbl_jumps[0][0] - s_insn.address
if dist_to_jump <= 16: if dist_to_jump <= 16:
symbol_name = f"jtbl_{symbol_addr:X}_{self.ram_to_rom(symbol_addr):X}" sym = self.get_symbol(symbol_addr, create=True, reference=True, type="jtbl", local_only=True)
symbol_tag = "jtbl"
self.jumptables[symbol_addr] = (func_addr, func_end_addr) self.jumptables[symbol_addr] = (func_addr, func_end_addr)
break break
self.store_symbol_access(symbol_addr, symbol_tag) if not sym:
symbol_file = self.get_file_for_addr(symbol_addr) sym = self.get_symbol(symbol_addr, create=True, offsets=True, reference=True)
offset = symbol_addr - sym.vram_start
if offset != 0:
offset_str = f"+0x{offset:X}"
if not symbol_file or symbol_file["subtype"] == "bin": self.update_access_mnemonic(sym, s_insn.mnemonic)
if "+" not in symbol_name:
self.undefined_syms_to_add.add((symbol_name, symbol_addr))
func[i] += ("%hi({})".format(symbol_name),) sym_label = sym.name + offset_str
func[j] += ("%lo({}){}".format(symbol_name, reg_ext),)
func[i] += ("%hi({})".format(sym_label),)
func[j] += ("%lo({}){}".format(sym_label, reg_ext),)
break break
ret[func_addr] = func ret[func_addr] = func
return ret return ret
@ -380,7 +419,8 @@ class N64SegCode(N64Segment):
# Add function glabel # Add function glabel
rom_addr = funcs[func][0][3] rom_addr = funcs[func][0][3]
func_text.append(self.add_glabel(func, rom_addr)) sym = self.get_symbol(func, type="func", create=True, define=True, local_only=True)
func_text.append(f"glabel {sym.name}")
indent_next = False indent_next = False
@ -393,7 +433,7 @@ class N64SegCode(N64Segment):
if insn_addr in self.labels_to_add: if insn_addr in self.labels_to_add:
self.labels_to_add.remove(insn_addr) self.labels_to_add.remove(insn_addr)
func_text.append(".L{:X}:".format(insn_addr)) func_text.append(".L{:X}:".format(insn_addr))
if insn_addr in self.jtbl_glabels: if insn_addr in self.jtbl_glabels_to_add:
func_text.append(f"glabel L{insn_addr:X}_{insn[3]:X}") func_text.append(f"glabel L{insn_addr:X}_{insn[3]:X}")
if rom_addr_padding: if rom_addr_padding:
@ -451,7 +491,8 @@ class N64SegCode(N64Segment):
return super().should_run() or (st in self.options["modes"] and st in subtypes for st in possible_subtypes) return super().should_run() or (st in self.options["modes"] and st in subtypes for st in possible_subtypes)
def is_valid_ascii(self, bytes): @staticmethod
def is_valid_ascii(bytes):
if len(bytes) < 8: if len(bytes) < 8:
return False return False
@ -467,10 +508,17 @@ class N64SegCode(N64Segment):
return True return True
def get_symbols_for_file(self, split_file): def get_symbols_for_file(self, split_file):
ret = []
vram_start = split_file["vram"] vram_start = split_file["vram"]
vram_end = split_file["vram"] + split_file["end"] - split_file["start"] vram_end = split_file["vram"] + split_file["end"] - split_file["start"]
return [(s, self.detected_syms[s]) for s in self.detected_syms if s >= vram_start and s <= vram_end] for symbol_addr in self.seg_symbols:
for symbol in self.seg_symbols[symbol_addr]:
if symbol.vram_start >= vram_start and symbol.vram_end < vram_end:
ret.append(symbol)
return ret
def disassemble_symbol(self, sym_bytes, sym_type): def disassemble_symbol(self, sym_bytes, sym_type):
if sym_type == "jtbl": if sym_type == "jtbl":
@ -502,8 +550,14 @@ class N64SegCode(N64Segment):
byte_str = f"L{bits:X}_{rom_addr:X}" byte_str = f"L{bits:X}_{rom_addr:X}"
else: else:
byte_str = f"0x{bits:X}" byte_str = f"0x{bits:X}"
elif slen == 4 and bits >= 0x80000000:
sym = self.get_symbol(bits, reference=True)
if sym:
byte_str = sym.name
else:
byte_str = '0x{0:0{1}X}'.format(bits, 2 * slen)
else: else:
byte_str = self.provided_symbols.get(bits, '0x{0:0{1}X}'.format(bits, 2 * slen)) byte_str = '0x{0:0{1}X}'.format(bits, 2 * slen)
if sym_type in ["float", "double"]: if sym_type in ["float", "double"]:
if sym_type == "float": if sym_type == "float":
@ -512,7 +566,7 @@ class N64SegCode(N64Segment):
float_str = floats.format_f64_imm(bits) float_str = floats.format_f64_imm(bits)
# Fall back to .word if we see weird float values # Fall back to .word if we see weird float values
# todo cut the symbol in half maybe where we see the first nan or something # TODO: cut the symbol in half maybe where we see the first nan or something
if "e-" in float_str or "nan" in float_str: if "e-" in float_str or "nan" in float_str:
return self.disassemble_symbol(sym_bytes, "word") return self.disassemble_symbol(sym_bytes, "word")
else: else:
@ -532,31 +586,41 @@ class N64SegCode(N64Segment):
ret = ".include \"macro.inc\"\n\n" ret = ".include \"macro.inc\"\n\n"
ret += f'.section .{split_file["subtype"]}' ret += f'.section .{split_file["subtype"]}'
# Todo remove when we have class for file
file_size = split_file['end'] - split_file['start']
if file_size == 0:
return None
syms = self.get_symbols_for_file(split_file) syms = self.get_symbols_for_file(split_file)
syms.sort(key=lambda x:x[0]) syms.sort(key=lambda s:s.vram_start)
if len(syms) == 0: if len(syms) == 0:
self.warn("No symbol accesses detected for " + split_file["name"] + "; the output will most likely be an ugly blob") self.warn("No symbol accesses detected for " + split_file["name"] + "; the output will most likely be an ugly blob")
# check beginning # Ensure we start at the beginning
if syms[0][0] != split_file["vram"]: if len(syms) == 0 or syms[0].vram_start != split_file["vram"]:
syms.insert(0, (split_file["vram"], None)) syms.insert(0, self.get_symbol(split_file["vram"], create=True, define=True, local_only=True))
# add end vram_end = split_file["vram"] + file_size
vram_end = split_file["vram"] + split_file["end"] - split_file["start"] if syms[-1].vram_start != vram_end:
if syms[-1][0] != vram_end: # Make a dummy symbol here that marks the end of the previous symbol's disasm range
syms.append((vram_end, None)) syms.append(Symbol(vram_end))
for i in range(len(syms) - 1): for i in range(len(syms) - 1):
mnemonic = syms[i][1] mnemonic = syms[i].access_mnemonic
start = syms[i][0] start = syms[i].vram_start
end = syms[i + 1][0] end = syms[i + 1].vram_start
sym_rom_start = start - split_file["vram"] + split_file["start"] sym_rom_start = start - split_file["vram"] + split_file["start"]
sym_rom_end = end - split_file["vram"] + split_file["start"] sym_rom_end = end - split_file["vram"] + split_file["start"]
sym_name = self.get_symbol_name(start, sym_rom_start) sym = self.get_symbol(start, create=True, define=True, local_only=True)
sym_str = f"\n\nglabel {sym_name}\n" sym_str = f"\n\nglabel {sym.name}\n"
sym_bytes = rom_bytes[sym_rom_start : sym_rom_end] sym_bytes = rom_bytes[sym_rom_start : sym_rom_end]
# TODO: Hack for null mnemonic - move elsewhere later, probably
if not mnemonic:
mnemonic = "addiu"
# .ascii # .ascii
if self.is_valid_ascii(sym_bytes) and mnemonic == "addiu": if self.is_valid_ascii(sym_bytes) and mnemonic == "addiu":
# mnemonic thing may be too picky, we'll see # mnemonic thing may be too picky, we'll see
@ -571,19 +635,19 @@ class N64SegCode(N64Segment):
pass pass
# Fallback to raw data # Fallback to raw data
if mnemonic == "jtbl": if syms[i].type == "jtbl":
stype = "jtbl" stype = "jtbl"
elif len(sym_bytes) % 8 == 0 and mnemonic in ["ldc1", "sdc1"]: elif len(sym_bytes) % 8 == 0 and mnemonic in double_mnemonics:
stype = "double" stype = "double"
elif len(sym_bytes) % 4 == 0 and mnemonic in ["addiu", "sw", "lw", "jtbl"]: elif len(sym_bytes) % 4 == 0 and mnemonic in word_mnemonics:
stype = "word" stype = "word"
elif len(sym_bytes) % 4 == 0 and mnemonic in ["lwc1", "swc1"]: elif len(sym_bytes) % 4 == 0 and mnemonic in float_mnemonics:
stype = "float" stype = "float"
elif len(sym_bytes) % 2 == 0 and mnemonic in ["addiu", "lh", "sh", "lhu"]: elif len(sym_bytes) % 2 == 0 and mnemonic in short_mnemonics:
stype = "short" stype = "short"
else: else:
stype = "byte" stype = "byte"
if not rodata_encountered and mnemonic == "jtbl": if not rodata_encountered and mnemonic == "jtbl":
rodata_encountered = True rodata_encountered = True
ret += "\n\n\n.section .rodata" ret += "\n\n\n.section .rodata"
@ -604,10 +668,12 @@ class N64SegCode(N64Segment):
return ret return ret
def gather_jumptable_labels(self, section_vram, section_rom, rom_bytes): def gather_jumptable_labels(self, rom_bytes):
# TODO: use the seg_symbols for this
# jumptables = [j.type == "jtbl" for j in self.seg_symbols]
for jumptable in self.jumptables: for jumptable in self.jumptables:
start, end = self.jumptables[jumptable] start, end = self.jumptables[jumptable]
rom_offset = section_rom + jumptable - section_vram rom_offset = self.rom_start + jumptable - self.vram_start
if rom_offset <= 0: if rom_offset <= 0:
return return
@ -616,12 +682,23 @@ class N64SegCode(N64Segment):
word = rom_bytes[rom_offset : rom_offset + 4] word = rom_bytes[rom_offset : rom_offset + 4]
word_int = int.from_bytes(word, "big") word_int = int.from_bytes(word, "big")
if word_int >= start and word_int <= end: if word_int >= start and word_int <= end:
self.jtbl_glabels.add(word_int) self.jtbl_glabels_to_add.add(word_int)
else: else:
break break
rom_offset += 4 rom_offset += 4
def mark_c_funcs_as_defined(self, c_funcs):
    """Flag the segment symbols that correspond to functions defined in C.

    For every name in ``c_funcs``, the first symbol in ``seg_symbols`` (in
    iteration order) whose ``name`` matches gets ``defined = True``. Names with
    no matching symbol are ignored.

    Args:
        c_funcs: Iterable of function names found in the C source.
    """
    # Walk the symbols once and tick names off, instead of rescanning every
    # symbol for every function name.
    pending = set(c_funcs)
    if not pending:
        return

    for symbols in self.seg_symbols.values():
        for symbol in symbols:
            name = symbol.name
            if name in pending:
                symbol.defined = True
                # Only the first symbol carrying a given name is marked.
                pending.discard(name)
                if not pending:
                    return
def split(self, rom_bytes, base_path): def split(self, rom_bytes, base_path):
md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN) md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
@ -645,8 +722,13 @@ class N64SegCode(N64Segment):
insns = [insn for insn in md.disasm(rom_bytes[split_file["start"]: split_file["end"]], split_file["vram"])] insns = [insn for insn in md.disasm(rom_bytes[split_file["start"]: split_file["end"]], split_file["vram"])]
funcs = self.process_insns(insns, rom_addr) funcs = self.process_insns(insns, rom_addr)
funcs = self.determine_symbols(funcs, rom_addr)
self.gather_jumptable_labels(self.vram_addr, self.rom_start, rom_bytes) # TODO: someday make func a subclass of symbol and store this disasm info there too
for func in funcs:
self.get_symbol(func, type="func", create=True, define=True, local_only=True)
funcs = self.determine_symbols(funcs)
self.gather_jumptable_labels(rom_bytes)
funcs_text = self.add_labels(funcs) funcs_text = self.add_labels(funcs)
if file_type == "c": if file_type == "c":
@ -655,6 +737,7 @@ class N64SegCode(N64Segment):
if os.path.exists(c_path): if os.path.exists(c_path):
defined_funcs = get_funcs_defined_in_c(c_path) defined_funcs = get_funcs_defined_in_c(c_path)
self.mark_c_funcs_as_defined(defined_funcs)
else: else:
defined_funcs = set() defined_funcs = set()
@ -662,8 +745,7 @@ class N64SegCode(N64Segment):
base_path, os.path.join("asm", "nonmatchings")) base_path, os.path.join("asm", "nonmatchings"))
for func in funcs_text: for func in funcs_text:
func_name = self.get_unique_func_name( func_name = self.get_symbol(func, type="func", local_only=True).name
func, funcs_text[func][1])
if func_name not in defined_funcs: if func_name not in defined_funcs:
if self.options.get("compiler", "IDO") == "GCC": if self.options.get("compiler", "IDO") == "GCC":
@ -686,7 +768,7 @@ class N64SegCode(N64Segment):
c_lines = self.get_c_preamble() c_lines = self.get_c_preamble()
for func in funcs_text: for func in funcs_text:
func_name = self.get_unique_func_name(func, funcs_text[func][1]) func_name = self.get_symbol(func, type="func", local_only=True).name
if self.options.get("compiler", "IDO") == "GCC": if self.options.get("compiler", "IDO") == "GCC":
c_lines.append("INCLUDE_ASM(s32, \"{}\", {});".format(split_file["name"], func_name)) c_lines.append("INCLUDE_ASM(s32, \"{}\", {});".format(split_file["name"], func_name))
else: else:

View File

@ -48,7 +48,7 @@ class Segment:
self.rom_end = parse_segment_start(next_segment) self.rom_end = parse_segment_start(next_segment)
self.type = parse_segment_type(segment) self.type = parse_segment_type(segment)
self.name = parse_segment_name(segment, self.__class__) self.name = parse_segment_name(segment, self.__class__)
self.vram_addr = parse_segment_vram(segment) self.vram_start = parse_segment_vram(segment)
self.ld_name_override = segment.get("ld_name", None) if type(segment) is dict else None self.ld_name_override = segment.get("ld_name", None) if type(segment) is dict else None
self.options = options self.options = options
self.config = segment self.config = segment
@ -73,19 +73,25 @@ class Segment:
@property @property
def vram_end(self): def vram_end(self):
return self.vram_addr + self.size return self.vram_start + self.size
def contains_vram(self, vram):
    """Return whether ``vram`` falls in the half-open range [vram_start, vram_end)."""
    return self.vram_start <= vram < self.vram_end
def contains_rom(self, rom):
    """Return whether ``rom`` falls in the half-open range [rom_start, rom_end)."""
    return self.rom_start <= rom < self.rom_end
def rom_to_ram(self, rom_addr): def rom_to_ram(self, rom_addr):
if rom_addr < self.rom_start or rom_addr > self.rom_end: if rom_addr < self.rom_start or rom_addr > self.rom_end:
return None return None
return self.vram_addr + rom_addr - self.rom_start return self.vram_start + rom_addr - self.rom_start
def ram_to_rom(self, ram_addr): def ram_to_rom(self, ram_addr):
if ram_addr < self.vram_addr or ram_addr > self.vram_end: if ram_addr < self.vram_start or ram_addr > self.vram_end:
return None return None
return self.rom_start + ram_addr - self.vram_addr return self.rom_start + ram_addr - self.vram_start
def create_split_dir(self, base_path, subdir): def create_split_dir(self, base_path, subdir):
out_dir = Path(base_path, subdir) out_dir = Path(base_path, subdir)
@ -112,7 +118,7 @@ class Segment:
def get_ld_section(self): def get_ld_section(self):
replace_ext = self.options.get("ld_o_replace_extension", True) replace_ext = self.options.get("ld_o_replace_extension", True)
sect_name = self.ld_name_override if self.ld_name_override else self.get_ld_section_name() sect_name = self.ld_name_override if self.ld_name_override else self.get_ld_section_name()
vram_or_rom = self.rom_start if self.vram_addr == 0 else self.vram_addr vram_or_rom = self.rom_start if self.vram_start == 0 else self.vram_start
subalign_str = "" if self.subalign == default_subalign else f"SUBALIGN({self.subalign})" subalign_str = "" if self.subalign == default_subalign else f"SUBALIGN({self.subalign})"
s = ( s = (
@ -125,7 +131,7 @@ class Segment:
if start % 0x10 != 0 and i != 0: if start % 0x10 != 0 and i != 0:
tmp_sect_name = path.replace(".", "_") tmp_sect_name = path.replace(".", "_")
tmp_sect_name = tmp_sect_name.replace("/", "_") tmp_sect_name = tmp_sect_name.replace("/", "_")
tmp_vram = start - self.rom_start + self.vram_addr tmp_vram = start - self.rom_start + self.vram_start
s += ( s += (
"}\n" "}\n"
f"SPLAT_BEGIN_SEG({tmp_sect_name}, 0x{start:X}, 0x{tmp_vram:X}, {subalign_str})\n" f"SPLAT_BEGIN_SEG({tmp_sect_name}, 0x{start:X}, 0x{tmp_vram:X}, {subalign_str})\n"

View File

@ -4,7 +4,6 @@ import argparse
import importlib import importlib
import importlib.util import importlib.util
import os import os
from ranges import Range, RangeDict
from pathlib import Path from pathlib import Path
import yaml import yaml
import pickle import pickle
@ -12,6 +11,7 @@ from colorama import Style, Fore
from segtypes.segment import parse_segment_type from segtypes.segment import parse_segment_type
from segtypes.n64.code import N64SegCode from segtypes.n64.code import N64SegCode
from util import log from util import log
from util.symbol import Symbol
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Split a rom given a rom, a config, and output directory") description="Split a rom given a rom, a config, and output directory")
@ -24,6 +24,7 @@ parser.add_argument("--verbose", action="store_true",
parser.add_argument("--new", action="store_true", parser.add_argument("--new", action="store_true",
help="Only split changed segments in config") help="Only split changed segments in config")
sym_isolated_map = {}
def write_ldscript(rom_name, repo_path, sections, options): def write_ldscript(rom_name, repo_path, sections, options):
with open(os.path.join(repo_path, rom_name + ".ld"), "w", newline="\n") as f: with open(os.path.join(repo_path, rom_name + ".ld"), "w", newline="\n") as f:
@ -97,10 +98,7 @@ def get_cache_path(repo_path, options):
def gather_symbols(symbol_addrs_path, undefined_syms_path): def gather_symbols(symbol_addrs_path, undefined_syms_path):
symbols = {} symbols = []
special_labels = {}
labels_to_add = set()
ranges = RangeDict()
# Manual list of func name / addrs # Manual list of func name / addrs
if os.path.exists(symbol_addrs_path): if os.path.exists(symbol_addrs_path):
@ -120,17 +118,23 @@ def gather_symbols(symbol_addrs_path, undefined_syms_path):
line_split = line.split("=") line_split = line.split("=")
name = line_split[0].strip() name = line_split[0].strip()
addr = int(line_split[1].strip()[:-1], 0) addr = int(line_split[1].strip()[:-1], 0)
symbols[addr] = name
sym = Symbol(addr, given_name=name)
if line_ext: if line_ext:
for info in line_ext.split(" "): for info in line_ext.split(" "):
if info == "!": if info.startswith("type:"):
labels_to_add.add(name) type = info.split(":")[1]
special_labels[addr] = name sym.type = type
if info.startswith("size:"): if info.startswith("size:"):
size = int(info.split(":")[1], 0) size = int(info.split(":")[1], 0)
ranges.add(Range(addr, addr + size), name) sym.size = size
if info.startswith("rom:"):
rom_addr = int(info.split(":")[1], 0)
sym.rom = rom_addr
symbols.append(sym)
# Maybe let's not use this
if os.path.exists(undefined_syms_path): if os.path.exists(undefined_syms_path):
with open(undefined_syms_path) as f: with open(undefined_syms_path) as f:
us_lines = f.readlines() us_lines = f.readlines()
@ -141,27 +145,9 @@ def gather_symbols(symbol_addrs_path, undefined_syms_path):
line_split = line.split("=") line_split = line.split("=")
name = line_split[0].strip() name = line_split[0].strip()
addr = int(line_split[1].strip()[:-1], 0) addr = int(line_split[1].strip()[:-1], 0)
symbols[addr] = name symbols.append(Symbol(addr, given_name=name))
return symbols, labels_to_add, special_labels, ranges return symbols
def gather_c_variables(undefined_syms_path):
    """Read ``name = addr;`` assignments from a file into an address map.

    Blank lines and lines starting with ``//`` are ignored. The address is
    parsed with ``int(text, 0)``, so hex (``0x...``) and decimal literals
    are both accepted.

    Args:
        undefined_syms_path: Path of the text file to read. A missing file
            is not an error.

    Returns:
        A dict mapping each parsed address to its symbol name. When the same
        address appears more than once, the last name wins.

    Raises:
        ValueError: If a non-comment line has no parsable address.
    """
    c_vars = {}
    if not os.path.exists(undefined_syms_path):
        return c_vars

    with open(undefined_syms_path) as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line or line.startswith("//"):
                continue
            name, _, rhs = line.partition("=")
            # Keep only the text before the terminating ';' instead of blindly
            # dropping the last character: a line without ';' must not lose a
            # digit, and a trailing "// comment" must not break int().
            addr_text = rhs.split(";", 1)[0].strip()
            c_vars[int(addr_text, 0)] = name.strip()
    return c_vars
def get_base_segment_class(seg_type, platform): def get_base_segment_class(seg_type, platform):
@ -239,6 +225,46 @@ def initialize_segments(options, config_path, config_segments):
return ret return ret
def is_symbol_isolated(symbol, all_segments):
    """Tell whether at most one segment claims the symbol's vram address.

    The answer is memoized per symbol in the module-level
    ``sym_isolated_map``, so ``all_segments`` is only scanned the first time
    a given symbol is queried.

    Args:
        symbol: Object exposing ``vram_start``; also used as the cache key.
        all_segments: Iterable of segments exposing ``contains_vram(addr)``.

    Returns:
        True when fewer than two segments contain ``symbol.vram_start``.
    """
    try:
        return sym_isolated_map[symbol]
    except KeyError:
        pass

    hits = 0
    for seg in all_segments:
        if not seg.contains_vram(symbol.vram_start):
            continue
        hits += 1
        # Two matches already decide the answer; no need to look further.
        if hits >= 2:
            break

    isolated = hits <= 1
    sym_isolated_map[symbol] = isolated
    return isolated
def get_segment_symbols(segment, all_symbols, all_segments):
    """Partition symbols into those owned by ``segment`` and all the others.

    A symbol that is isolated (see ``is_symbol_isolated``) and has no rom
    address is matched by vram address. Every other symbol is owned only if
    it has a rom address that ``segment.contains_rom`` accepts.

    Args:
        segment: Segment exposing ``contains_vram`` and ``contains_rom``.
        all_symbols: Iterable of symbols exposing ``vram_start`` and ``rom``.
        all_segments: All segments, forwarded to ``is_symbol_isolated``.

    Returns:
        A ``(seg_syms, other_syms)`` tuple of dicts, each mapping a vram
        address to the list of symbols at that address.
    """
    owned_by_segment = {}
    owned_elsewhere = {}

    for sym in all_symbols:
        if is_symbol_isolated(sym, all_segments) and not sym.rom:
            belongs = segment.contains_vram(sym.vram_start)
        else:
            belongs = sym.rom and segment.contains_rom(sym.rom)

        bucket = owned_by_segment if belongs else owned_elsewhere
        bucket.setdefault(sym.vram_start, []).append(sym)

    return owned_by_segment, owned_elsewhere
def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False): def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
with open(rom_path, "rb") as f: with open(rom_path, "rb") as f:
@ -257,16 +283,14 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
symbol_addrs_path = get_symbol_addrs_path(repo_path, options) symbol_addrs_path = get_symbol_addrs_path(repo_path, options)
undefined_syms_path = get_undefined_syms_path(repo_path, options) undefined_syms_path = get_undefined_syms_path(repo_path, options)
provided_symbols, c_func_labels_to_add, special_labels, ranges = gather_symbols(symbol_addrs_path, undefined_syms_path) all_symbols = gather_symbols(symbol_addrs_path, undefined_syms_path)
isolated_symbols = {}
symbol_ranges = [s for s in all_symbols if s.size > 4]
platform = get_platform(options) platform = get_platform(options)
processed_segments = [] processed_segments = []
ld_sections = [] ld_sections = []
defined_funcs = {}
undefined_funcs = set()
undefined_syms = set()
seg_sizes = {} seg_sizes = {}
seg_split = {} seg_split = {}
seg_cached = {} seg_cached = {}
@ -284,11 +308,11 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
for segment in all_segments: for segment in all_segments:
if platform == "n64" and type(segment) == N64SegCode: # remove special-case sometime if platform == "n64" and type(segment) == N64SegCode: # remove special-case sometime
segment.all_functions = defined_funcs segment_symbols, other_symbols = get_segment_symbols(segment, all_symbols, all_segments)
segment.provided_symbols = provided_symbols segment.seg_symbols = segment_symbols
segment.special_labels = special_labels segment.ext_symbols = other_symbols
segment.c_labels_to_add = c_func_labels_to_add segment.all_symbols = all_symbols
segment.symbol_ranges = ranges segment.symbol_ranges = symbol_ranges
segment.check() segment.check()
@ -319,11 +343,6 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
if len(segment.errors) == 0: if len(segment.errors) == 0:
processed_segments.append(segment) processed_segments.append(segment)
if platform == "n64" and type(segment) == N64SegCode: # edge case
undefined_funcs |= segment.glabels_to_add
defined_funcs = {**defined_funcs, **segment.glabels_added}
undefined_syms |= segment.undefined_syms_to_add
seg_split[tp] += 1 seg_split[tp] += 1
log.dot(status=segment.status()) log.dot(status=segment.status())
@ -339,45 +358,40 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
log.write(f"saving {config['basename']}.ld") log.write(f"saving {config['basename']}.ld")
write_ldscript(config['basename'], repo_path, ld_sections, options) write_ldscript(config['basename'], repo_path, ld_sections, options)
undefined_syms_to_write = [s for s in all_symbols if s.referenced and not s.defined and not s.type == "func"]
undefined_funcs_to_write = [s for s in all_symbols if s.referenced and not s.defined and s.type == "func"]
# Write undefined_funcs_auto.txt # Write undefined_funcs_auto.txt
undefined_funcs_auto_path = get_undefined_funcs_auto_path(repo_path, options) undefined_funcs_auto_path = get_undefined_funcs_auto_path(repo_path, options)
if verbose: if verbose:
log.write(f"saving {undefined_funcs_auto_path}") log.write(f"saving {undefined_funcs_auto_path}")
c_predefined_funcs = set(provided_symbols.keys())
to_write = sorted(undefined_funcs - set(defined_funcs.values()) - c_predefined_funcs) to_write = undefined_funcs_to_write
if len(to_write) > 0: if len(to_write) > 0:
with open(undefined_funcs_auto_path, "w", newline="\n") as f: with open(undefined_funcs_auto_path, "w", newline="\n") as f:
for line in to_write: for symbol in to_write:
f.write(line + " = 0x" + line.split("_")[1][:8].upper() + ";\n") f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")
# write undefined_syms_auto.txt # write undefined_syms_auto.txt
undefined_syms_auto_path = get_undefined_syms_auto_path(repo_path, options) undefined_syms_auto_path = get_undefined_syms_auto_path(repo_path, options)
if verbose: if verbose:
log.write(f"saving {undefined_syms_auto_path}") log.write(f"saving {undefined_syms_auto_path}")
to_write = sorted(undefined_syms, key=lambda x:x[0]) to_write = undefined_syms_to_write
if len(to_write) > 0: if len(to_write) > 0:
with open(undefined_syms_auto_path, "w", newline="\n") as f: with open(undefined_syms_auto_path, "w", newline="\n") as f:
for sym in to_write: for symbol in to_write:
f.write(f"{sym[0]} = 0x{sym[1]:X};\n") f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")
# print warnings and errors during split/postsplit # print warnings during split/postsplit
had_error = False
for segment in all_segments: for segment in all_segments:
if len(segment.warnings) > 0 or len(segment.errors) > 0: if len(segment.warnings) > 0:
log.write(f"{Style.DIM}0x{segment.rom_start:06X}{Style.RESET_ALL} {segment.type} {Style.BRIGHT}{segment.name}{Style.RESET_ALL}:") log.write(f"{Style.DIM}0x{segment.rom_start:06X}{Style.RESET_ALL} {segment.type} {Style.BRIGHT}{segment.name}{Style.RESET_ALL}:")
for warn in segment.warnings: for warn in segment.warnings:
log.write("warning: " + warn, status="warn") log.write("warning: " + warn, status="warn")
for error in segment.errors:
log.write("error: " + error, status="error")
had_error = True
log.write("") # empty line log.write("") # empty line
if had_error:
return 1
# Statistics # Statistics
unk_size = seg_sizes.get("unk", 0) unk_size = seg_sizes.get("unk", 0)
rest_size = 0 rest_size = 0

View File

@ -9,6 +9,7 @@ parser.add_argument('rom', help='path to a .z64 rom')
parser.add_argument('--encoding', help='Text encoding the game header is using; see docs.python.org/3/library/codecs.html#standard-encodings for valid encodings', default='ASCII') parser.add_argument('--encoding', help='Text encoding the game header is using; see docs.python.org/3/library/codecs.html#standard-encodings for valid encodings', default='ASCII')
country_codes = { country_codes = {
0x00: "Unknown",
0x37: "Beta", 0x37: "Beta",
0x41: "Asian (NTSC)", 0x41: "Asian (NTSC)",
0x42: "Brazillian", 0x42: "Brazillian",
@ -102,7 +103,7 @@ class N64Rom:
def get_country_name(self):
    """Return the human-readable name for this rom's country code.

    Codes missing from ``country_codes`` yield "Unknown" (the same label the
    table uses for 0x00) rather than raising ``KeyError``.
    """
    return country_codes.get(self.country_code, "Unknown")
# TODO: support .n64 extension
def main(): def main():
args = parser.parse_args() args = parser.parse_args()
rom = get_info(args.rom, args.encoding) rom = get_info(args.rom, args.encoding)

View File

@ -1,12 +0,0 @@
class N64Symbol:
    """Plain record describing a symbol at a vram address.

    Attributes are stored exactly as passed, except ``name``, which falls
    back to ``get_default_name(vram)`` when no (or an empty) name is given.
    """

    def __init__(self, vram, rom=None, name=None, segment=None, length=4):
        self.vram = vram
        self.rom = rom
        self.segment = segment
        self.length = length
        if name:
            self.name = name
        else:
            self.name = self.get_default_name(vram)

    @staticmethod
    def get_default_name(vram):
        """Build the placeholder name: ``D_`` plus the address in upper hex."""
        return f"D_{vram:X}"

View File

@ -0,0 +1,46 @@
class Symbol:
    """A named location tracked during disassembly.

    Attributes:
        vram_start: Address passed as ``vram`` to the constructor.
        rom: Rom offset of the symbol, or None when not known.
        type: Kind of symbol; "func" and "jtbl" select a dedicated
            default-name prefix, anything else is named like data.
        in_overlay: When true, the default name also embeds the rom offset.
        size: Size used to compute ``vram_end`` and ``rom_end``.
        given_name: Explicit name; overrides the generated default name.
        defined, referenced: Flags initialised to False; this class never
            sets them itself.
        access_mnemonic: Initialised to None; not used by this class.
    """

    def __init__(self, vram, given_name=None, rom=None, type="unknown", in_overlay=False, size=4):
        self.defined = False
        self.referenced = False
        self.vram_start = vram
        self.rom = rom
        self.type = type
        self.in_overlay = in_overlay
        self.size = size
        self.given_name = given_name
        self.access_mnemonic = None

    def __repr__(self):
        rom_text = "None" if self.rom is None else f"0x{self.rom:X}"
        return (
            f"{type(self).__name__}(name={self.name!r}, "
            f"vram=0x{self.vram_start:X}, rom={rom_text}, "
            f"type={self.type!r}, size={self.size})"
        )

    @property
    def default_name(self):
        """Generated name: ``<prefix>_<VRAM>`` plus ``_<ROM>`` for overlays."""
        prefix = self.type if self.type in ("func", "jtbl") else "D"
        suffix = f"_{self.vram_start:X}"
        # The rom part can only be formatted when a rom offset is known;
        # formatting None with ":X" would raise TypeError.
        if self.in_overlay and self.rom is not None:
            suffix += f"_{self.rom:X}"
        return prefix + suffix

    @property
    def name(self):
        """The explicit name if one was given, else the generated default."""
        return self.given_name or self.default_name

    @property
    def rom_end(self):
        """Exclusive rom end offset, or None when the rom offset is unknown."""
        # Compare against None explicitly: rom offset 0 is a real offset.
        return None if self.rom is None else self.rom + self.size

    @property
    def vram_end(self):
        """Exclusive end address (``vram_start + size``)."""
        return self.vram_start + self.size

    def set_in_overlay(self):
        """Mark the symbol as living in an overlay."""
        self.in_overlay = True

    def contains_vram(self, offset):
        """Return True if ``offset`` lies in ``[vram_start, vram_end)``."""
        return self.vram_start <= offset < self.vram_end