git subrepo pull tools/splat

subrepo:
  subdir:   "tools/splat"
  merged:   "b426daf02"
upstream:
  origin:   "https://github.com/ethteck/splat.git"
  branch:   "master"
  commit:   "b426daf02"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo"
  commit:   "2f68596"
This commit is contained in:
Ethan Roseman 2021-02-04 00:09:01 +09:00
parent 43f3a6fa5f
commit f8a344e713
8 changed files with 393 additions and 242 deletions

View File

@ -6,7 +6,7 @@
[subrepo]
remote = https://github.com/ethteck/splat.git
branch = master
commit = e2b731ab198c1d8400412ffd09252deca65253d2
parent = 4201a08a28b6d6f070b35f97e3ba726b5448893b
commit = b426daf02b5b5549a390d8212f601920de35ce7e
parent = 43f3a6fa5f49424940912f3f37dc018d8231ddba
method = merge
cmdver = 0.4.3

View File

@ -10,3 +10,17 @@
* New `undefined_funcs_auto_path` option
* New `cache_path` option
* (All path-like options' names now end with `_path`)
## 0.6: The Symbol Update
Internally, there's a new Symbol class which stores information about a symbol and is stored in a couple places during disassembly. Many things should be improved, such as reconciling symbols within overlays, things being named functions vs data symbols, and more.
**Breaking change**: The format of symbol_addrs.txt has been updated. After specifying the name and address of a symbol (`symbol = addr;`), optional properties of the symbol can be set via an inline comment, space-delimited, in any order. The properties are of the format `name:value`.
* `type:` currently mainly supports `func`, but will support `label` and `data` later on. Internally, `jtbl` is used as well, for jump tables. Splat uses type information during disassembly to disambiguate symbols with the same addresses.
* `rom:` is for the hex rom address of the symbol, beginning with `0x`. If available, this information is extremely valuable for use in disambiguating symbols.
* `size:` specifies the size of the symbol, which splat will use to generate offsets during disassembly. Uses the same format as `rom:`
**function example**: `FuncNameHere = 0x80023423; // type:func rom:0x10023`
**data example**: `gSomeDataVar = 0x80024233; // type:data size:0x100`
As always, feel free to reach out to me with any questions, suggestions, or feedback.

View File

@ -1,4 +1,3 @@
from re import split
from capstone import *
from capstone.mips import *
@ -6,10 +5,10 @@ from collections import OrderedDict
from segtypes.n64.segment import N64Segment
import os
from pathlib import Path, PurePath
from ranges import Range, RangeDict
import re
import sys
from util import floats
from util.symbol import Symbol
STRIP_C_COMMENTS_RE = re.compile(
@ -22,6 +21,12 @@ C_FUNC_RE = re.compile(
re.MULTILINE
)
double_mnemonics = ["ldc1", "sdc1"]
word_mnemonics = ["addiu", "sw", "lw", "jtbl"]
float_mnemonics = ["lwc1", "swc1"]
short_mnemonics = ["addiu", "lh", "sh", "lhu"]
byte_mnemonics = ["lb", "sb", "lbu"]
def strip_c_comments(text):
def replacer(match):
s = match.group(0)
@ -39,64 +44,58 @@ def get_funcs_defined_in_c(c_file):
return set(m.group(2) for m in C_FUNC_RE.finditer(text))
def parse_segment_files(segment, segment_class, seg_start, seg_end, seg_name, seg_vram):
    """Expand a code segment's ``files`` config into a list of per-file dicts.

    Each entry of ``segment["files"]`` is either a dict with ``start``,
    ``end``, ``type`` and optional ``name`` keys, or a sequence of the form
    ``[start, type]`` / ``[start, type, name]``. A sequence entry ends where
    the next entry starts (or at ``seg_end`` for the last entry).

    Args:
        segment: The segment configuration; only its ``"files"`` key is read.
        segment_class: Unused; kept so existing callers keep working.
        seg_start: ROM start of the enclosing segment.
        seg_end: ROM end of the enclosing segment.
        seg_name: Name of the enclosing segment, used to build default names.
        seg_vram: VRAM address corresponding to ``seg_start``.

    Returns:
        A list of dicts with ``start``, ``end``, ``name``, ``vram`` and
        ``subtype`` keys. Without a ``"files"`` key, a single ``"asm"`` entry
        spanning the whole segment is returned.

    Exits the process with status 1 if the files are not in ascending ROM order.
    """
    prefix = seg_name if seg_name.endswith("/") else f"{seg_name}_"

    if "files" not in segment:
        return [{"start": seg_start, "end": seg_end,
                 "name": seg_name, "vram": seg_vram, "subtype": "asm"}]

    def entry_start(entry):
        # Dict and sequence entries may be mixed, so the start of the *next*
        # entry has to be read according to that entry's own form.
        return entry["start"] if isinstance(entry, dict) else entry[0]

    files = segment["files"]
    last_index = len(files) - 1
    ret = []
    prev_start = -1

    for i, split_file in enumerate(files):
        if isinstance(split_file, dict):
            start = split_file["start"]
            end = split_file["end"]
            name = split_file.get("name")
            subtype = split_file["type"]
        else:
            start = split_file[0]
            end = seg_end if i == last_index else entry_start(files[i + 1])
            name = split_file[2] if len(split_file) >= 3 else None
            subtype = split_file[1]

        if start < prev_start:
            print(f"Error: Code segment {seg_name} has files out of ascending rom order (0x{prev_start:X} followed by 0x{start:X})")
            sys.exit(1)

        if not name:
            # A segment that still carries its own default name hands default
            # names to its files; otherwise files are prefixed with the segment name.
            if seg_name == N64SegCode.get_default_name(seg_start):
                name = N64SegCode.get_default_name(start)
            else:
                name = f"{prefix}{start:X}"

        vram = seg_vram + (start - seg_start)
        ret.append({"start": start, "end": end, "name": name, "vram": vram, "subtype": subtype})
        prev_start = start

    return ret
class N64SegCode(N64Segment):
def parse_segment_files(self, segment, seg_start, seg_end, seg_name, seg_vram):
    """Expand this code segment's ``files`` config into a list of per-file dicts.

    Each entry of ``segment["files"]`` is either a dict with ``start``,
    ``end``, ``type`` and optional ``name`` keys, or a sequence of the form
    ``[start, type]`` / ``[start, type, name]``. A sequence entry ends where
    the next entry starts (or at ``seg_end`` for the last entry).

    Args:
        segment: The segment configuration; only its ``"files"`` key is read.
        seg_start: ROM start of the enclosing segment.
        seg_end: ROM end of the enclosing segment.
        seg_name: Name of the enclosing segment, used to build default names.
        seg_vram: VRAM address corresponding to ``seg_start``.

    Returns:
        A list of dicts with ``start``, ``end``, ``name``, ``vram`` and
        ``subtype`` keys. Without a ``"files"`` key, a single ``"asm"`` entry
        spanning the whole segment is returned.

    Exits the process with status 1 if the files are not in ascending ROM order.
    """
    prefix = seg_name if seg_name.endswith("/") else f"{seg_name}_"

    if "files" not in segment:
        return [{"start": seg_start, "end": seg_end,
                 "name": seg_name, "vram": seg_vram, "subtype": "asm"}]

    def entry_start(entry):
        # Dict and sequence entries may be mixed, so the start of the *next*
        # entry has to be read according to that entry's own form.
        return entry["start"] if isinstance(entry, dict) else entry[0]

    files = segment["files"]
    last_index = len(files) - 1
    ret = []
    prev_start = -1

    for i, split_file in enumerate(files):
        if isinstance(split_file, dict):
            start = split_file["start"]
            end = split_file["end"]
            name = split_file.get("name")
            subtype = split_file["type"]
        else:
            start = split_file[0]
            end = seg_end if i == last_index else entry_start(files[i + 1])
            name = split_file[2] if len(split_file) >= 3 else None
            subtype = split_file[1]

        if start < prev_start:
            print(f"Error: Code segment {seg_name} has files out of ascending rom order (0x{prev_start:X} followed by 0x{start:X})")
            sys.exit(1)

        if not name:
            # A segment that still carries its own default name hands default
            # names to its files; otherwise files are prefixed with the segment name.
            if seg_name == self.get_default_name(seg_start):
                name = self.get_default_name(start)
            else:
                name = f"{prefix}{start:X}"

        vram = seg_vram + (start - seg_start)
        ret.append({"start": start, "end": end, "name": name, "vram": vram, "subtype": subtype})
        prev_start = start

    return ret
def __init__(self, segment, next_segment, options):
super().__init__(segment, next_segment, options)
self.files = parse_segment_files(segment, self.__class__, self.rom_start, self.rom_end, self.name, self.vram_addr)
self.files = self.parse_segment_files(segment, self.rom_start, self.rom_end, self.name, self.vram_start)
self.is_overlay = segment.get("overlay", False)
self.labels_to_add = set()
self.jtbl_glabels = set()
self.glabels_to_add = set()
self.special_labels = {}
self.undefined_syms_to_add = set()
self.glabels_added = {}
self.all_functions = {}
self.provided_symbols = {}
self.c_labels_to_add = set()
self.ld_section_name = "." + segment.get("ld_name", f"text_{self.rom_start:X}")
self.symbol_ranges = RangeDict()
self.detected_syms = {}
self.all_symbols = ()
self.seg_symbols = {} # Symbols known to be in this segment
self.ext_symbols = {} # Symbols not in this segment but also not from other overlapping ram address ranges
self.symbol_ranges = []
self.reported_file_split = False
self.labels_to_add = set()
self.jtbl_glabels_to_add = set()
self.jtbl_jumps = {}
self.jumptables = {}
@ -104,23 +103,79 @@ class N64SegCode(N64Segment):
def get_default_name(addr):
return f"code_{addr:X}"
def get_func_name(self, addr):
return self.provided_symbols.get(addr, f"func_{addr:X}")
def retrieve_symbol(self, d, k, t):
if k not in d:
return None
def get_unique_func_name(self, func_addr, rom_addr):
func_name = self.get_func_name(func_addr)
if t:
items = [s for s in d[k] if s.type == t or s.type == "unknown"]
else:
items = d[k]
if self.is_overlay and (func_addr >= self.vram_addr) and (func_addr <= self.vram_addr + self.rom_end - self.rom_start):
return func_name + "_{:X}".format(rom_addr)
return func_name
if len(items) > 1:
pass #print(f"Trying to retrieve {k:X} from symbol dict but there are {len(items)} entries to pick from - picking the first")
if len(items) == 0:
return None
return items[0]
def add_glabel(self, ram_addr, rom_addr):
func = self.get_unique_func_name(ram_addr, rom_addr)
self.glabels_to_add.discard(func)
self.glabels_added[ram_addr] = func
if not self.is_overlay:
self.all_functions[ram_addr] = func
return "glabel " + func
def retrieve_symbol_from_ranges(self, vram, rom=None):
    """Return the ranged symbol covering ``vram``, or ``None`` if there is none.

    Every entry of ``self.symbol_ranges`` whose VRAM range contains ``vram``
    is a candidate. Candidates that also have a ROM address and contain the
    given ``rom`` are preferred over candidates matched by VRAM alone; within
    each group the order of ``self.symbol_ranges`` is kept.
    """
    preferred = []
    fallback = []

    for candidate in self.symbol_ranges:
        if not candidate.contains_vram(vram):
            continue
        rom_hit = candidate.rom and rom and candidate.contains_rom(rom)
        (preferred if rom_hit else fallback).append(candidate)

    ordered = preferred + fallback
    return ordered[0] if ordered else None
def get_symbol(self, addr, type=None, create=False, define=False, reference=False, offsets=False, local_only=False):
    """Look up (and optionally create) the symbol at VRAM address ``addr``.

    Lookup order:
      1. ``self.seg_symbols`` when ``addr`` lies inside this segment,
         otherwise ``self.ext_symbols`` unless ``local_only`` is set.
      2. If nothing was found and ``offsets`` is set, the ranged symbols via
         ``retrieve_symbol_from_ranges``.
      3. If still nothing and ``create`` is set, a new ``Symbol`` is built,
         appended to ``self.all_symbols`` and registered in the matching
         per-address table.

    ``define`` / ``reference`` flag the resulting symbol as defined /
    referenced. Returns the symbol, or ``None`` when none was found or created.
    """
    in_segment = self.contains_vram(addr)

    # The rom address is only computable for addresses inside this segment.
    rom = self.ram_to_rom(addr) if in_segment else None

    if in_segment:
        found = self.retrieve_symbol(self.seg_symbols, addr, type)
    elif local_only:
        found = None
    else:
        found = self.retrieve_symbol(self.ext_symbols, addr, type)

    # Fall back to symbols that cover the address through their size
    if offsets and not found:
        found = self.retrieve_symbol_from_ranges(addr, rom)

    # Nothing known at this address: make a fresh symbol and file it away
    if create and not found:
        found = Symbol(addr, rom=rom, type=type)
        self.all_symbols.append(found)

        if in_segment:
            if self.is_overlay:
                found.set_in_overlay()
            self.seg_symbols.setdefault(addr, []).append(found)
        elif not local_only:
            self.ext_symbols.setdefault(addr, []).append(found)

    if not found:
        return found

    if define:
        found.defined = True
    if reference:
        found.referenced = True
    return found
def get_asm_header(self):
ret = []
@ -192,32 +247,23 @@ class N64SegCode(N64Segment):
print("INVALID INSTRUCTION " + insn)
elif mnemonic == "jal":
jal_addr = int(op_str, 0)
jump_func = self.get_func_name(jal_addr)
if (
jump_func.startswith("func_")
and self.is_overlay
and jal_addr >= self.vram_addr
and jal_addr <= (self.vram_addr + self.rom_end - self.rom_start)
):
func_loc = self.rom_start + jal_addr - self.vram_addr
jump_func += "_{:X}".format(func_loc)
if jump_func not in self.provided_symbols.values():
self.glabels_to_add.add(jump_func)
op_str = jump_func
jump_func = self.get_symbol(jal_addr, type="func", create=True, reference=True)
op_str = jump_func.name
elif self.is_branch_insn(insn.mnemonic):
op_str_split = op_str.split(" ")
branch_target = op_str_split[-1]
branch_target_int = int(branch_target, 0)
label = ""
if branch_target_int in self.special_labels:
label = self.special_labels[branch_target_int]
label = self.get_symbol(branch_target_int, type="label", reference=True, local_only=True)
if label:
label_name = label.name
else:
self.labels_to_add.add(branch_target_int)
label = ".L" + branch_target[2:].upper()
label_name = f".L{branch_target[2:].upper()}"
op_str = " ".join(op_str_split[:-1] + [label])
op_str = " ".join(op_str_split[:-1] + [label_name])
elif mnemonic == "mtc0" or mnemonic == "mfc0":
rd = (insn.bytes[2] & 0xF8) >> 3
op_str = op_str.split(" ")[0] + " $" + str(rd)
@ -239,7 +285,7 @@ class N64SegCode(N64Segment):
end_func = True
continue
if i < len(insns) - 1 and self.get_func_name(insns[i + 1].address) in self.c_labels_to_add:
if i < len(insns) - 1 and self.get_symbol(insns[i + 1].address, local_only=True, type="func"):
end_func = True
if end_func:
@ -262,33 +308,24 @@ class N64SegCode(N64Segment):
return fl
return None
def store_symbol_access(self, addr, mnemonic):
# Don't overwrite useful info with addiu
if addr in self.detected_syms and self.detected_syms[addr] != "addiu":
def update_access_mnemonic(self, sym, mnemonic):
if not sym.access_mnemonic:
sym.access_mnemonic = mnemonic
elif sym.access_mnemonic == "addiu":
sym.access_mnemonic = mnemonic
elif sym.access_mnemonic in double_mnemonics:
return
self.detected_syms[addr] = mnemonic
def get_symbol_name(self, addr, rom_addr, funcs=None):
if funcs and addr in funcs:
return self.get_unique_func_name(addr, rom_addr)
if addr in self.all_functions:
return self.all_functions[addr] # todo clean up funcs vs all_functions
if addr in self.provided_symbols:
return self.provided_symbols[addr]
if addr in self.jumptables:
return f"jtbl_{addr:X}_{rom_addr:X}"
if addr in self.symbol_ranges:
ret = self.symbol_ranges.get(addr)
offset = addr - self.symbol_ranges.getrange(addr).start
if offset != 0:
ret += f"+0x{offset:X}"
return ret
return f"D_{addr:X}"
elif sym.access_mnemonic in float_mnemonics and mnemonic in double_mnemonics:
sym.access_mnemonic = mnemonic
elif sym.access_mnemonic in short_mnemonics:
return
elif sym.access_mnemonic in byte_mnemonics:
return
else:
sym.access_mnemonic = mnemonic
# Determine symbols
def determine_symbols(self, funcs, rom_addr):
def determine_symbols(self, funcs):
ret = {}
for func_addr in funcs:
@ -345,29 +382,31 @@ class N64SegCode(N64Segment):
s_str = s_op_split[-1]
symbol_addr = (lui_val * 0x10000) + int(s_str, 0)
symbol_name = self.get_symbol_name(symbol_addr, symbol_addr - next(iter(funcs)) + rom_addr, funcs)
symbol_tag = s_insn.mnemonic
vram_end = self.vram_addr + self.rom_end - self.rom_start
if symbol_addr > func_addr and symbol_addr < vram_end and len(possible_jtbl_jumps) > 0 and func_end_addr - s_insn.address >= 0x30:
sym = None
offset_str = ""
if symbol_addr > func_addr and symbol_addr < self.vram_end and len(possible_jtbl_jumps) > 0 and func_end_addr - s_insn.address >= 0x30:
for jump in possible_jtbl_jumps:
if jump[1] == s_op_split[0]:
dist_to_jump = possible_jtbl_jumps[0][0] - s_insn.address
if dist_to_jump <= 16:
symbol_name = f"jtbl_{symbol_addr:X}_{self.ram_to_rom(symbol_addr):X}"
symbol_tag = "jtbl"
sym = self.get_symbol(symbol_addr, create=True, reference=True, type="jtbl", local_only=True)
self.jumptables[symbol_addr] = (func_addr, func_end_addr)
break
self.store_symbol_access(symbol_addr, symbol_tag)
symbol_file = self.get_file_for_addr(symbol_addr)
if not sym:
sym = self.get_symbol(symbol_addr, create=True, offsets=True, reference=True)
offset = symbol_addr - sym.vram_start
if offset != 0:
offset_str = f"+0x{offset:X}"
if not symbol_file or symbol_file["subtype"] == "bin":
if "+" not in symbol_name:
self.undefined_syms_to_add.add((symbol_name, symbol_addr))
self.update_access_mnemonic(sym, s_insn.mnemonic)
func[i] += ("%hi({})".format(symbol_name),)
func[j] += ("%lo({}){}".format(symbol_name, reg_ext),)
sym_label = sym.name + offset_str
func[i] += ("%hi({})".format(sym_label),)
func[j] += ("%lo({}){}".format(sym_label, reg_ext),)
break
ret[func_addr] = func
return ret
@ -380,7 +419,8 @@ class N64SegCode(N64Segment):
# Add function glabel
rom_addr = funcs[func][0][3]
func_text.append(self.add_glabel(func, rom_addr))
sym = self.get_symbol(func, type="func", create=True, define=True, local_only=True)
func_text.append(f"glabel {sym.name}")
indent_next = False
@ -393,7 +433,7 @@ class N64SegCode(N64Segment):
if insn_addr in self.labels_to_add:
self.labels_to_add.remove(insn_addr)
func_text.append(".L{:X}:".format(insn_addr))
if insn_addr in self.jtbl_glabels:
if insn_addr in self.jtbl_glabels_to_add:
func_text.append(f"glabel L{insn_addr:X}_{insn[3]:X}")
if rom_addr_padding:
@ -451,7 +491,8 @@ class N64SegCode(N64Segment):
return super().should_run() or (st in self.options["modes"] and st in subtypes for st in possible_subtypes)
def is_valid_ascii(self, bytes):
@staticmethod
def is_valid_ascii(bytes):
if len(bytes) < 8:
return False
@ -467,10 +508,17 @@ class N64SegCode(N64Segment):
return True
def get_symbols_for_file(self, split_file):
ret = []
vram_start = split_file["vram"]
vram_end = split_file["vram"] + split_file["end"] - split_file["start"]
return [(s, self.detected_syms[s]) for s in self.detected_syms if s >= vram_start and s <= vram_end]
for symbol_addr in self.seg_symbols:
for symbol in self.seg_symbols[symbol_addr]:
if symbol.vram_start >= vram_start and symbol.vram_end < vram_end:
ret.append(symbol)
return ret
def disassemble_symbol(self, sym_bytes, sym_type):
if sym_type == "jtbl":
@ -502,8 +550,14 @@ class N64SegCode(N64Segment):
byte_str = f"L{bits:X}_{rom_addr:X}"
else:
byte_str = f"0x{bits:X}"
elif slen == 4 and bits >= 0x80000000:
sym = self.get_symbol(bits, reference=True)
if sym:
byte_str = sym.name
else:
byte_str = '0x{0:0{1}X}'.format(bits, 2 * slen)
else:
byte_str = self.provided_symbols.get(bits, '0x{0:0{1}X}'.format(bits, 2 * slen))
byte_str = '0x{0:0{1}X}'.format(bits, 2 * slen)
if sym_type in ["float", "double"]:
if sym_type == "float":
@ -512,7 +566,7 @@ class N64SegCode(N64Segment):
float_str = floats.format_f64_imm(bits)
# Fall back to .word if we see weird float values
# todo cut the symbol in half maybe where we see the first nan or something
# TODO: cut the symbol in half maybe where we see the first nan or something
if "e-" in float_str or "nan" in float_str:
return self.disassemble_symbol(sym_bytes, "word")
else:
@ -532,31 +586,41 @@ class N64SegCode(N64Segment):
ret = ".include \"macro.inc\"\n\n"
ret += f'.section .{split_file["subtype"]}'
# Todo remove when we have class for file
file_size = split_file['end'] - split_file['start']
if file_size == 0:
return None
syms = self.get_symbols_for_file(split_file)
syms.sort(key=lambda x:x[0])
syms.sort(key=lambda s:s.vram_start)
if len(syms) == 0:
self.warn("No symbol accesses detected for " + split_file["name"] + "; the output will most likely be an ugly blob")
# check beginning
if syms[0][0] != split_file["vram"]:
syms.insert(0, (split_file["vram"], None))
# Ensure we start at the beginning
if len(syms) == 0 or syms[0].vram_start != split_file["vram"]:
syms.insert(0, self.get_symbol(split_file["vram"], create=True, define=True, local_only=True))
# add end
vram_end = split_file["vram"] + split_file["end"] - split_file["start"]
if syms[-1][0] != vram_end:
syms.append((vram_end, None))
vram_end = split_file["vram"] + file_size
if syms[-1].vram_start != vram_end:
# Make a dummy symbol here that marks the end of the previous symbol's disasm range
syms.append(Symbol(vram_end))
for i in range(len(syms) - 1):
mnemonic = syms[i][1]
start = syms[i][0]
end = syms[i + 1][0]
mnemonic = syms[i].access_mnemonic
start = syms[i].vram_start
end = syms[i + 1].vram_start
sym_rom_start = start - split_file["vram"] + split_file["start"]
sym_rom_end = end - split_file["vram"] + split_file["start"]
sym_name = self.get_symbol_name(start, sym_rom_start)
sym_str = f"\n\nglabel {sym_name}\n"
sym = self.get_symbol(start, create=True, define=True, local_only=True)
sym_str = f"\n\nglabel {sym.name}\n"
sym_bytes = rom_bytes[sym_rom_start : sym_rom_end]
# TODO: Hack for null mnemonic - move elsewhere later, probably
if not mnemonic:
mnemonic = "addiu"
# .ascii
if self.is_valid_ascii(sym_bytes) and mnemonic == "addiu":
# mnemonic thing may be too picky, we'll see
@ -571,15 +635,15 @@ class N64SegCode(N64Segment):
pass
# Fallback to raw data
if mnemonic == "jtbl":
if syms[i].type == "jtbl":
stype = "jtbl"
elif len(sym_bytes) % 8 == 0 and mnemonic in ["ldc1", "sdc1"]:
elif len(sym_bytes) % 8 == 0 and mnemonic in double_mnemonics:
stype = "double"
elif len(sym_bytes) % 4 == 0 and mnemonic in ["addiu", "sw", "lw", "jtbl"]:
elif len(sym_bytes) % 4 == 0 and mnemonic in word_mnemonics:
stype = "word"
elif len(sym_bytes) % 4 == 0 and mnemonic in ["lwc1", "swc1"]:
elif len(sym_bytes) % 4 == 0 and mnemonic in float_mnemonics:
stype = "float"
elif len(sym_bytes) % 2 == 0 and mnemonic in ["addiu", "lh", "sh", "lhu"]:
elif len(sym_bytes) % 2 == 0 and mnemonic in short_mnemonics:
stype = "short"
else:
stype = "byte"
@ -604,10 +668,12 @@ class N64SegCode(N64Segment):
return ret
def gather_jumptable_labels(self, section_vram, section_rom, rom_bytes):
def gather_jumptable_labels(self, rom_bytes):
# TODO: use the seg_symbols for this
# jumptables = [j.type == "jtbl" for j in self.seg_symbols]
for jumptable in self.jumptables:
start, end = self.jumptables[jumptable]
rom_offset = section_rom + jumptable - section_vram
rom_offset = self.rom_start + jumptable - self.vram_start
if rom_offset <= 0:
return
@ -616,12 +682,23 @@ class N64SegCode(N64Segment):
word = rom_bytes[rom_offset : rom_offset + 4]
word_int = int.from_bytes(word, "big")
if word_int >= start and word_int <= end:
self.jtbl_glabels.add(word_int)
self.jtbl_glabels_to_add.add(word_int)
else:
break
rom_offset += 4
def mark_c_funcs_as_defined(self, c_funcs):
    """Flag the segment symbol matching each name in ``c_funcs`` as defined.

    For every name, ``self.seg_symbols`` is scanned in order and only the
    first symbol whose ``name`` equals it gets ``defined = True``. Names
    without a matching symbol are ignored.
    """
    for wanted in c_funcs:
        hit = next(
            (
                candidate
                for addr in self.seg_symbols
                for candidate in self.seg_symbols[addr]
                if candidate.name == wanted
            ),
            None,
        )
        if hit is not None:
            hit.defined = True
def split(self, rom_bytes, base_path):
md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN)
@ -645,8 +722,13 @@ class N64SegCode(N64Segment):
insns = [insn for insn in md.disasm(rom_bytes[split_file["start"]: split_file["end"]], split_file["vram"])]
funcs = self.process_insns(insns, rom_addr)
funcs = self.determine_symbols(funcs, rom_addr)
self.gather_jumptable_labels(self.vram_addr, self.rom_start, rom_bytes)
# TODO: someday make func a subclass of symbol and store this disasm info there too
for func in funcs:
self.get_symbol(func, type="func", create=True, define=True, local_only=True)
funcs = self.determine_symbols(funcs)
self.gather_jumptable_labels(rom_bytes)
funcs_text = self.add_labels(funcs)
if file_type == "c":
@ -655,6 +737,7 @@ class N64SegCode(N64Segment):
if os.path.exists(c_path):
defined_funcs = get_funcs_defined_in_c(c_path)
self.mark_c_funcs_as_defined(defined_funcs)
else:
defined_funcs = set()
@ -662,8 +745,7 @@ class N64SegCode(N64Segment):
base_path, os.path.join("asm", "nonmatchings"))
for func in funcs_text:
func_name = self.get_unique_func_name(
func, funcs_text[func][1])
func_name = self.get_symbol(func, type="func", local_only=True).name
if func_name not in defined_funcs:
if self.options.get("compiler", "IDO") == "GCC":
@ -686,7 +768,7 @@ class N64SegCode(N64Segment):
c_lines = self.get_c_preamble()
for func in funcs_text:
func_name = self.get_unique_func_name(func, funcs_text[func][1])
func_name = self.get_symbol(func, type="func", local_only=True).name
if self.options.get("compiler", "IDO") == "GCC":
c_lines.append("INCLUDE_ASM(s32, \"{}\", {});".format(split_file["name"], func_name))
else:

View File

@ -48,7 +48,7 @@ class Segment:
self.rom_end = parse_segment_start(next_segment)
self.type = parse_segment_type(segment)
self.name = parse_segment_name(segment, self.__class__)
self.vram_addr = parse_segment_vram(segment)
self.vram_start = parse_segment_vram(segment)
self.ld_name_override = segment.get("ld_name", None) if type(segment) is dict else None
self.options = options
self.config = segment
@ -73,19 +73,25 @@ class Segment:
@property
def vram_end(self):
return self.vram_addr + self.size
return self.vram_start + self.size
def contains_vram(self, vram):
    """Return whether ``vram`` lies in the half-open range [vram_start, vram_end)."""
    return self.vram_start <= vram < self.vram_end
def contains_rom(self, rom):
    """Return whether ``rom`` lies in the half-open range [rom_start, rom_end)."""
    return self.rom_start <= rom < self.rom_end
def rom_to_ram(self, rom_addr):
if rom_addr < self.rom_start or rom_addr > self.rom_end:
return None
return self.vram_addr + rom_addr - self.rom_start
return self.vram_start + rom_addr - self.rom_start
def ram_to_rom(self, ram_addr):
if ram_addr < self.vram_addr or ram_addr > self.vram_end:
if ram_addr < self.vram_start or ram_addr > self.vram_end:
return None
return self.rom_start + ram_addr - self.vram_addr
return self.rom_start + ram_addr - self.vram_start
def create_split_dir(self, base_path, subdir):
out_dir = Path(base_path, subdir)
@ -112,7 +118,7 @@ class Segment:
def get_ld_section(self):
replace_ext = self.options.get("ld_o_replace_extension", True)
sect_name = self.ld_name_override if self.ld_name_override else self.get_ld_section_name()
vram_or_rom = self.rom_start if self.vram_addr == 0 else self.vram_addr
vram_or_rom = self.rom_start if self.vram_start == 0 else self.vram_start
subalign_str = "" if self.subalign == default_subalign else f"SUBALIGN({self.subalign})"
s = (
@ -125,7 +131,7 @@ class Segment:
if start % 0x10 != 0 and i != 0:
tmp_sect_name = path.replace(".", "_")
tmp_sect_name = tmp_sect_name.replace("/", "_")
tmp_vram = start - self.rom_start + self.vram_addr
tmp_vram = start - self.rom_start + self.vram_start
s += (
"}\n"
f"SPLAT_BEGIN_SEG({tmp_sect_name}, 0x{start:X}, 0x{tmp_vram:X}, {subalign_str})\n"

View File

@ -4,7 +4,6 @@ import argparse
import importlib
import importlib.util
import os
from ranges import Range, RangeDict
from pathlib import Path
import yaml
import pickle
@ -12,6 +11,7 @@ from colorama import Style, Fore
from segtypes.segment import parse_segment_type
from segtypes.n64.code import N64SegCode
from util import log
from util.symbol import Symbol
parser = argparse.ArgumentParser(
description="Split a rom given a rom, a config, and output directory")
@ -24,6 +24,7 @@ parser.add_argument("--verbose", action="store_true",
parser.add_argument("--new", action="store_true",
help="Only split changed segments in config")
sym_isolated_map = {}
def write_ldscript(rom_name, repo_path, sections, options):
with open(os.path.join(repo_path, rom_name + ".ld"), "w", newline="\n") as f:
@ -97,10 +98,7 @@ def get_cache_path(repo_path, options):
def gather_symbols(symbol_addrs_path, undefined_syms_path):
symbols = {}
special_labels = {}
labels_to_add = set()
ranges = RangeDict()
symbols = []
# Manual list of func name / addrs
if os.path.exists(symbol_addrs_path):
@ -120,17 +118,23 @@ def gather_symbols(symbol_addrs_path, undefined_syms_path):
line_split = line.split("=")
name = line_split[0].strip()
addr = int(line_split[1].strip()[:-1], 0)
symbols[addr] = name
sym = Symbol(addr, given_name=name)
if line_ext:
for info in line_ext.split(" "):
if info == "!":
labels_to_add.add(name)
special_labels[addr] = name
if info.startswith("type:"):
type = info.split(":")[1]
sym.type = type
if info.startswith("size:"):
size = int(info.split(":")[1], 0)
ranges.add(Range(addr, addr + size), name)
sym.size = size
if info.startswith("rom:"):
rom_addr = int(info.split(":")[1], 0)
sym.rom = rom_addr
symbols.append(sym)
# Maybe let's not use this
if os.path.exists(undefined_syms_path):
with open(undefined_syms_path) as f:
us_lines = f.readlines()
@ -141,27 +145,9 @@ def gather_symbols(symbol_addrs_path, undefined_syms_path):
line_split = line.split("=")
name = line_split[0].strip()
addr = int(line_split[1].strip()[:-1], 0)
symbols[addr] = name
symbols.append(Symbol(addr, given_name=name))
return symbols, labels_to_add, special_labels, ranges
def gather_c_variables(undefined_syms_path):
    """Parse ``name = addr;`` lines from a file into an ``{addr: name}`` dict.

    Blank lines and lines starting with ``//`` are skipped. The final
    character of the right-hand side (the ``;``) is dropped before the address
    is parsed with ``int(..., 0)``. Returns an empty dict if the file does not
    exist.
    """
    found = {}

    if not os.path.exists(undefined_syms_path):
        return found

    with open(undefined_syms_path) as f:
        raw_lines = f.readlines()

    for raw in raw_lines:
        entry = raw.strip()
        if entry == "" or entry.startswith("//"):
            continue
        parts = entry.split("=")
        # parts[1] looks like " 0x80001234;" -> strip, then drop the last character
        found[int(parts[1].strip()[:-1], 0)] = parts[0].strip()

    return found
return symbols
def get_base_segment_class(seg_type, platform):
@ -239,6 +225,46 @@ def initialize_segments(options, config_path, config_segments):
return ret
def is_symbol_isolated(symbol, all_segments):
    """Return whether at most one segment contains the symbol's VRAM start.

    Results are memoised per symbol in the module-level ``sym_isolated_map``,
    so the segment scan runs only once for each symbol.
    """
    try:
        return sym_isolated_map[symbol]
    except KeyError:
        pass

    hits = 0
    for seg in all_segments:
        if not seg.contains_vram(symbol.vram_start):
            continue
        hits += 1
        # Two overlapping segments already decide the answer
        if hits >= 2:
            break

    isolated = hits <= 1
    sym_isolated_map[symbol] = isolated
    return isolated
def get_segment_symbols(segment, all_symbols, all_segments):
    """Split ``all_symbols`` into those belonging to ``segment`` and the rest.

    A symbol that is isolated (see ``is_symbol_isolated``) and has no rom
    address is assigned by VRAM containment. Any other symbol belongs to the
    segment only if it has a rom address that the segment contains.

    Returns:
        ``(seg_syms, other_syms)``: two dicts mapping a VRAM start address to
        the list of symbols at that address.
    """
    seg_syms = {}
    other_syms = {}

    for sym in all_symbols:
        if is_symbol_isolated(sym, all_segments) and not sym.rom:
            belongs = segment.contains_vram(sym.vram_start)
        else:
            belongs = sym.rom and segment.contains_rom(sym.rom)

        bucket = seg_syms if belongs else other_syms
        bucket.setdefault(sym.vram_start, []).append(sym)

    return seg_syms, other_syms
def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
with open(rom_path, "rb") as f:
@ -257,16 +283,14 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
symbol_addrs_path = get_symbol_addrs_path(repo_path, options)
undefined_syms_path = get_undefined_syms_path(repo_path, options)
provided_symbols, c_func_labels_to_add, special_labels, ranges = gather_symbols(symbol_addrs_path, undefined_syms_path)
all_symbols = gather_symbols(symbol_addrs_path, undefined_syms_path)
isolated_symbols = {}
symbol_ranges = [s for s in all_symbols if s.size > 4]
platform = get_platform(options)
processed_segments = []
ld_sections = []
defined_funcs = {}
undefined_funcs = set()
undefined_syms = set()
seg_sizes = {}
seg_split = {}
seg_cached = {}
@ -284,11 +308,11 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
for segment in all_segments:
if platform == "n64" and type(segment) == N64SegCode: # remove special-case sometime
segment.all_functions = defined_funcs
segment.provided_symbols = provided_symbols
segment.special_labels = special_labels
segment.c_labels_to_add = c_func_labels_to_add
segment.symbol_ranges = ranges
segment_symbols, other_symbols = get_segment_symbols(segment, all_symbols, all_segments)
segment.seg_symbols = segment_symbols
segment.ext_symbols = other_symbols
segment.all_symbols = all_symbols
segment.symbol_ranges = symbol_ranges
segment.check()
@ -319,11 +343,6 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
if len(segment.errors) == 0:
processed_segments.append(segment)
if platform == "n64" and type(segment) == N64SegCode: # edge case
undefined_funcs |= segment.glabels_to_add
defined_funcs = {**defined_funcs, **segment.glabels_added}
undefined_syms |= segment.undefined_syms_to_add
seg_split[tp] += 1
log.dot(status=segment.status())
@ -339,45 +358,40 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False):
log.write(f"saving {config['basename']}.ld")
write_ldscript(config['basename'], repo_path, ld_sections, options)
undefined_syms_to_write = [s for s in all_symbols if s.referenced and not s.defined and not s.type == "func"]
undefined_funcs_to_write = [s for s in all_symbols if s.referenced and not s.defined and s.type == "func"]
# Write undefined_funcs_auto.txt
undefined_funcs_auto_path = get_undefined_funcs_auto_path(repo_path, options)
if verbose:
log.write(f"saving {undefined_funcs_auto_path}")
c_predefined_funcs = set(provided_symbols.keys())
to_write = sorted(undefined_funcs - set(defined_funcs.values()) - c_predefined_funcs)
to_write = undefined_funcs_to_write
if len(to_write) > 0:
with open(undefined_funcs_auto_path, "w", newline="\n") as f:
for line in to_write:
f.write(line + " = 0x" + line.split("_")[1][:8].upper() + ";\n")
for symbol in to_write:
f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")
# write undefined_syms_auto.txt
undefined_syms_auto_path = get_undefined_syms_auto_path(repo_path, options)
if verbose:
log.write(f"saving {undefined_syms_auto_path}")
to_write = sorted(undefined_syms, key=lambda x:x[0])
to_write = undefined_syms_to_write
if len(to_write) > 0:
with open(undefined_syms_auto_path, "w", newline="\n") as f:
for sym in to_write:
f.write(f"{sym[0]} = 0x{sym[1]:X};\n")
for symbol in to_write:
f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")
# print warnings and errors during split/postsplit
had_error = False
# print warnings during split/postsplit
for segment in all_segments:
if len(segment.warnings) > 0 or len(segment.errors) > 0:
if len(segment.warnings) > 0:
log.write(f"{Style.DIM}0x{segment.rom_start:06X}{Style.RESET_ALL} {segment.type} {Style.BRIGHT}{segment.name}{Style.RESET_ALL}:")
for warn in segment.warnings:
log.write("warning: " + warn, status="warn")
for error in segment.errors:
log.write("error: " + error, status="error")
had_error = True
log.write("") # empty line
if had_error:
return 1
# Statistics
unk_size = seg_sizes.get("unk", 0)
rest_size = 0

View File

@ -9,6 +9,7 @@ parser.add_argument('rom', help='path to a .z64 rom')
parser.add_argument('--encoding', help='Text encoding the game header is using; see docs.python.org/3/library/codecs.html#standard-encodings for valid encodings', default='ASCII')
country_codes = {
0x00: "Unknown",
0x37: "Beta",
0x41: "Asian (NTSC)",
0x42: "Brazillian",
@ -102,7 +103,7 @@ class N64Rom:
def get_country_name(self):
return country_codes[self.country_code]
# TODO: support .n64 extension
def main():
args = parser.parse_args()
rom = get_info(args.rom, args.encoding)

View File

@ -1,12 +0,0 @@
class N64Symbol:
    """Plain record describing a symbol by VRAM address, with a default ``D_<addr>`` name."""

    def __init__(self, vram, rom=None, name=None, segment=None, length=4):
        # A missing or empty name falls back to the address-derived default
        self.name = name or self.get_default_name(vram)
        self.vram = vram
        self.rom = rom
        self.segment = segment
        self.length = length

    @staticmethod
    def get_default_name(vram):
        """Return the default name for ``vram``: ``D_`` plus the uppercase hex address."""
        return "D_{:X}".format(vram)

View File

@ -0,0 +1,46 @@
class Symbol:
    """A named address tracked during disassembly.

    Attributes:
        vram_start: VRAM address of the symbol.
        rom: ROM address of the symbol, or ``None`` when unknown.
        type: Symbol kind, e.g. ``"func"``, ``"jtbl"`` or ``"unknown"``.
        in_overlay: Whether the symbol was marked as living in an overlay.
        size: Size in bytes (defaults to 4).
        given_name: Explicit name; when unset, ``default_name`` is used.
        defined / referenced: Flags set by users of the symbol.
        access_mnemonic: Mnemonic recorded by users of the symbol, initially ``None``.
    """

    def __init__(self, vram, given_name=None, rom=None, type="unknown", in_overlay=False, size=4):
        self.defined = False
        self.referenced = False
        self.vram_start = vram
        self.rom = rom
        self.type = type
        self.in_overlay = in_overlay
        self.size = size
        self.given_name = given_name
        self.access_mnemonic = None

    @property
    def default_name(self):
        """Auto-generated name: ``<prefix>_<VRAM>`` plus ``_<ROM>`` for overlay symbols."""
        suffix = f"_{self.vram_start:X}"

        # The rom suffix disambiguates overlay symbols; it can only be added
        # when a rom address is actually known (formatting None would raise).
        if self.in_overlay and self.rom is not None:
            suffix += f"_{self.rom:X}"

        if self.type == "func":
            prefix = "func"
        elif self.type == "jtbl":
            prefix = "jtbl"
        else:
            prefix = "D"

        return prefix + suffix

    @property
    def name(self):
        """The given name if one was provided, otherwise ``default_name``."""
        return self.given_name if self.given_name else self.default_name

    @property
    def rom_end(self):
        """Exclusive ROM end address, or ``None`` when no rom address is set."""
        return None if not self.rom else self.rom + self.size

    @property
    def vram_end(self):
        """Exclusive VRAM end address."""
        return self.vram_start + self.size

    def set_in_overlay(self):
        """Mark the symbol as belonging to an overlay."""
        self.in_overlay = True

    def contains_vram(self, offset):
        """Return whether ``offset`` lies in [vram_start, vram_end)."""
        return offset >= self.vram_start and offset < self.vram_end

    def contains_rom(self, offset):
        """Return whether ``offset`` lies in [rom, rom_end).

        Always ``False`` when the symbol has no rom address.
        """
        rom_end = self.rom_end
        if rom_end is None:
            return False
        return offset >= self.rom and offset < rom_end