From f8a344e713fbe1244ae4f28cf9329179c46123b1 Mon Sep 17 00:00:00 2001 From: Ethan Roseman Date: Thu, 4 Feb 2021 00:09:01 +0900 Subject: [PATCH] git subrepo pull tools/splat subrepo: subdir: "tools/splat" merged: "b426daf02" upstream: origin: "https://github.com/ethteck/splat.git" branch: "master" commit: "b426daf02" git-subrepo: version: "0.4.3" origin: "https://github.com/ingydotnet/git-subrepo" commit: "2f68596" --- tools/splat/.gitrepo | 4 +- tools/splat/CHANGELOG.md | 14 ++ tools/splat/segtypes/n64/code.py | 396 +++++++++++++++++++------------ tools/splat/segtypes/segment.py | 20 +- tools/splat/split.py | 140 ++++++----- tools/splat/util/n64/rominfo.py | 3 +- tools/splat/util/n64/symbol.py | 12 - tools/splat/util/symbol.py | 46 ++++ 8 files changed, 393 insertions(+), 242 deletions(-) delete mode 100644 tools/splat/util/n64/symbol.py create mode 100644 tools/splat/util/symbol.py diff --git a/tools/splat/.gitrepo b/tools/splat/.gitrepo index 96b0ac92e4..715bcd7a65 100644 --- a/tools/splat/.gitrepo +++ b/tools/splat/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/ethteck/splat.git branch = master - commit = e2b731ab198c1d8400412ffd09252deca65253d2 - parent = 4201a08a28b6d6f070b35f97e3ba726b5448893b + commit = b426daf02b5b5549a390d8212f601920de35ce7e + parent = 43f3a6fa5f49424940912f3f37dc018d8231ddba method = merge cmdver = 0.4.3 diff --git a/tools/splat/CHANGELOG.md b/tools/splat/CHANGELOG.md index 8e4f9f6d1a..fbbdcb23f3 100644 --- a/tools/splat/CHANGELOG.md +++ b/tools/splat/CHANGELOG.md @@ -10,3 +10,17 @@ * New `undefined_funcs_auto_path` option * New `cache_path` option * (All path-like options' names now end with `_path`) + +## 0.6: The Symbol Update +Internally, there's a new Symbol class which stores information about a symbol and is stored in a couple places during disassembly. Many things should be improved, such as reconciling symbols within overlays, things being named functions vs data symbols, and more. + +**Breaking change**: The format to symbol_addrs.txt has been updated. After specifying the name and address of a symbol (`symbol = addr;`), optional properties of symbols can be set via inline comment, space delimited, in any order. The properties are of the format `name:value` + * `type:` supports `func` mostly right now but will support `label` and `data` later on. Internally, `jtbl` is used as well, for jump tables. Splat uses type information during disassembly to disambiguate symbols with the same addresses. + * `rom:` is for the hex rom address of the symbol, beginning with `0x`. If available, this information is extremely valuable for use in disambiguating symbols. + * `size:` specifies the size of the symbol, which splat will use to generate offsets during disassembly. Uses the same format as `rom:` + +**function example**: `FuncNameHere = 0x80023423; // type:func rom:0x10023` + +**data example**: `gSomeDataVar = 0x80024233; // type:data size:0x100` + +As always, feel free to reach out to me with any questions, suggestions, or feedback. \ No newline at end of file diff --git a/tools/splat/segtypes/n64/code.py b/tools/splat/segtypes/n64/code.py index 4b56928a43..f752f68a80 100644 --- a/tools/splat/segtypes/n64/code.py +++ b/tools/splat/segtypes/n64/code.py @@ -1,4 +1,3 @@ -from re import split from capstone import * from capstone.mips import * @@ -6,10 +5,10 @@ from collections import OrderedDict from segtypes.n64.segment import N64Segment import os from pathlib import Path, PurePath -from ranges import Range, RangeDict import re import sys from util import floats +from util.symbol import Symbol STRIP_C_COMMENTS_RE = re.compile( @@ -22,6 +21,12 @@ C_FUNC_RE = re.compile( re.MULTILINE ) +double_mnemonics = ["ldc1", "sdc1"] +word_mnemonics = ["addiu", "sw", "lw", "jtbl"] +float_mnemonics = ["lwc1", "swc1"] +short_mnemonics = ["addiu", "lh", "sh", "lhu"] +byte_mnemonics = ["lb", "sb", "lbu"] + def strip_c_comments(text): def replacer(match): s = match.group(0) @@ -39,64 +44,58 @@ def get_funcs_defined_in_c(c_file): return set(m.group(2) for m in C_FUNC_RE.finditer(text)) -def parse_segment_files(segment, segment_class, seg_start, seg_end, seg_name, seg_vram): - prefix = seg_name if seg_name.endswith("/") else f"{seg_name}_" - - ret = [] - prev_start = -1 - - if "files" in segment: - for i, split_file in enumerate(segment["files"]): - if type(split_file) is dict: - start = split_file["start"] - end = split_file["end"] - name = None if "name" not in split_file else split_file["name"] - subtype = split_file["type"] - else: - start = split_file[0] - end = seg_end if i == len(segment["files"]) - 1 else segment["files"][i + 1][0] - name = None if len(split_file) < 3 else split_file[2] - subtype = split_file[1] - - if start < prev_start: - print(f"Error: Code segment {seg_name} has files out of ascending rom order (0x{prev_start:X} followed by 0x{start:X})") - sys.exit(1) - - if not name: - name = N64SegCode.get_default_name(start) if seg_name == N64SegCode.get_default_name(seg_start) else f"{prefix}{start:X}" - - vram = seg_vram + (start - seg_start) - - fl = {"start": start, "end": end, "name": name, "vram": vram, "subtype": subtype} - - ret.append(fl) - prev_start = start - else: - fl = {"start": seg_start, "end": seg_end, - "name": seg_name, "vram": seg_vram, "subtype": "asm"} - ret.append(fl) - - return ret - - class N64SegCode(N64Segment): + def parse_segment_files(self, segment, seg_start, seg_end, seg_name, seg_vram): + prefix = seg_name if seg_name.endswith("/") else f"{seg_name}_" + + ret = [] + prev_start = -1 + + if "files" in segment: + for i, split_file in enumerate(segment["files"]): + if type(split_file) is dict: + start = split_file["start"] + end = split_file["end"] + name = None if "name" not in split_file else split_file["name"] + subtype = split_file["type"] + else: + start = split_file[0] + end = seg_end if i == len(segment["files"]) - 1 else segment["files"][i + 1][0] + name = None if len(split_file) < 3 else split_file[2] + subtype = split_file[1] + + if start < prev_start: + print(f"Error: Code segment {seg_name} has files out of ascending rom order (0x{prev_start:X} followed by 0x{start:X})") + sys.exit(1) + + if not name: + name = self.get_default_name(start) if seg_name == self.get_default_name(seg_start) else f"{prefix}{start:X}" + + vram = seg_vram + (start - seg_start) + + fl = {"start": start, "end": end, "name": name, "vram": vram, "subtype": subtype} + + ret.append(fl) + prev_start = start + else: + fl = {"start": seg_start, "end": seg_end, + "name": seg_name, "vram": seg_vram, "subtype": "asm"} + ret.append(fl) + + return ret + def __init__(self, segment, next_segment, options): super().__init__(segment, next_segment, options) - self.files = parse_segment_files(segment, self.__class__, self.rom_start, self.rom_end, self.name, self.vram_addr) + self.files = self.parse_segment_files(segment, self.rom_start, self.rom_end, self.name, self.vram_start) self.is_overlay = segment.get("overlay", False) - self.labels_to_add = set() - self.jtbl_glabels = set() - self.glabels_to_add = set() - self.special_labels = {} - self.undefined_syms_to_add = set() - self.glabels_added = {} - self.all_functions = {} - self.provided_symbols = {} - self.c_labels_to_add = set() - self.ld_section_name = "." + segment.get("ld_name", f"text_{self.rom_start:X}") - self.symbol_ranges = RangeDict() - self.detected_syms = {} + self.all_symbols = () + self.seg_symbols = {} # Symbols known to be in this segment + self.ext_symbols = {} # Symbols not in this segment but also not from other overlapping ram address ranges + self.symbol_ranges = [] + self.reported_file_split = False + self.labels_to_add = set() + self.jtbl_glabels_to_add = set() self.jtbl_jumps = {} self.jumptables = {} @@ -104,23 +103,79 @@ class N64SegCode(N64Segment): def get_default_name(addr): return f"code_{addr:X}" - def get_func_name(self, addr): - return self.provided_symbols.get(addr, f"func_{addr:X}") + def retrieve_symbol(self, d, k, t): + if k not in d: + return None - def get_unique_func_name(self, func_addr, rom_addr): - func_name = self.get_func_name(func_addr) + if t: + items = [s for s in d[k] if s.type == t or s.type == "unknown"] + else: + items = d[k] - if self.is_overlay and (func_addr >= self.vram_addr) and (func_addr <= self.vram_addr + self.rom_end - self.rom_start): - return func_name + "_{:X}".format(rom_addr) - return func_name + if len(items) > 1: + pass #print(f"Trying to retrieve {k:X} from symbol dict but there are {len(items)} entries to pick from - picking the first") + if len(items) == 0: + return None + return items[0] - def add_glabel(self, ram_addr, rom_addr): - func = self.get_unique_func_name(ram_addr, rom_addr) - self.glabels_to_add.discard(func) - self.glabels_added[ram_addr] = func - if not self.is_overlay: - self.all_functions[ram_addr] = func - return "glabel " + func + def retrieve_symbol_from_ranges(self, vram, rom=None): + rom_matches = [] + ram_matches = [] + + for symbol in self.symbol_ranges: + if symbol.contains_vram(vram): + if symbol.rom and rom and symbol.contains_rom(rom): + rom_matches.append(symbol) + else: + ram_matches.append(symbol) + + ret = rom_matches + ram_matches + + if len(ret) > 0: + return ret[0] + else: + return None + + def get_symbol(self, addr, type=None, create=False, define=False, reference=False, offsets=False, local_only=False): + ret = None + rom = None + + in_segment = self.contains_vram(addr) + + if in_segment: + # If the vram address is within this segment, we can calculate the symbol's rom address + rom = self.ram_to_rom(addr) + ret = self.retrieve_symbol(self.seg_symbols, addr, type) + elif not local_only: + ret = self.retrieve_symbol(self.ext_symbols, addr, type) + + # Search for symbol ranges + if not ret and offsets: + ret = self.retrieve_symbol_from_ranges(addr, rom) + + # Create the symbol if it doesn't exist + if not ret and create: + ret = Symbol(addr, rom=rom, type=type) + self.all_symbols.append(ret) + + if in_segment: + if self.is_overlay: + ret.set_in_overlay() + if addr not in self.seg_symbols: + self.seg_symbols[addr] = [] + self.seg_symbols[addr].append(ret) + elif not local_only: + if addr not in self.ext_symbols: + self.ext_symbols[addr] = [] + self.ext_symbols[addr].append(ret) + + if ret: + if define: + ret.defined = True + if reference: + ret.referenced = True + + return ret def get_asm_header(self): ret = [] @@ -192,32 +247,23 @@ class N64SegCode(N64Segment): print("INVALID INSTRUCTION " + insn) elif mnemonic == "jal": jal_addr = int(op_str, 0) - jump_func = self.get_func_name(jal_addr) - if ( - jump_func.startswith("func_") - and self.is_overlay - and jal_addr >= self.vram_addr - and jal_addr <= (self.vram_addr + self.rom_end - self.rom_start) - ): - func_loc = self.rom_start + jal_addr - self.vram_addr - jump_func += "_{:X}".format(func_loc) - - if jump_func not in self.provided_symbols.values(): - self.glabels_to_add.add(jump_func) - op_str = jump_func + jump_func = self.get_symbol(jal_addr, type="func", create=True, reference=True) + op_str = jump_func.name elif self.is_branch_insn(insn.mnemonic): op_str_split = op_str.split(" ") branch_target = op_str_split[-1] branch_target_int = int(branch_target, 0) label = "" - if branch_target_int in self.special_labels: - label = self.special_labels[branch_target_int] + label = self.get_symbol(branch_target_int, type="label", reference=True, local_only=True) + + if label: + label_name = label.name else: self.labels_to_add.add(branch_target_int) - label = ".L" + branch_target[2:].upper() + label_name = f".L{branch_target[2:].upper()}" - op_str = " ".join(op_str_split[:-1] + [label]) + op_str = " ".join(op_str_split[:-1] + [label_name]) elif mnemonic == "mtc0" or mnemonic == "mfc0": rd = (insn.bytes[2] & 0xF8) >> 3 op_str = op_str.split(" ")[0] + " $" + str(rd) @@ -239,7 +285,7 @@ class N64SegCode(N64Segment): end_func = True continue - if i < len(insns) - 1 and self.get_func_name(insns[i + 1].address) in self.c_labels_to_add: + if i < len(insns) - 1 and self.get_symbol(insns[i + 1].address, local_only=True, type="func"): end_func = True if end_func: @@ -262,33 +308,24 @@ class N64SegCode(N64Segment): return fl return None - def store_symbol_access(self, addr, mnemonic): - # Don't overwrite useful info with addiu - if addr in self.detected_syms and self.detected_syms[addr] != "addiu": + def update_access_mnemonic(self, sym, mnemonic): + if not sym.access_mnemonic: + sym.access_mnemonic = mnemonic + elif sym.access_mnemonic == "addiu": + sym.access_mnemonic = mnemonic + elif sym.access_mnemonic in double_mnemonics: return - - self.detected_syms[addr] = mnemonic - - def get_symbol_name(self, addr, rom_addr, funcs=None): - if funcs and addr in funcs: - return self.get_unique_func_name(addr, rom_addr) - if addr in self.all_functions: - return self.all_functions[addr] # todo clean up funcs vs all_functions - if addr in self.provided_symbols: - return self.provided_symbols[addr] - if addr in self.jumptables: - return f"jtbl_{addr:X}_{rom_addr:X}" - if addr in self.symbol_ranges: - ret = self.symbol_ranges.get(addr) - offset = addr - self.symbol_ranges.getrange(addr).start - if offset != 0: - ret += f"+0x{offset:X}" - return ret - - return f"D_{addr:X}" + elif sym.access_mnemonic in float_mnemonics and mnemonic in double_mnemonics: + sym.access_mnemonic = mnemonic + elif sym.access_mnemonic in short_mnemonics: + return + elif sym.access_mnemonic in byte_mnemonics: + return + else: + sym.access_mnemonic = mnemonic # Determine symbols - def determine_symbols(self, funcs, rom_addr): + def determine_symbols(self, funcs): ret = {} for func_addr in funcs: @@ -345,29 +382,31 @@ class N64SegCode(N64Segment): s_str = s_op_split[-1] symbol_addr = (lui_val * 0x10000) + int(s_str, 0) - symbol_name = self.get_symbol_name(symbol_addr, symbol_addr - next(iter(funcs)) + rom_addr, funcs) - symbol_tag = s_insn.mnemonic - vram_end = self.vram_addr + self.rom_end - self.rom_start - if symbol_addr > func_addr and symbol_addr < vram_end and len(possible_jtbl_jumps) > 0 and func_end_addr - s_insn.address >= 0x30: + sym = None + offset_str = "" + + if symbol_addr > func_addr and symbol_addr < self.vram_end and len(possible_jtbl_jumps) > 0 and func_end_addr - s_insn.address >= 0x30: for jump in possible_jtbl_jumps: if jump[1] == s_op_split[0]: dist_to_jump = possible_jtbl_jumps[0][0] - s_insn.address if dist_to_jump <= 16: - symbol_name = f"jtbl_{symbol_addr:X}_{self.ram_to_rom(symbol_addr):X}" - symbol_tag = "jtbl" + sym = self.get_symbol(symbol_addr, create=True, reference=True, type="jtbl", local_only=True) self.jumptables[symbol_addr] = (func_addr, func_end_addr) break - self.store_symbol_access(symbol_addr, symbol_tag) - symbol_file = self.get_file_for_addr(symbol_addr) + if not sym: + sym = self.get_symbol(symbol_addr, create=True, offsets=True, reference=True) + offset = symbol_addr - sym.vram_start + if offset != 0: + offset_str = f"+0x{offset:X}" - if not symbol_file or symbol_file["subtype"] == "bin": - if "+" not in symbol_name: - self.undefined_syms_to_add.add((symbol_name, symbol_addr)) + self.update_access_mnemonic(sym, s_insn.mnemonic) - func[i] += ("%hi({})".format(symbol_name),) - func[j] += ("%lo({}){}".format(symbol_name, reg_ext),) + sym_label = sym.name + offset_str + + func[i] += ("%hi({})".format(sym_label),) + func[j] += ("%lo({}){}".format(sym_label, reg_ext),) break ret[func_addr] = func return ret @@ -380,7 +419,8 @@ class N64SegCode(N64Segment): # Add function glabel rom_addr = funcs[func][0][3] - func_text.append(self.add_glabel(func, rom_addr)) + sym = self.get_symbol(func, type="func", create=True, define=True, local_only=True) + func_text.append(f"glabel {sym.name}") indent_next = False @@ -393,7 +433,7 @@ class N64SegCode(N64Segment): if insn_addr in self.labels_to_add: self.labels_to_add.remove(insn_addr) func_text.append(".L{:X}:".format(insn_addr)) - if insn_addr in self.jtbl_glabels: + if insn_addr in self.jtbl_glabels_to_add: func_text.append(f"glabel L{insn_addr:X}_{insn[3]:X}") if rom_addr_padding: @@ -451,7 +491,8 @@ class N64SegCode(N64Segment): return super().should_run() or (st in self.options["modes"] and st in subtypes for st in possible_subtypes) - def is_valid_ascii(self, bytes): + @staticmethod + def is_valid_ascii(bytes): if len(bytes) < 8: return False @@ -467,10 +508,17 @@ class N64SegCode(N64Segment): return True def get_symbols_for_file(self, split_file): + ret = [] + vram_start = split_file["vram"] vram_end = split_file["vram"] + split_file["end"] - split_file["start"] - return [(s, self.detected_syms[s]) for s in self.detected_syms if s >= vram_start and s <= vram_end] + for symbol_addr in self.seg_symbols: + for symbol in self.seg_symbols[symbol_addr]: + if symbol.vram_start >= vram_start and symbol.vram_end < vram_end: + ret.append(symbol) + + return ret def disassemble_symbol(self, sym_bytes, sym_type): if sym_type == "jtbl": @@ -502,8 +550,14 @@ class N64SegCode(N64Segment): byte_str = f"L{bits:X}_{rom_addr:X}" else: byte_str = f"0x{bits:X}" + elif slen == 4 and bits >= 0x80000000: + sym = self.get_symbol(bits, reference=True) + if sym: + byte_str = sym.name + else: + byte_str = '0x{0:0{1}X}'.format(bits, 2 * slen) else: - byte_str = self.provided_symbols.get(bits, '0x{0:0{1}X}'.format(bits, 2 * slen)) + byte_str = '0x{0:0{1}X}'.format(bits, 2 * slen) if sym_type in ["float", "double"]: if sym_type == "float": @@ -512,7 +566,7 @@ class N64SegCode(N64Segment): float_str = floats.format_f64_imm(bits) # Fall back to .word if we see weird float values - # todo cut the symbol in half maybe where we see the first nan or something + # TODO: cut the symbol in half maybe where we see the first nan or something if "e-" in float_str or "nan" in float_str: return self.disassemble_symbol(sym_bytes, "word") else: @@ -532,31 +586,41 @@ class N64SegCode(N64Segment): ret = ".include \"macro.inc\"\n\n" ret += f'.section .{split_file["subtype"]}' + # Todo remove when we have class for file + file_size = split_file['end'] - split_file['start'] + + if file_size == 0: + return None + syms = self.get_symbols_for_file(split_file) - syms.sort(key=lambda x:x[0]) + syms.sort(key=lambda s:s.vram_start) if len(syms) == 0: self.warn("No symbol accesses detected for " + split_file["name"] + "; the output will most likely be an ugly blob") - # check beginning - if syms[0][0] != split_file["vram"]: - syms.insert(0, (split_file["vram"], None)) + # Ensure we start at the beginning + if len(syms) == 0 or syms[0].vram_start != split_file["vram"]: + syms.insert(0, self.get_symbol(split_file["vram"], create=True, define=True, local_only=True)) - # add end - vram_end = split_file["vram"] + split_file["end"] - split_file["start"] - if syms[-1][0] != vram_end: - syms.append((vram_end, None)) + vram_end = split_file["vram"] + file_size + if syms[-1].vram_start != vram_end: + # Make a dummy symbol here that marks the end of the previous symbol's disasm range + syms.append(Symbol(vram_end)) for i in range(len(syms) - 1): - mnemonic = syms[i][1] - start = syms[i][0] - end = syms[i + 1][0] + mnemonic = syms[i].access_mnemonic + start = syms[i].vram_start + end = syms[i + 1].vram_start sym_rom_start = start - split_file["vram"] + split_file["start"] sym_rom_end = end - split_file["vram"] + split_file["start"] - sym_name = self.get_symbol_name(start, sym_rom_start) - sym_str = f"\n\nglabel {sym_name}\n" + sym = self.get_symbol(start, create=True, define=True, local_only=True) + sym_str = f"\n\nglabel {sym.name}\n" sym_bytes = rom_bytes[sym_rom_start : sym_rom_end] + # TODO: Hack for null mnemonic - move elsewhere later, probably + if not mnemonic: + mnemonic = "addiu" + # .ascii if self.is_valid_ascii(sym_bytes) and mnemonic == "addiu": # mnemonic thing may be too picky, we'll see @@ -571,19 +635,19 @@ class N64SegCode(N64Segment): pass # Fallback to raw data - if mnemonic == "jtbl": + if syms[i].type == "jtbl": stype = "jtbl" - elif len(sym_bytes) % 8 == 0 and mnemonic in ["ldc1", "sdc1"]: + elif len(sym_bytes) % 8 == 0 and mnemonic in double_mnemonics: stype = "double" - elif len(sym_bytes) % 4 == 0 and mnemonic in ["addiu", "sw", "lw", "jtbl"]: + elif len(sym_bytes) % 4 == 0 and mnemonic in word_mnemonics: stype = "word" - elif len(sym_bytes) % 4 == 0 and mnemonic in ["lwc1", "swc1"]: + elif len(sym_bytes) % 4 == 0 and mnemonic in float_mnemonics: stype = "float" - elif len(sym_bytes) % 2 == 0 and mnemonic in ["addiu", "lh", "sh", "lhu"]: + elif len(sym_bytes) % 2 == 0 and mnemonic in short_mnemonics: stype = "short" else: stype = "byte" - + if not rodata_encountered and mnemonic == "jtbl": rodata_encountered = True ret += "\n\n\n.section .rodata" @@ -604,10 +668,12 @@ class N64SegCode(N64Segment): return ret - def gather_jumptable_labels(self, section_vram, section_rom, rom_bytes): + def gather_jumptable_labels(self, rom_bytes): + # TODO: use the seg_symbols for this + # jumptables = [j.type == "jtbl" for j in self.seg_symbols] for jumptable in self.jumptables: start, end = self.jumptables[jumptable] - rom_offset = section_rom + jumptable - section_vram + rom_offset = self.rom_start + jumptable - self.vram_start if rom_offset <= 0: return @@ -616,12 +682,23 @@ class N64SegCode(N64Segment): word = rom_bytes[rom_offset : rom_offset + 4] word_int = int.from_bytes(word, "big") if word_int >= start and word_int <= end: - self.jtbl_glabels.add(word_int) + self.jtbl_glabels_to_add.add(word_int) else: break rom_offset += 4 + def mark_c_funcs_as_defined(self, c_funcs): + for func_name in c_funcs: + found = False + for func_addr in self.seg_symbols: + for symbol in self.seg_symbols[func_addr]: + if symbol.name == func_name: + symbol.defined = True + found = True + break + if found: + break def split(self, rom_bytes, base_path): md = Cs(CS_ARCH_MIPS, CS_MODE_MIPS64 + CS_MODE_BIG_ENDIAN) @@ -645,8 +722,13 @@ class N64SegCode(N64Segment): insns = [insn for insn in md.disasm(rom_bytes[split_file["start"]: split_file["end"]], split_file["vram"])] funcs = self.process_insns(insns, rom_addr) - funcs = self.determine_symbols(funcs, rom_addr) - self.gather_jumptable_labels(self.vram_addr, self.rom_start, rom_bytes) + + # TODO: someday make func a subclass of symbol and store this disasm info there too + for func in funcs: + self.get_symbol(func, type="func", create=True, define=True, local_only=True) + + funcs = self.determine_symbols(funcs) + self.gather_jumptable_labels(rom_bytes) funcs_text = self.add_labels(funcs) if file_type == "c": @@ -655,6 +737,7 @@ class N64SegCode(N64Segment): if os.path.exists(c_path): defined_funcs = get_funcs_defined_in_c(c_path) + self.mark_c_funcs_as_defined(defined_funcs) else: defined_funcs = set() @@ -662,8 +745,7 @@ class N64SegCode(N64Segment): base_path, os.path.join("asm", "nonmatchings")) for func in funcs_text: - func_name = self.get_unique_func_name( - func, funcs_text[func][1]) + func_name = self.get_symbol(func, type="func", local_only=True).name if func_name not in defined_funcs: if self.options.get("compiler", "IDO") == "GCC": @@ -686,7 +768,7 @@ class N64SegCode(N64Segment): c_lines = self.get_c_preamble() for func in funcs_text: - func_name = self.get_unique_func_name(func, funcs_text[func][1]) + func_name = self.get_symbol(func, type="func", local_only=True).name if self.options.get("compiler", "IDO") == "GCC": c_lines.append("INCLUDE_ASM(s32, \"{}\", {});".format(split_file["name"], func_name)) else: diff --git a/tools/splat/segtypes/segment.py b/tools/splat/segtypes/segment.py index 2d03dad2fe..0695c4c8ea 100644 --- a/tools/splat/segtypes/segment.py +++ b/tools/splat/segtypes/segment.py @@ -48,7 +48,7 @@ class Segment: self.rom_end = parse_segment_start(next_segment) self.type = parse_segment_type(segment) self.name = parse_segment_name(segment, self.__class__) - self.vram_addr = parse_segment_vram(segment) + self.vram_start = parse_segment_vram(segment) self.ld_name_override = segment.get("ld_name", None) if type(segment) is dict else None self.options = options self.config = segment @@ -73,19 +73,25 @@ class Segment: @property def vram_end(self): - return self.vram_addr + self.size + return self.vram_start + self.size + + def contains_vram(self, vram): + return vram >= self.vram_start and vram < self.vram_end + + def contains_rom(self, rom): + return rom >= self.rom_start and rom < self.rom_end def rom_to_ram(self, rom_addr): if rom_addr < self.rom_start or rom_addr > self.rom_end: return None - return self.vram_addr + rom_addr - self.rom_start + return self.vram_start + rom_addr - self.rom_start def ram_to_rom(self, ram_addr): - if ram_addr < self.vram_addr or ram_addr > self.vram_end: + if ram_addr < self.vram_start or ram_addr > self.vram_end: return None - return self.rom_start + ram_addr - self.vram_addr + return self.rom_start + ram_addr - self.vram_start def create_split_dir(self, base_path, subdir): out_dir = Path(base_path, subdir) @@ -112,7 +118,7 @@ class Segment: def get_ld_section(self): replace_ext = self.options.get("ld_o_replace_extension", True) sect_name = self.ld_name_override if self.ld_name_override else self.get_ld_section_name() - vram_or_rom = self.rom_start if self.vram_addr == 0 else self.vram_addr + vram_or_rom = self.rom_start if self.vram_start == 0 else self.vram_start subalign_str = "" if self.subalign == default_subalign else f"SUBALIGN({self.subalign})" s = ( @@ -125,7 +131,7 @@ class Segment: if start % 0x10 != 0 and i != 0: tmp_sect_name = path.replace(".", "_") tmp_sect_name = tmp_sect_name.replace("/", "_") - tmp_vram = start - self.rom_start + self.vram_addr + tmp_vram = start - self.rom_start + self.vram_start s += ( "}\n" f"SPLAT_BEGIN_SEG({tmp_sect_name}, 0x{start:X}, 0x{tmp_vram:X}, {subalign_str})\n" diff --git a/tools/splat/split.py b/tools/splat/split.py index 57468af67f..92bd7ab16a 100755 --- a/tools/splat/split.py +++ b/tools/splat/split.py @@ -4,7 +4,6 @@ import argparse import importlib import importlib.util import os -from ranges import Range, RangeDict from pathlib import Path import yaml import pickle @@ -12,6 +11,7 @@ from colorama import Style, Fore from segtypes.segment import parse_segment_type from segtypes.n64.code import N64SegCode from util import log +from util.symbol import Symbol parser = argparse.ArgumentParser( description="Split a rom given a rom, a config, and output directory") @@ -24,6 +24,7 @@ parser.add_argument("--verbose", action="store_true", parser.add_argument("--new", action="store_true", help="Only split changed segments in config") +sym_isolated_map = {} def write_ldscript(rom_name, repo_path, sections, options): with open(os.path.join(repo_path, rom_name + ".ld"), "w", newline="\n") as f: @@ -97,10 +98,7 @@ def get_cache_path(repo_path, options): def gather_symbols(symbol_addrs_path, undefined_syms_path): - symbols = {} - special_labels = {} - labels_to_add = set() - ranges = RangeDict() + symbols = [] # Manual list of func name / addrs if os.path.exists(symbol_addrs_path): @@ -120,17 +118,23 @@ def gather_symbols(symbol_addrs_path, undefined_syms_path): line_split = line.split("=") name = line_split[0].strip() addr = int(line_split[1].strip()[:-1], 0) - symbols[addr] = name + + sym = Symbol(addr, given_name=name) if line_ext: for info in line_ext.split(" "): - if info == "!": - labels_to_add.add(name) - special_labels[addr] = name + if info.startswith("type:"): + type = info.split(":")[1] + sym.type = type if info.startswith("size:"): size = int(info.split(":")[1], 0) - ranges.add(Range(addr, addr + size), name) - + sym.size = size + if info.startswith("rom:"): + rom_addr = int(info.split(":")[1], 0) + sym.rom = rom_addr + symbols.append(sym) + + # Maybe let's not use this if os.path.exists(undefined_syms_path): with open(undefined_syms_path) as f: us_lines = f.readlines() @@ -141,27 +145,9 @@ def gather_symbols(symbol_addrs_path, undefined_syms_path): line_split = line.split("=") name = line_split[0].strip() addr = int(line_split[1].strip()[:-1], 0) - symbols[addr] = name + symbols.append(Symbol(addr, given_name=name)) - return symbols, labels_to_add, special_labels, ranges - - -def gather_c_variables(undefined_syms_path): - vars = {} - - if os.path.exists(undefined_syms_path): - with open(undefined_syms_path) as f: - us_lines = f.readlines() - - for line in us_lines: - line = line.strip() - if not line == "" and not line.startswith("//"): - line_split = line.split("=") - name = line_split[0].strip() - addr = int(line_split[1].strip()[:-1], 0) - vars[addr] = name - - return vars + return symbols def get_base_segment_class(seg_type, platform): @@ -239,6 +225,46 @@ def initialize_segments(options, config_path, config_segments): return ret +def is_symbol_isolated(symbol, all_segments): + if symbol in sym_isolated_map: + return sym_isolated_map[symbol] + + relevant_segs = 0 + + for segment in all_segments: + if segment.contains_vram(symbol.vram_start): + relevant_segs += 1 + if relevant_segs > 1: + break + + sym_isolated_map[symbol] = relevant_segs < 2 + return sym_isolated_map[symbol] + +def get_segment_symbols(segment, all_symbols, all_segments): + seg_syms = {} + other_syms = {} + + for symbol in all_symbols: + if is_symbol_isolated(symbol, all_segments) and not symbol.rom: + if segment.contains_vram(symbol.vram_start): + if symbol.vram_start not in seg_syms: + seg_syms[symbol.vram_start] = [] + seg_syms[symbol.vram_start].append(symbol) + else: + if symbol.vram_start not in other_syms: + other_syms[symbol.vram_start] = [] + other_syms[symbol.vram_start].append(symbol) + else: + if symbol.rom and segment.contains_rom(symbol.rom): + if symbol.vram_start not in seg_syms: + seg_syms[symbol.vram_start] = [] + seg_syms[symbol.vram_start].append(symbol) + else: + if symbol.vram_start not in other_syms: + other_syms[symbol.vram_start] = [] + other_syms[symbol.vram_start].append(symbol) + + return seg_syms, other_syms def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False): with open(rom_path, "rb") as f: @@ -257,16 +283,14 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False): symbol_addrs_path = get_symbol_addrs_path(repo_path, options) undefined_syms_path = get_undefined_syms_path(repo_path, options) - provided_symbols, c_func_labels_to_add, special_labels, ranges = gather_symbols(symbol_addrs_path, undefined_syms_path) + all_symbols = gather_symbols(symbol_addrs_path, undefined_syms_path) + isolated_symbols = {} + symbol_ranges = [s for s in all_symbols if s.size > 4] platform = get_platform(options) processed_segments = [] ld_sections = [] - defined_funcs = {} - undefined_funcs = set() - undefined_syms = set() - seg_sizes = {} seg_split = {} seg_cached = {} @@ -284,11 +308,11 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False): for segment in all_segments: if platform == "n64" and type(segment) == N64SegCode: # remove special-case sometime - segment.all_functions = defined_funcs - segment.provided_symbols = provided_symbols - segment.special_labels = special_labels - segment.c_labels_to_add = c_func_labels_to_add - segment.symbol_ranges = ranges + segment_symbols, other_symbols = get_segment_symbols(segment, all_symbols, all_segments) + segment.seg_symbols = segment_symbols + segment.ext_symbols = other_symbols + segment.all_symbols = all_symbols + segment.symbol_ranges = symbol_ranges segment.check() @@ -319,11 +343,6 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False): if len(segment.errors) == 0: processed_segments.append(segment) - if platform == "n64" and type(segment) == N64SegCode: # edge case - undefined_funcs |= segment.glabels_to_add - defined_funcs = {**defined_funcs, **segment.glabels_added} - undefined_syms |= segment.undefined_syms_to_add - seg_split[tp] += 1 log.dot(status=segment.status()) @@ -339,45 +358,40 @@ def main(rom_path, config_path, repo_path, modes, verbose, ignore_cache=False): log.write(f"saving {config['basename']}.ld") write_ldscript(config['basename'], repo_path, ld_sections, options) + undefined_syms_to_write = [s for s in all_symbols if s.referenced and not s.defined and not s.type == "func"] + undefined_funcs_to_write = [s for s in all_symbols if s.referenced and not s.defined and s.type == "func"] + # Write undefined_funcs_auto.txt undefined_funcs_auto_path = get_undefined_funcs_auto_path(repo_path, options) if verbose: log.write(f"saving {undefined_funcs_auto_path}") - c_predefined_funcs = set(provided_symbols.keys()) - to_write = sorted(undefined_funcs - set(defined_funcs.values()) - c_predefined_funcs) + + to_write = undefined_funcs_to_write if len(to_write) > 0: with open(undefined_funcs_auto_path, "w", newline="\n") as f: - for line in to_write: - f.write(line + " = 0x" + line.split("_")[1][:8].upper() + ";\n") + for symbol in to_write: + f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n") # write undefined_syms_auto.txt undefined_syms_auto_path = get_undefined_syms_auto_path(repo_path, options) if verbose: log.write(f"saving {undefined_syms_auto_path}") - to_write = sorted(undefined_syms, key=lambda x:x[0]) + to_write = undefined_syms_to_write if len(to_write) > 0: with open(undefined_syms_auto_path, "w", newline="\n") as f: - for sym in to_write: - f.write(f"{sym[0]} = 0x{sym[1]:X};\n") + for symbol in to_write: + f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n") - # print warnings and errors during split/postsplit - had_error = False + # print warnings during split/postsplit for segment in all_segments: - if len(segment.warnings) > 0 or len(segment.errors) > 0: + if len(segment.warnings) > 0: log.write(f"{Style.DIM}0x{segment.rom_start:06X}{Style.RESET_ALL} {segment.type} {Style.BRIGHT}{segment.name}{Style.RESET_ALL}:") for warn in segment.warnings: log.write("warning: " + warn, status="warn") - for error in segment.errors: - log.write("error: " + error, status="error") - had_error = True - log.write("") # empty line - if had_error: - return 1 - # Statistics unk_size = seg_sizes.get("unk", 0) rest_size = 0 diff --git a/tools/splat/util/n64/rominfo.py b/tools/splat/util/n64/rominfo.py index b18aab7a6e..ab33fd60aa 100755 --- a/tools/splat/util/n64/rominfo.py +++ b/tools/splat/util/n64/rominfo.py @@ -9,6 +9,7 @@ parser.add_argument('rom', help='path to a .z64 rom') parser.add_argument('--encoding', help='Text encoding the game header is using; see docs.python.org/3/library/codecs.html#standard-encodings for valid encodings', default='ASCII') country_codes = { + 0x00: "Unknown", 0x37: "Beta", 0x41: "Asian (NTSC)", 0x42: "Brazillian", @@ -102,7 +103,7 @@ class N64Rom: def get_country_name(self): return country_codes[self.country_code] - +# TODO: support .n64 extension def main(): args = parser.parse_args() rom = get_info(args.rom, args.encoding) diff --git a/tools/splat/util/n64/symbol.py b/tools/splat/util/n64/symbol.py deleted file mode 100644 index 374b2ff086..0000000000 --- a/tools/splat/util/n64/symbol.py +++ /dev/null @@ -1,12 +0,0 @@ -class N64Symbol: - - @staticmethod - def get_default_name(vram): - return f"D_{vram:X}" - - def __init__(self, vram, rom=None, name=None, segment=None, length=4): - self.vram = vram - self.rom = rom - self.name = name if name else self.get_default_name(vram) - self.segment = segment - self.length = length diff --git a/tools/splat/util/symbol.py b/tools/splat/util/symbol.py new file mode 100644 index 0000000000..e319c63027 --- /dev/null +++ b/tools/splat/util/symbol.py @@ -0,0 +1,46 @@ +class Symbol: + + @property + def default_name(self): + suffix = f"_{self.vram_start:X}" + + if self.in_overlay: + suffix += f"_{self.rom:X}" + + if self.type == "func": + prefix = "func" + elif self.type =="jtbl": + prefix = "jtbl" + else: + prefix = "D" + + return prefix + suffix + + @property + def rom_end(self): + return None if not self.rom else self.rom + self.size + + @property + def vram_end(self): + return self.vram_start + self.size + + def set_in_overlay(self): + self.in_overlay = True + + @property + def name(self): + return self.given_name if self.given_name else self.default_name + + def contains_vram(self, offset): + return offset >= self.vram_start and offset < self.vram_end + + def __init__(self, vram, given_name=None, rom=None, type="unknown", in_overlay=False, size=4): + self.defined = False + self.referenced = False + self.vram_start = vram + self.rom = rom + self.type = type + self.in_overlay = in_overlay + self.size = size + self.given_name = given_name + self.access_mnemonic = None