#! /usr/bin/env python3

import hashlib
from typing import Dict, List, Optional, Union, Set, Any
import argparse
import spimdisasm
import rabbitizer
import tqdm
import yaml
import pickle
from colorama import Style, Fore

from segtypes.segment import Segment
from segtypes.linker_entry import LinkerWriter, to_cname
from util import log
from util import options
from util import symbols
from util import palettes
from util import compiler
from util.symbols import Symbol

from intervaltree import Interval, IntervalTree

VERSION = "0.10.0"

parser = argparse.ArgumentParser(
    description="Split a rom given a rom, a config, and an output directory"
)
parser.add_argument("config", help="path to a compatible config .yaml file", nargs="+")
parser.add_argument("--target", help="path to a file to split (.z64 rom)")
parser.add_argument("--basedir", help="a directory in which to extract the rom")
parser.add_argument("--modes", nargs="+", default="all")
parser.add_argument("--verbose", action="store_true", help="Enable debug logging")
parser.add_argument(
    "--use-cache", action="store_true", help="Only split changed segments in config"
)

linker_writer: LinkerWriter
config: Dict[str, Any]

segment_roms: IntervalTree = IntervalTree()
segment_rams: IntervalTree = IntervalTree()


def fmt_size(size):
    if size > 1000000:
        return str(size // 1000000) + " MB"
    elif size > 1000:
        return str(size // 1000) + " KB"
    else:
        return str(size) + " B"


def initialize_segments(config_segments: Union[dict, list]) -> List[Segment]:
    global segment_roms
    global segment_rams

    segment_roms = IntervalTree()
    segment_rams = IntervalTree()

    segments_by_name: Dict[str, Segment] = {}
    ret = []

    for i, seg_yaml in enumerate(config_segments):
        # rompos marker
        if isinstance(seg_yaml, list) and len(seg_yaml) == 1:
            continue

        seg_type = Segment.parse_segment_type(seg_yaml)

        segment_class = Segment.get_class_for_type(seg_type)

        this_start = Segment.parse_segment_start(seg_yaml)
        next_start = Segment.parse_segment_start(config_segments[i + 1])

        segment: Segment = Segment.from_yaml(
            segment_class, seg_yaml, this_start, next_start
        )

        if segment.require_unique_name:
            if segment.name in segments_by_name:
                log.error(f"segment name '{segment.name}' is not unique")

            segments_by_name[segment.name] = segment

        ret.append(segment)

        if (
            isinstance(segment.rom_start, int)
            and isinstance(segment.rom_end, int)
            and segment.rom_start != segment.rom_end
        ):
            segment_roms.addi(segment.rom_start, segment.rom_end, segment)

        if (
            isinstance(segment.vram_start, int)
            and isinstance(segment.vram_end, int)
            and segment.vram_start != segment.vram_end
        ):
            segment_rams.addi(segment.vram_start, segment.vram_end, segment)

    for segment in ret:
        if segment.follows_vram:
            if segment.follows_vram not in segments_by_name:
                log.error(
                    f"segment '{segment.name}' follows_vram segment '{segment.follows_vram}' does not exist"
                )
            segment.follows_vram_segment = segments_by_name[segment.follows_vram]

    return ret


def assign_symbols_to_segments():
    seg_syms: dict[int, list[Symbol]] = {}

    for symbol in symbols.all_symbols:
        if symbol.rom:
            cands = segment_roms[symbol.rom]
            if len(cands) > 1:
                log.error("multiple segments rom overlap symbol", symbol)
            elif len(cands) == 0:
                log.error("no segment rom overlaps symbol", symbol)
            else:
                cand: Interval = cands.pop()
                seg: Segment = cand.data
                seg.add_symbol(symbol)
        else:
            cands: Set[Interval] = segment_rams[symbol.vram_start]
            segs: List[Segment] = [cand.data for cand in cands]
            for seg in segs:
                if not seg.get_exclusive_ram_id():
                    seg.add_symbol(symbol)


def do_statistics(seg_sizes, rom_bytes, seg_split, seg_cached):
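    """Log a summary of the split: bytes and share of the ROM per segment type
    (with split/cached counts), plus the amount of unknown binary data."""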
    unk_size = seg_sizes.get("unk", 0)
    rest_size = 0
    total_size = len(rom_bytes)

    for typ in seg_sizes:
        if typ != "unk":
            rest_size += seg_sizes[typ]

    known_ratio = rest_size / total_size
    unk_ratio = unk_size / total_size

    log.write(f"Split {fmt_size(rest_size)} ({known_ratio:.2%}) in defined segments")
    for typ in seg_sizes:
        if typ != "unk":
            tmp_size = seg_sizes[typ]
            tmp_ratio = tmp_size / total_size
            log.write(
                f"{typ:>20}: {fmt_size(tmp_size):>8} ({tmp_ratio:.2%}) {Fore.GREEN}{seg_split[typ]} split{Style.RESET_ALL}, {Style.DIM}{seg_cached[typ]} cached"
            )
    log.write(
        f"{'unknown':>20}: {fmt_size(unk_size):>8} ({unk_ratio:.2%}) from unknown bin files"
    )


def merge_configs(main_config, additional_config):
    # Merge rules are simple
    # For each key in the dictionary
    # - If list then append to list
    # - If a dictionary then repeat merge on sub dictionary entries
    # - Else assume string or number and replace entry
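    #
    # Illustrative example (keys and values are hypothetical):
    #   main:       {"options": {"platform": "n64"}, "segments": [seg_a]}
    #   additional: {"options": {"verbose": True}, "segments": [seg_b]}
    #   merged:     {"options": {"platform": "n64", "verbose": True},
    #                "segments": [seg_a, seg_b]}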
    for curkey in additional_config:
        if curkey not in main_config:
            main_config[curkey] = additional_config[curkey]
        elif type(main_config[curkey]) != type(additional_config[curkey]):
            log.error(f"Type for key {curkey} in configs does not match")
        else:
            # keys exist and match, see if a list to append
            if type(main_config[curkey]) == list:
                main_config[curkey] += additional_config[curkey]
            elif type(main_config[curkey]) == dict:
                # need to merge sub areas
                main_config[curkey] = merge_configs(
                    main_config[curkey], additional_config[curkey]
                )
            else:
                # not a list or dictionary, must be a number or string, overwrite
                main_config[curkey] = additional_config[curkey]

    return main_config


def configure_disassembler():
    # Configure spimdisasm
    spimdisasm.common.GlobalConfig.PRODUCE_SYMBOLS_PLUS_OFFSET = True
    spimdisasm.common.GlobalConfig.TRUST_USER_FUNCTIONS = True
    spimdisasm.common.GlobalConfig.TRUST_JAL_FUNCTIONS = True
    spimdisasm.common.GlobalConfig.GLABEL_ASM_COUNT = False

    if options.rom_address_padding():
        spimdisasm.common.GlobalConfig.ASM_COMMENT_OFFSET_WIDTH = 6
    else:
        spimdisasm.common.GlobalConfig.ASM_COMMENT_OFFSET_WIDTH = 0

    # spimdisasm does not perform any analysis on non-text sections, so enabling these options is pointless
    spimdisasm.common.GlobalConfig.AUTOGENERATED_NAMES_BASED_ON_SECTION_TYPE = False
    spimdisasm.common.GlobalConfig.AUTOGENERATED_NAMES_BASED_ON_DATA_TYPE = False

    spimdisasm.common.GlobalConfig.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_HILO = False

    rabbitizer.config.regNames_userFpcCsr = False
    rabbitizer.config.regNames_vr4300Cop0NamedRegisters = False

    rabbitizer.config.misc_opcodeLJust = options.mnemonic_ljust() - 1

    rabbitizer.config.regNames_gprAbiNames = rabbitizer.Abi.fromStr(
        options.get_mips_abi_gpr()
    )
    rabbitizer.config.regNames_fprAbiNames = rabbitizer.Abi.fromStr(
        options.get_mips_abi_float_regs()
    )

    if options.get_endianess() == "big":
        spimdisasm.common.GlobalConfig.ENDIAN = spimdisasm.common.InputEndian.BIG
    else:
        spimdisasm.common.GlobalConfig.ENDIAN = spimdisasm.common.InputEndian.LITTLE

    rabbitizer.config.pseudos_pseudoMove = False

    selectedCompiler = options.get_compiler()
    if selectedCompiler == compiler.SN64:
        rabbitizer.config.regNames_namedRegisters = False
        rabbitizer.config.toolchainTweaks_sn64DivFix = True
        rabbitizer.config.toolchainTweaks_treatJAsUnconditionalBranch = True
        spimdisasm.common.GlobalConfig.ASM_COMMENT = False
        spimdisasm.common.GlobalConfig.SYMBOL_FINDER_FILTERED_ADDRESSES_AS_HILO = False
        spimdisasm.common.GlobalConfig.COMPILER = spimdisasm.common.Compiler.SN64
    elif selectedCompiler == compiler.GCC:
        rabbitizer.config.toolchainTweaks_treatJAsUnconditionalBranch = True
        spimdisasm.common.GlobalConfig.COMPILER = spimdisasm.common.Compiler.GCC
    elif selectedCompiler == compiler.IDO:
        spimdisasm.common.GlobalConfig.COMPILER = spimdisasm.common.Compiler.IDO

    spimdisasm.common.GlobalConfig.GP_VALUE = options.get_gp()

    spimdisasm.common.GlobalConfig.ASM_TEXT_LABEL = options.get_asm_function_macro()
    spimdisasm.common.GlobalConfig.ASM_DATA_LABEL = options.get_asm_data_macro()
    spimdisasm.common.GlobalConfig.ASM_TEXT_END_LABEL = options.get_asm_end_label()

    if spimdisasm.common.GlobalConfig.ASM_TEXT_LABEL == ".globl":
        spimdisasm.common.GlobalConfig.ASM_TEXT_ENT_LABEL = ".ent"
        spimdisasm.common.GlobalConfig.ASM_TEXT_FUNC_AS_LABEL = True

    spimdisasm.common.GlobalConfig.LINE_ENDS = options.c_newline()

    if options.get_platform() == "n64":
        symbols.spim_context.fillDefaultBannedSymbols()


def brief_seg_name(seg: Segment, limit: int, ellipsis="…") -> str:
    s = seg.name.strip()
    if len(s) > limit:
        return s[:limit].strip() + ellipsis
    return s


def main(config_path, base_dir, target_path, modes, verbose, use_cache=True):
    global config

    log.write(f"splat {VERSION} (powered by spimdisasm {spimdisasm.__version__})")

    # Load config
    config = {}
    for entry in config_path:
        with open(entry) as f:
            additional_config = yaml.load(f.read(), Loader=yaml.SafeLoader)
        config = merge_configs(config, additional_config)

    options.initialize(config, config_path, base_dir, target_path)
    options.set("modes", modes)

    if verbose:
        options.set("verbose", True)

    with options.get_target_path().open("rb") as f2:
        rom_bytes = f2.read()

    if "sha1" in config:
        sha1 = hashlib.sha1(rom_bytes).hexdigest()
        e_sha1 = config["sha1"].lower()
        if e_sha1 != sha1:
            log.error(f"sha1 mismatch: expected {e_sha1}, was {sha1}")

    # Create main output dir
    options.get_base_path().mkdir(parents=True, exist_ok=True)

    processed_segments: List[Segment] = []

    seg_sizes: Dict[str, int] = {}
    seg_split: Dict[str, int] = {}
    seg_cached: Dict[str, int] = {}

    # Load cache
    if use_cache:
        try:
            with options.get_cache_path().open("rb") as f3:
                cache = pickle.load(f3)

            if verbose:
                log.write(f"Loaded cache ({len(cache.keys())} items)")
        except Exception:
            cache = {}
    else:
        cache = {}

    # Invalidate entire cache if options change
    if use_cache and cache.get("__options__") != config.get("options"):
        if verbose:
            log.write("Options changed, invalidating cache")

        cache = {
            "__options__": config.get("options"),
        }

    configure_disassembler()

    # Initialize segments
    all_segments = initialize_segments(config["segments"])

    # Load and process symbols
    symbols.initialize(all_segments)

    # Assign symbols to segments
    assign_symbols_to_segments()

    if options.mode_active("code"):
        symbols.initialize_spim_context(all_segments)

    # Resolve raster/palette siblings
    if options.mode_active("img"):
        palettes.initialize(all_segments)

    # Scan
    scan_bar = tqdm.tqdm(all_segments, total=len(all_segments))
    for segment in scan_bar:
        assert isinstance(segment, Segment)
        scan_bar.set_description(f"Scanning {brief_seg_name(segment, 20)}")

        typ = segment.type
        if segment.type == "bin" and segment.is_name_default():
            typ = "unk"

        if typ not in seg_sizes:
            seg_sizes[typ] = 0
            seg_split[typ] = 0
            seg_cached[typ] = 0
        seg_sizes[typ] += 0 if segment.size is None else segment.size

        if segment.should_scan():
            # Check cache but don't write anything
            if use_cache:
                if segment.cache() == cache.get(segment.unique_id()):
                    continue

            segment.did_run = True
            segment.scan(rom_bytes)

            processed_segments.append(segment)

            seg_split[typ] += 1

    # Split
    for segment in tqdm.tqdm(
        all_segments,
        total=len(all_segments),
        desc=f"Splitting {brief_seg_name(segment, 20)}",
    ):
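        # With --use-cache, a segment is only re-split when its cache entry
        # differs from the one recorded on the previous run.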
        if use_cache:
            cached = segment.cache()

            if cached == cache.get(segment.unique_id()):
                # Cache hit
                seg_cached[typ] += 1
                continue
            else:
                # Cache miss; split
                cache[segment.unique_id()] = cached

        if segment.should_split():
            segment.split(rom_bytes)

    if options.mode_active("ld"):
        global linker_writer
        linker_writer = LinkerWriter()
        for i, segment in enumerate(
            tqdm.tqdm(
                all_segments,
                total=len(all_segments),
                desc=f"Writing linker script {brief_seg_name(segment, 20)}",
            )
        ):
            next_segment: Optional[Segment] = None
            if i < len(all_segments) - 1:
                next_segment = all_segments[i + 1]
            linker_writer.add(segment, next_segment)
        linker_writer.save_linker_script()
        linker_writer.save_symbol_header()

        # write elf_sections.txt - this only lists the generated sections in the elf, not subsections
        # that the elf combines into one section
        if options.get_create_elf_section_list_auto():
            section_list = ""
            for segment in all_segments:
                section_list += "." + to_cname(segment.name) + "\n"
            with open(options.get_elf_section_list_path(), "w", newline="\n") as f:
                f.write(section_list)

    # Write undefined_funcs_auto.txt
    if options.get_create_undefined_funcs_auto():
        to_write = [
            s
            for s in symbols.all_symbols
            if s.referenced and not s.defined and not s.dead and s.type == "func"
        ]
        to_write.sort(key=lambda x: x.vram_start)

        with open(options.get_undefined_funcs_auto_path(), "w", newline="\n") as f:
            for symbol in to_write:
                f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")

    # Write undefined_syms_auto.txt
    if options.get_create_undefined_syms_auto():
        to_write = [
            s
            for s in symbols.all_symbols
            if s.referenced
            and not s.defined
            and not s.dead
            and s.type not in {"func", "label", "jtbl_label"}
        ]
        to_write.sort(key=lambda x: x.vram_start)

        with open(options.get_undefined_syms_auto_path(), "w", newline="\n") as f:
            for symbol in to_write:
                f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")

    # Print warnings during split
    for segment in all_segments:
        if len(segment.warnings) > 0:
            log.write(
                f"{Style.DIM}0x{segment.rom_start:06X}{Style.RESET_ALL} {segment.type} {Style.BRIGHT}{segment.name}{Style.RESET_ALL}:"
            )

            for warn in segment.warnings:
                log.write("warning: " + warn, status="warn")

            log.write("")  # empty line

    # Statistics
    do_statistics(seg_sizes, rom_bytes, seg_split, seg_cached)

    # Save cache
    if cache != {} and use_cache:
        if verbose:
            log.write("Writing cache")
        with open(options.get_cache_path(), "wb") as f4:
            pickle.dump(cache, f4)


if __name__ == "__main__":
    args = parser.parse_args()
    main(
        args.config, args.basedir, args.target, args.modes, args.verbose, args.use_cache
    )
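
# Example invocation (script name and paths are illustrative):
#   python3 split.py path/to/config.yaml --target path/to/rom.z64 --basedir . --use-cache --verbose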