papermario/tools/splat/split.py
Ethan Roseman 39ea4c7a86 git subrepo pull tools/splat
subrepo:
  subdir:   "tools/splat"
  merged:   "3a66565988"
upstream:
  origin:   "https://github.com/ethteck/splat.git"
  branch:   "master"
  commit:   "3a66565988"
git-subrepo:
  version:  "0.4.3"
  origin:   "https://github.com/ingydotnet/git-subrepo"
  commit:   "2f68596"
2021-02-08 19:57:56 +09:00

425 lines
15 KiB
Python
Executable File

#! /usr/bin/python3
import argparse
import importlib
import importlib.util
import os
from pathlib import Path
import yaml
import pickle
from colorama import Style, Fore
from segtypes.segment import parse_segment_type
from segtypes.n64.code import N64SegCode
from util import log
from util.symbol import Symbol
import sys
# Command-line interface of the splitter.
parser = argparse.ArgumentParser(
    description="Split a rom given a rom, a config, and output directory"
)

# Required: the YAML config describing the segments.
parser.add_argument(
    "config",
    help="path to a compatible config .yaml file",
)

# Optional overrides for values that main() can otherwise read from the config.
parser.add_argument(
    "--rom",
    help="path to a .z64 rom",
)
parser.add_argument(
    "--outdir",
    help="a directory in which to extract the rom",
)

# One or more mode names. Note that the default is the plain string "all"
# (not a list); main() only performs `in` membership tests on this value.
parser.add_argument(
    "--modes",
    nargs="+",
    default="all",
)

parser.add_argument(
    "--verbose",
    action="store_true",
    help="Enable debug logging",
)
parser.add_argument(
    "--new",
    action="store_true",
    help="Only split changed segments in config",
)

# Memo table used by is_symbol_isolated(): symbol -> bool.
sym_isolated_map = {}
def write_ldscript(rom_name, repo_path, sections, options):
    """Write the linker script ``<repo_path>/<rom_name>.ld``.

    The file always starts with the SPLAT_BEGIN_SEG / SPLAT_END_SEG macro
    definitions (each guarded by ``#ifndef`` and varying on ``SHIFT``).

    Args:
        rom_name: Base name of the output file (".ld" is appended).
        repo_path: Directory the linker script is written into.
        sections: Iterable of strings, one linker section each.
        options: Config options mapping. If ``ld_bare`` is truthy the
            sections are written as-is, joined by newlines; otherwise they
            are indented and wrapped in a ``SECTIONS { ... }`` block.
    """
    # Single source of truth for the indentation inside SECTIONS { ... }.
    # The same string is used for joining, for re-indenting embedded newlines
    # and for the final trim, so the three can never disagree.
    indent = "    "

    with open(os.path.join(repo_path, rom_name + ".ld"), "w", newline="\n") as f:
        f.write(
            "#ifndef SPLAT_BEGIN_SEG\n"
            "#ifndef SHIFT\n"
            "#define SPLAT_BEGIN_SEG(name, start, vram, subalign) \\\n"
            " . = start;\\\n"
            " name##_ROM_START = .;\\\n"
            " name##_VRAM = ADDR(.name);\\\n"
            " .name vram : AT(name##_ROM_START) subalign {\n"
            "#else\n"
            "#define SPLAT_BEGIN_SEG(name, start, vram, subalign) \\\n"
            " name##_ROM_START = .;\\\n"
            " name##_VRAM = ADDR(.name);\\\n"
            " .name vram : AT(name##_ROM_START) subalign {\n"
            "#endif\n"
            "#endif\n"
            "\n"
            "#ifndef SPLAT_END_SEG\n"
            "#ifndef SHIFT\n"
            "#define SPLAT_END_SEG(name, end) \\\n"
            " } \\\n"
            " . = end;\\\n"
            " name##_ROM_END = .;\n"
            "#else\n"
            "#define SPLAT_END_SEG(name, end) \\\n"
            " } \\\n"
            " name##_ROM_END = .;\n"
            "#endif\n"
            "#endif\n"
            "\n"
        )

        if options.get("ld_bare", False):
            f.write("\n".join(sections))
        else:
            line_break = "\n" + indent
            body = line_break.join(s.replace("\n", line_break) for s in sections)

            # A section ending in a newline leaves a dangling indent after the
            # last line; drop exactly that so the closing brace sits at
            # column 0. Never trim anything else: a fixed-width slice would
            # eat real content when the last section has no trailing newline.
            if body.endswith(line_break):
                body = body[:-len(indent)]

            f.write(
                "SECTIONS\n"
                "{\n" + indent
            )
            f.write(body)
            f.write(
                "}\n"
            )
def parse_file_start(split_file):
    """Return the start value of a file entry.

    The entry is either a container holding a "start" item (read via
    ``split_file["start"]``) or a sequence whose first element is the start.
    """
    if "start" in split_file:
        return split_file["start"]
    return split_file[0]
def get_symbol_addrs_path(repo_path, options):
    """Return the symbol address file path inside ``repo_path``.

    The file name is ``options["symbol_addrs_path"]`` when present,
    otherwise "symbol_addrs.txt".
    """
    filename = options.get("symbol_addrs_path", "symbol_addrs.txt")
    return os.path.join(repo_path, filename)
def get_undefined_syms_path(repo_path, options):
    """Return the undefined-symbols file path inside ``repo_path``.

    The file name is ``options["undefined_syms_path"]`` when present,
    otherwise "undefined_syms.txt".
    """
    filename = options.get("undefined_syms_path", "undefined_syms.txt")
    return os.path.join(repo_path, filename)
def get_undefined_syms_auto_path(repo_path, options):
    """Return the auto-generated undefined-symbols file path inside ``repo_path``.

    The file name is ``options["undefined_syms_auto_path"]`` when present,
    otherwise "undefined_syms_auto.txt".
    """
    filename = options.get("undefined_syms_auto_path", "undefined_syms_auto.txt")
    return os.path.join(repo_path, filename)
def get_undefined_funcs_auto_path(repo_path, options):
    """Return the auto-generated undefined-functions file path inside ``repo_path``.

    The file name is ``options["undefined_funcs_auto_path"]`` when present,
    otherwise "undefined_funcs_auto.txt".
    """
    filename = options.get("undefined_funcs_auto_path", "undefined_funcs_auto.txt")
    return os.path.join(repo_path, filename)
def get_cache_path(repo_path, options):
    """Return the split-cache file path inside ``repo_path``.

    The file name is ``options["cache_path"]`` when present, otherwise
    ".splat_cache".
    """
    filename = options.get("cache_path", ".splat_cache")
    return os.path.join(repo_path, filename)
def gather_symbols(symbol_addrs_path, undefined_syms_path):
    """Parse the manual symbol address file into a list of Symbol objects.

    Blank lines and lines starting with ``//`` are skipped. Every other line
    is read as ``name = address;`` optionally followed by a ``//`` comment
    holding space-separated attributes: ``type:<str>``, ``size:<int>`` and
    ``rom:<int>``. Integers are parsed with base auto-detection
    (``int(text, 0)``), so ``0x...`` literals are accepted.

    Args:
        symbol_addrs_path: Path of the symbol address file. A missing file
            yields an empty list.
        undefined_syms_path: Accepted for interface compatibility; this
            function does not read it.

    Returns:
        The parsed symbols, in file order.

    Raises:
        IndexError: If a non-comment line contains no ``=``.
        ValueError: If an address or integer attribute is not a valid
            integer literal.
    """
    symbols = []

    # Manual list of func name / addrs
    if not os.path.exists(symbol_addrs_path):
        return symbols

    with open(symbol_addrs_path) as f:
        func_addrs_lines = f.readlines()

    for line in func_addrs_lines:
        line = line.strip()
        if not line or line.startswith("//"):
            continue

        # Split off a trailing "// ..." comment; it carries the attributes.
        line_ext = ""
        comment_loc = line.find("//")
        if comment_loc != -1:
            line_ext = line[comment_loc + 2:].strip()
            line = line[:comment_loc].strip()

        line_split = line.split("=")
        name = line_split[0].strip()

        # Drop the statement terminator only if it is actually there.
        # Unconditionally slicing off the last character would silently
        # lose an address digit on a line written without ";".
        addr_text = line_split[1].strip()
        if addr_text.endswith(";"):
            addr_text = addr_text[:-1]
        addr = int(addr_text, 0)

        sym = Symbol(addr, given_name=name)

        if line_ext:
            for info in line_ext.split(" "):
                if info.startswith("type:"):
                    sym.type = info.split(":")[1]
                if info.startswith("size:"):
                    sym.size = int(info.split(":")[1], 0)
                if info.startswith("rom:"):
                    sym.rom = int(info.split(":")[1], 0)

        symbols.append(sym)

    return symbols
def get_base_segment_class(seg_type, platform):
    """Look up the built-in segment class for ``seg_type`` on ``platform``.

    Imports the module ``segtypes.<platform>.<seg_type>`` and returns its
    attribute named ``<PLATFORM>Seg<Seg_type>`` (platform upper-cased, first
    letter of the segment type upper-cased).

    Returns:
        The class, or None when the module cannot be found.
    """
    module_name = f"segtypes.{platform}.{seg_type}"
    try:
        segmodule = importlib.import_module(module_name)
    except ModuleNotFoundError:
        return None

    capitalized_type = seg_type[0].upper() + seg_type[1:]
    class_name = f"{platform.upper()}Seg{capitalized_type}"
    return getattr(segmodule, class_name)
def get_extension_dir(options, config_path):
    """Return the extensions directory, or None if none is configured.

    ``options["extensions"]`` is resolved relative to the directory that
    contains the config file.
    """
    if "extensions" in options:
        config_dir = Path(config_path).parent
        return os.path.join(config_dir, options["extensions"])
    return None
def get_extension_class(options, config_path, seg_type, platform):
    """Load a segment class from the user's extensions directory.

    Executes ``<extensions dir>/<seg_type>.py`` as a module and returns its
    attribute named ``<PLATFORM>Seg<Seg_type>``.

    Returns:
        The class, or None when no extensions directory is configured or
        when loading the module fails (the error is logged).
    """
    ext_dir = get_extension_dir(options, config_path)
    if ext_dir is None:
        return None

    try:
        module_name = f"{platform}.segtypes.{seg_type}"
        module_file = os.path.join(ext_dir, f"{seg_type}.py")
        ext_spec = importlib.util.spec_from_file_location(module_name, module_file)
        ext_mod = importlib.util.module_from_spec(ext_spec)
        ext_spec.loader.exec_module(ext_mod)
    except Exception as err:
        # Any failure while loading the extension is reported, not raised.
        log.write(err, status="error")
        return None

    capitalized_type = seg_type[0].upper() + seg_type[1:]
    return getattr(ext_mod, f"{platform.upper()}Seg{capitalized_type}")
def get_platform(options):
    """Return the configured platform name, defaulting to "n64"."""
    platform = options.get("platform", "n64")
    return platform
def fmt_size(size):
    """Format a byte count as a short human-readable string.

    Uses decimal units and truncates (floor division): sizes of at least
    1,000,000 are shown in MB, sizes of at least 1,000 in KB, anything
    smaller in B.
    """
    # Inclusive thresholds so that exactly 1000 reads "1 KB" (not "1000 B")
    # and exactly 1000000 reads "1 MB" (not "1000 KB").
    if size >= 1000000:
        return str(size // 1000000) + " MB"
    if size >= 1000:
        return str(size // 1000) + " KB"
    return str(size) + " B"
def initialize_segments(options, config_path, config_segments):
    """Instantiate a segment object for every config entry except the last.

    Each entry is constructed together with the entry that follows it, so
    the final entry only serves as the successor of the one before it and is
    never instantiated itself.

    The class for an entry is looked up among the built-in segment types
    first and in the configured extensions directory second.

    Returns:
        The list of segment objects, in config order.
        NOTE: when a segment type cannot be loaded, an error is logged and
        the integer 2 is returned instead of a list.
    """
    seen_segment_names = set()
    segments = []

    # Walk (entry, next entry) pairs.
    for seg_config, next_seg_config in zip(config_segments, config_segments[1:]):
        seg_type = parse_segment_type(seg_config)
        platform = get_platform(options)

        segment_class = get_base_segment_class(seg_type, platform)
        if segment_class is None:
            # Look in extensions
            segment_class = get_extension_class(options, config_path, seg_type, platform)

        if segment_class is None:
            log.write(f"fatal error: could not load segment type '{seg_type}'\n(hint: confirm your extension directory is configured correctly)", status="error")
            return 2

        segment = segment_class(seg_config, next_seg_config, options)

        if segment_class.require_unique_name:
            # Record a (non-fatal) error on the segment for a duplicate name.
            if segment.name in seen_segment_names:
                segment.error("segment name is not unique")
            seen_segment_names.add(segment.name)

        segments.append(segment)

    return segments
def is_symbol_isolated(symbol, all_segments):
    """Return True if fewer than two segments contain the symbol's vram start.

    Results are memoized per symbol in the module-level ``sym_isolated_map``;
    a cached answer is returned without looking at ``all_segments`` again.
    """
    if symbol in sym_isolated_map:
        return sym_isolated_map[symbol]

    matches = 0
    for seg in all_segments:
        if not seg.contains_vram(symbol.vram_start):
            continue
        matches += 1
        # Two hits already decide the answer; no need to scan further.
        if matches > 1:
            break

    isolated = matches < 2
    sym_isolated_map[symbol] = isolated
    return sym_isolated_map[symbol]
def get_segment_symbols(segment, all_symbols, all_segments):
    """Partition symbols into those belonging to ``segment`` and the rest.

    A symbol that is isolated (see is_symbol_isolated) and has no rom
    address is assigned by vram: it belongs to the segment when the segment
    contains its ``vram_start``. Any other symbol belongs to the segment
    only when it has a rom address that the segment contains.

    Returns:
        A pair ``(seg_syms, other_syms)`` of dicts mapping ``vram_start`` to
        the list of symbols at that address.
    """
    seg_syms = {}
    other_syms = {}

    for sym in all_symbols:
        if is_symbol_isolated(sym, all_segments) and not sym.rom:
            belongs_here = segment.contains_vram(sym.vram_start)
        else:
            belongs_here = sym.rom and segment.contains_rom(sym.rom)

        bucket = seg_syms if belongs_here else other_syms
        bucket.setdefault(sym.vram_start, []).append(sym)

    return seg_syms, other_syms
def main(config_path, out_dir, target_path, modes, verbose, ignore_cache=False):
    """Split the target binary as described by the YAML config.

    Steps: load the config, resolve the output directory and target binary,
    gather symbols, build the segments, split each segment that should run
    and is not a cache hit, run post-split processing, then write the linker
    script, the auto-generated undefined symbol/function lists, the
    collected warnings, size statistics and the updated cache.

    Args:
        config_path: Path of the YAML config file.
        out_dir: Output directory. When falsy, the config's ``out_dir``
            option is used, resolved relative to the config file's directory.
        target_path: Path of the binary to split. When falsy, the config's
            ``target_path`` option is used, resolved relative to ``out_dir``.
        modes: Mode names; stored in ``options["modes"]``.
        verbose: Enables extra log output; stored in ``options["verbose"]``.
        ignore_cache: When true, segments are split even if their cache
            entry is unchanged.

    Returns:
        0 on success, or the error code returned by initialize_segments when
        a segment type could not be loaded. Exits the process with status 2
        when the output directory or the target path cannot be determined.
    """
    # Load config
    with open(config_path) as f:
        config = yaml.safe_load(f.read())

    # A config without an "options" mapping is treated as having no options,
    # so the checks below can report what is missing instead of crashing.
    options = config.get("options") or {}
    options["modes"] = modes
    options["verbose"] = verbose

    if not out_dir:
        out_dir = options.get("out_dir", None)
        if not out_dir:
            print("Error: Output dir not specified as a command line arg or via the config yaml (out_dir)")
            sys.exit(2)
        else:
            out_dir = os.path.join(Path(config_path).parent, out_dir)

    if not target_path:
        target_path = options.get("target_path", None)
        if not target_path:
            print("Error: Target binary path not specified as a command line arg or via the config yaml (target_path)")
            sys.exit(2)
        else:
            target_path = os.path.join(out_dir, target_path)

    with open(target_path, "rb") as f:
        rom_bytes = f.read()

    # Create main output dir
    Path(out_dir).mkdir(parents=True, exist_ok=True)

    symbol_addrs_path = get_symbol_addrs_path(out_dir, options)
    undefined_syms_path = get_undefined_syms_path(out_dir, options)
    all_symbols = gather_symbols(symbol_addrs_path, undefined_syms_path)
    symbol_ranges = [s for s in all_symbols if s.size > 4]
    platform = get_platform(options)

    processed_segments = []
    ld_sections = []

    # Per segment type: total size, number split this run, number of cache hits.
    seg_sizes = {}
    seg_split = {}
    seg_cached = {}

    # Load cache
    # NOTE(security): pickle.load can execute arbitrary code from the file it
    # reads; the cache file must only ever come from a trusted location.
    cache_path = get_cache_path(out_dir, options)
    try:
        with open(cache_path, "rb") as f:
            cache = pickle.load(f)
    except Exception:
        # Best effort: a missing or unreadable cache just means a full split.
        cache = {}

    # Initialize segments
    all_segments = initialize_segments(options, config_path, config["segments"])
    if not isinstance(all_segments, list):
        # initialize_segments returns an integer error code instead of a list
        # when a segment type cannot be loaded; propagate it to the caller
        # rather than trying to iterate over it.
        return all_segments

    for segment in all_segments:
        if platform == "n64" and type(segment) == N64SegCode: # remove special-case sometime
            segment_symbols, other_symbols = get_segment_symbols(segment, all_symbols, all_segments)
            segment.seg_symbols = segment_symbols
            segment.ext_symbols = other_symbols
            segment.all_symbols = all_symbols
            segment.symbol_ranges = symbol_ranges

        segment.check()

        # Default-named bin segments are accounted for as "unk".
        typ = segment.type
        if segment.type == "bin" and segment.is_name_default():
            typ = "unk"

        if typ not in seg_sizes:
            seg_sizes[typ] = 0
            seg_split[typ] = 0
            seg_cached[typ] = 0
        seg_sizes[typ] += segment.size

        if len(segment.errors) == 0:
            if segment.should_run():
                # Check cache
                cached = segment.cache()
                if not ignore_cache and cached == cache.get(segment.unique_id()):
                    # Cache hit
                    seg_cached[typ] += 1
                else:
                    # Cache miss; split
                    cache[segment.unique_id()] = cached

                    segment.did_run = True
                    segment.split(rom_bytes, out_dir)

                    # Only segments that split cleanly take part in postsplit.
                    if len(segment.errors) == 0:
                        processed_segments.append(segment)

                    seg_split[typ] += 1

        log.dot(status=segment.status())
        ld_sections.append(segment.get_ld_section())

    for segment in processed_segments:
        segment.postsplit(processed_segments)
        log.dot(status=segment.status())

    # Write ldscript
    if "ld" in options["modes"] or "all" in options["modes"]:
        if verbose:
            log.write(f"saving {config['basename']}.ld")
        write_ldscript(config['basename'], out_dir, ld_sections, options)

    undefined_syms_to_write = [s for s in all_symbols if s.referenced and not s.defined and not s.type == "func"]
    undefined_funcs_to_write = [s for s in all_symbols if s.referenced and not s.defined and s.type == "func"]

    # Write undefined_funcs_auto.txt (only when there is something to write)
    undefined_funcs_auto_path = get_undefined_funcs_auto_path(out_dir, options)
    to_write = undefined_funcs_to_write
    if len(to_write) > 0:
        with open(undefined_funcs_auto_path, "w", newline="\n") as f:
            for symbol in to_write:
                f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")

    # write undefined_syms_auto.txt (only when there is something to write)
    undefined_syms_auto_path = get_undefined_syms_auto_path(out_dir, options)
    to_write = undefined_syms_to_write
    if len(to_write) > 0:
        with open(undefined_syms_auto_path, "w", newline="\n") as f:
            for symbol in to_write:
                f.write(f"{symbol.name} = 0x{symbol.vram_start:X};\n")

    # print warnings during split/postsplit
    for segment in all_segments:
        if len(segment.warnings) > 0:
            log.write(f"{Style.DIM}0x{segment.rom_start:06X}{Style.RESET_ALL} {segment.type} {Style.BRIGHT}{segment.name}{Style.RESET_ALL}:")

            for warn in segment.warnings:
                log.write("warning: " + warn, status="warn")

            log.write("") # empty line

    # Statistics
    unk_size = seg_sizes.get("unk", 0)
    rest_size = 0
    total_size = len(rom_bytes)

    for typ in seg_sizes:
        if typ != "unk":
            rest_size += seg_sizes[typ]

    # The segment sizes are expected to add up to the size of the binary.
    assert(unk_size + rest_size == total_size)

    known_ratio = rest_size / total_size
    unk_ratio = unk_size / total_size

    log.write(f"Split {fmt_size(rest_size)} ({known_ratio:.2%}) in defined segments")
    for typ in seg_sizes:
        if typ != "unk":
            tmp_size = seg_sizes[typ]
            tmp_ratio = tmp_size / total_size
            # Reset after the dimmed "cached" count so the style does not
            # leak into whatever is printed next.
            log.write(f"{typ:>20}: {fmt_size(tmp_size):>8} ({tmp_ratio:.2%}) {Fore.GREEN}{seg_split[typ]} split{Style.RESET_ALL}, {Style.DIM}{seg_cached[typ]} cached{Style.RESET_ALL}")
    log.write(f"{'unknown':>20}: {fmt_size(unk_size):>8} ({unk_ratio:.2%}) from unknown bin files")

    # Save cache
    if cache != {}:
        if verbose:
            print("Writing cache")
        with open(cache_path, "wb") as f:
            pickle.dump(cache, f)

    return 0 # no error
if __name__ == "__main__":
    args = parser.parse_args()
    # --new asks to split only changed segments, i.e. to honour the cache;
    # without it the cache is ignored and everything is split again.
    error_code = main(args.config, args.outdir, args.rom, args.modes, args.verbose, not args.new)
    # sys.exit rather than the bare exit(): the latter is injected by the
    # site module for interactive use and is not guaranteed to exist.
    sys.exit(error_code)