papermario/tools/splat/util/n64/rominfo.py
Ethan Roseman f91fe539a4
Snug bugs unrelated to and never been inside of a rug (#1082)
* Fix enum name, offset

* Fix bugs

Some assets were slipping by the asset stack
Tex archive building wasn't respecting the asset stack (Fixes #1074)

* Fixes #1081

* fix paths kinda

* git subrepo pull --force tools/splat

subrepo:
  subdir:   "tools/splat"
  merged:   "818924683b"
upstream:
  origin:   "https://github.com/ethteck/splat.git"
  branch:   "master"
  commit:   "818924683b"
git-subrepo:
  version:  "0.4.5"
  origin:   "https://github.com/ingydotnet/git-subrepo"
  commit:   "aa416e4"

* Fix stuff after splupdate
2023-07-13 17:56:16 +09:00

323 lines
8.8 KiB
Python
Executable File

#! /usr/bin/env python3
import argparse
import hashlib
import itertools
import struct
import sys
import zlib
from dataclasses import dataclass
from pathlib import Path
from typing import Optional
import rabbitizer
import spimdisasm
# Command-line interface used when this module is executed as a script (see main()).
parser = argparse.ArgumentParser(description="Gives information on N64 roms")

# Positional argument: the ROM file to inspect.
parser.add_argument("rom", help="path to an N64 rom")

# Optional override for the codec used to decode the game name in the header.
parser.add_argument(
    "--header-encoding",
    dest="header_encoding",
    help=(
        "Text encoding the game header is using; see "
        "docs.python.org/3/library/codecs.html#standard-encodings for valid encodings"
    ),
)
# Maps the country byte read from the ROM header (rom_bytes[0x3E]) to a display name.
# Apart from 0x00, every key is the code point of a printable ASCII character, which
# is why main() prints the code with chr(); the keys are spelled that way here.
country_codes = {
    0x00: "Unknown",
    ord("7"): "Beta",
    ord("A"): "Asian (NTSC)",
    ord("B"): "Brazilian",
    ord("C"): "Chinese",
    ord("D"): "German",
    ord("E"): "North America",
    ord("F"): "French",
    ord("G"): "Gateway 64 (NTSC)",
    ord("H"): "Dutch",
    ord("I"): "Italian",
    ord("J"): "Japanese",
    ord("K"): "Korean",
    ord("L"): "Gateway 64 (PAL)",
    ord("N"): "Canadian",
    ord("P"): "European (basic spec.)",
    ord("S"): "Spanish",
    ord("U"): "Australian",
    ord("W"): "Scandinavian",
    ord("X"): "European",
    ord("Y"): "European",
}
@dataclass
class CIC:
    """Names and entry-point offset associated with one CIC variant."""

    # The two names are printed together by main() as "<ntsc_name> / <pal_name>".
    ntsc_name: str
    pal_name: str

    # Subtracted from the header's program counter by get_entry_point().
    offset: int
# Known CIC variants, keyed by the checksum that get_cic() computes:
# zlib.crc32 over rom_bytes[0x40:0x1000].
crc_to_cic = {
    0x6170A4A1: CIC(ntsc_name="6101", pal_name="7102", offset=0x000000),
    0x90BB6CB5: CIC(ntsc_name="6102", pal_name="7101", offset=0x000000),
    0x0B050EE0: CIC(ntsc_name="6103", pal_name="7103", offset=0x100000),
    0x98BC2C86: CIC(ntsc_name="6105", pal_name="7105", offset=0x000000),
    0xACC8580A: CIC(ntsc_name="6106", pal_name="7106", offset=0x200000),
}

# Returned by get_cic() when the checksum matches none of the entries above.
unknown_cic = CIC(ntsc_name="unknown", pal_name="unknown", offset=0x000000)
@dataclass
class N64EntrypointInfo:
    """Values recovered by scanning the instructions at the start of a ROM's code."""

    # Number of bytes of instructions the scan stepped over (4 per instruction).
    entry_size: int
    # Tracked value of the base register of a store of $zero, or None if none was seen.
    bss_start_address: Optional[int]
    # Tracked value of the register read by a branch, or None if none was seen.
    bss_size: Optional[int]
    # Tracked value of the register read by a register jump, or None if none was seen.
    main_address: Optional[int]
    # Tracked value of $sp when the scan ended (0 if it was never written).
    stack_top: int

    @staticmethod
    def parse_rom_bytes(
        rom_bytes, offset: int = 0x1000, size: int = 0x60
    ) -> "N64EntrypointInfo":
        """Scan the instructions in ``rom_bytes[offset:offset + size]``.

        A table of 32 register values is maintained while walking the words:
        "hi" instructions set a register to ``immediate << 16`` and "lo"
        instructions that modify ``rt`` add their immediate to ``rs``. The
        registers used by a zeroing store, a branch and a register jump are
        remembered, and their final table values are reported.

        The scan stops at the first word rabbitizer does not implement, or at
        the first non-nop instruction once three or more nops have been seen.
        """
        # NOTE(review): bytesToWords presumably yields one 32-bit word per 4 bytes of
        # the requested range -- confirm against spimdisasm.
        words = spimdisasm.common.Utils.bytesToWords(rom_bytes, offset, offset + size)

        regs = [0] * 32
        bss_address_reg: Optional[int] = None
        bss_size_reg: Optional[int] = None
        main_address_reg: Optional[int] = None

        nops_seen = 0
        scanned_bytes = 0

        for word in words:
            insn = rabbitizer.Instruction(word)
            if not insn.isImplemented():
                break

            # The order of this chain matters: each instruction is handled by the
            # first test that accepts it.
            if insn.isNop():
                nops_seen += 1
            elif nops_seen >= 3:
                break
            elif insn.canBeHi():
                regs[insn.rt.value] = insn.getProcessedImmediate() << 16
            elif insn.canBeLo():
                if insn.isLikelyHandwritten():
                    # Try to skip these instructions:
                    # addi $t0, $t0, 0x8
                    # addi $t1, $t1, -0x8
                    pass
                elif insn.modifiesRt():
                    regs[insn.rt.value] = (
                        regs[insn.rs.value] + insn.getProcessedImmediate()
                    )
                elif insn.doesStore():
                    if insn.rt == rabbitizer.RegGprO32.zero:
                        # Try to detect the zero-ing bss algorithm
                        # sw $zero, 0x0($t0)
                        bss_address_reg = insn.rs.value
            elif insn.isBranch():
                # lui $t1, 0x2
                # addiu $t1, $t1, -0x7220
                # ...
                # addi $t1, $t1, -0x8
                # ...
                # bnez $t1, label
                bss_size_reg = insn.rs.value
            elif insn.isJumptableJump() or insn.isReturn():
                # lui $t2, 0x8000
                # addiu $t2, $t2, 0x494
                # ...
                # jr $t2
                main_address_reg = insn.rs.value

            scanned_bytes += 4

        def tracked(reg: Optional[int]) -> Optional[int]:
            """Return the table value of ``reg``, or None when no register was recorded."""
            return None if reg is None else regs[reg]

        return N64EntrypointInfo(
            scanned_bytes,
            tracked(bss_address_reg),
            tracked(bss_size_reg),
            tracked(main_address_reg),
            regs[rabbitizer.RegGprO32.sp.value],
        )
@dataclass
class N64Rom:
    """Header fields and derived facts about one ROM, as assembled by get_info_bytes()."""

    name: str
    header_encoding: str
    country_code: int
    libultra_version: str
    checksum: str
    cic: CIC
    entry_point: int
    size: int
    compiler: str
    sha1: str
    entrypoint_info: N64EntrypointInfo

    def get_country_name(self) -> str:
        """Return the display name for ``country_code``.

        Codes that are not listed in ``country_codes`` yield ``"Unknown"`` rather
        than raising ``KeyError``, so a ROM with an unexpected country byte can
        still be described.
        """
        return country_codes.get(self.country_code, "Unknown")
def swap_bytes(data):
    """Return a copy of ``data`` with the two bytes of every 16-bit pair exchanged.

    The swap is done with two strided slice assignments instead of packing
    each 16-bit word individually, which matters for multi-megabyte ROMs.

    Args:
        data: a bytes-like object whose length is a multiple of two.

    Returns:
        A new ``bytes`` object of the same length as ``data``.

    Raises:
        struct.error: if ``data`` has an odd number of bytes.
    """
    length = len(data)
    if length % 2:
        raise struct.error("byte-swapping requires a buffer of a multiple of 2 bytes")

    swapped = bytearray(length)
    # Even output positions take the odd input bytes and vice versa.
    swapped[0::2] = data[1::2]
    swapped[1::2] = data[0::2]
    return bytes(swapped)
def read_rom(rom_path: Path):
    """Read the ROM at ``rom_path`` and return its bytes.

    When the file name ends in ``.n64`` (case-insensitive) the contents are
    passed through swap_bytes() before being returned, and the swapped bytes
    are also written next to the input with a ``.z64`` suffix unless such a
    file already exists.
    """
    data = rom_path.read_bytes()
    if rom_path.suffix.lower() != ".n64":
        return data

    print("Warning: Input file has .n64 suffix, byte-swapping!")
    data = swap_bytes(data)

    z64_path = rom_path.with_suffix(".z64")
    if not z64_path.exists():
        print(f"Writing down {z64_path}")
        z64_path.write_bytes(data)
    return data
def get_cic(rom_bytes: bytes):
    """Identify the CIC from the CRC32 of ``rom_bytes[0x40:0x1000]``.

    Returns the matching ``crc_to_cic`` entry, or ``unknown_cic`` when the
    checksum is not in the table.
    """
    checksum = zlib.crc32(rom_bytes[0x40:0x1000])
    if checksum in crc_to_cic:
        return crc_to_cic[checksum]
    return unknown_cic
def get_entry_point(program_counter: int, cic: CIC):
    """Return ``program_counter`` with the CIC's ``offset`` subtracted."""
    adjustment = cic.offset
    return program_counter - adjustment
def guess_header_encoding(rom_bytes: bytes):
    """Return the first candidate codec that can decode ``rom_bytes[0x20:0x34]``.

    The candidates are tried in the order ASCII, shift_jis, euc-jp. If none of
    them decodes the bytes, the process exits with an error message.
    """
    raw_name = rom_bytes[0x20:0x34]
    for candidate in ("ASCII", "shift_jis", "euc-jp"):
        try:
            raw_name.decode(candidate)
        except UnicodeDecodeError:
            # This codec cannot represent the bytes; move on to the next one.
            continue
        return candidate
    sys.exit("Unknown header encoding, please raise an Issue with us")
def get_info(
    rom_path: Path, rom_bytes: Optional[bytes] = None, header_encoding=None
) -> N64Rom:
    """Describe a ROM, loading its bytes and guessing its encoding when not supplied.

    ``rom_path`` is only read when ``rom_bytes`` is None, and the header
    encoding is only guessed when ``header_encoding`` is None.
    """
    data = read_rom(rom_path) if rom_bytes is None else rom_bytes
    encoding = (
        guess_header_encoding(data) if header_encoding is None else header_encoding
    )
    return get_info_bytes(data, encoding)
def get_info_bytes(rom_bytes: bytes, header_encoding: str) -> N64Rom:
    """Build an ``N64Rom`` description from the raw bytes of a ROM.

    Args:
        rom_bytes: contents of the ROM.
        header_encoding: name of the codec used to decode the game name stored
            at ``rom_bytes[0x20:0x34]``.

    Returns:
        The populated ``N64Rom``.

    Exits the process with an explanatory message when the game name cannot be
    decoded, either because the bytes are invalid for ``header_encoding`` or
    because the codec name is unknown.
    """
    (program_counter,) = struct.unpack(">I", rom_bytes[0x8:0xC])
    libultra_version = chr(rom_bytes[0xF])
    checksum = rom_bytes[0x10:0x18].hex().upper()

    try:
        # Trailing spaces and NULs are padding; a blank name is reported as "empty".
        name = rom_bytes[0x20:0x34].decode(header_encoding).rstrip(" \0") or "empty"
    except (UnicodeError, LookupError):
        # UnicodeError: the bytes are not valid in this codec.
        # LookupError: the codec name itself is unknown.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not intercepted.
        sys.exit(
            "splat could not decode the game name;"
            " try using a different encoding by passing the --header-encoding argument"
            " (see docs.python.org/3/library/codecs.html#standard-encodings for valid encodings)"
        )

    country_code = rom_bytes[0x3E]

    cic = get_cic(rom_bytes)
    entry_point = get_entry_point(program_counter, cic)

    compiler = get_compiler_info(rom_bytes, entry_point, print_result=False)

    sha1 = hashlib.sha1(rom_bytes).hexdigest()

    entrypoint_info = N64EntrypointInfo.parse_rom_bytes(rom_bytes)

    return N64Rom(
        name,
        header_encoding,
        country_code,
        libultra_version,
        checksum,
        cic,
        entry_point,
        len(rom_bytes),
        compiler,
        sha1,
        entrypoint_info,
    )
def get_compiler_info(rom_bytes, entry_point, print_result=True):
    """Guess the compiler by comparing counts of ``b`` and ``j`` instructions.

    Words from ``rom_bytes[0x1000:]`` are decoded until the first one that
    rabbitizer does not implement. The result is "IDO" when more ``cpu_b``
    than ``cpu_j`` instructions were counted, and "GCC" otherwise.

    ``entry_point`` is accepted but not used by this function. When
    ``print_result`` is true, a summary line is printed as well.
    """
    jump_count = 0
    branch_count = 0

    for word in spimdisasm.common.Utils.bytesToWords(rom_bytes[0x1000:]):
        insn = rabbitizer.Instruction(word)
        if not insn.isImplemented():
            break

        instr_id = insn.uniqueId
        if instr_id == rabbitizer.InstrId.cpu_j:
            jump_count += 1
        elif instr_id == rabbitizer.InstrId.cpu_b:
            branch_count += 1

    if branch_count > jump_count:
        compiler = "IDO"
    else:
        compiler = "GCC"

    if print_result:
        print(
            f"{branch_count} branches and {jump_count} jumps detected in the first code segment."
            f" Compiler is most likely {compiler}"
        )
    return compiler
def main():
    """Command-line entry point: print a summary of the ROM named on the command line."""
    # NOTE(review): presumably needed so that get_compiler_info() can see `b`
    # instructions as rabbitizer.InstrId.cpu_b -- confirm against rabbitizer.
    rabbitizer.config.pseudos_pseudoB = True

    args = parser.parse_args()
    rom_path = Path(args.rom)

    rom_bytes = read_rom(rom_path)
    rom = get_info(rom_path, rom_bytes, args.header_encoding)

    print(f"Image name: {rom.name}")
    print(f"Country code: {chr(rom.country_code)} - {rom.get_country_name()}")
    print(f"Libultra version: {rom.libultra_version}")
    print(f"Checksum: {rom.checksum}")
    print(f"CIC: {rom.cic.ntsc_name} / {rom.cic.pal_name}")
    print(f"RAM entry point: {hex(rom.entry_point)}")
    print(f"Header encoding: {rom.header_encoding}")
    print("")

    get_compiler_info(rom_bytes, rom.entry_point)


if __name__ == "__main__":
    main()