From efee13f6866212d3eb1784fe986c8bf4575344fa Mon Sep 17 00:00:00 2001
From: Alex Bates <16batesa@gmail.com>
Date: Sun, 27 Dec 2020 17:11:33 +0000
Subject: [PATCH] add data disassembly tool

---
 tools/compile_dsl_macros.py |  12 +-
 tools/disasm_map.py         | 232 ------------------------------------
 tools/disasm_script.py      |  16 ++-
 tools/star_rod_idx_to_c.py  | 198 ++++++++++++++++++++++++++++++
 4 files changed, 220 insertions(+), 238 deletions(-)
 delete mode 100755 tools/disasm_map.py
 create mode 100755 tools/star_rod_idx_to_c.py

diff --git a/tools/compile_dsl_macros.py b/tools/compile_dsl_macros.py
index 14b7d01aac..f3c757aa61 100755
--- a/tools/compile_dsl_macros.py
+++ b/tools/compile_dsl_macros.py
@@ -78,6 +78,7 @@ script_parser = Lark(r"""
             | ">=" -> cond_op_ge
             | "<=" -> cond_op_le
             | "?" -> cond_op_flag
+            | "!?" -> cond_op_not_flag
 
     match_stmt: "match" expr "{" (match_cases SEMICOLON*)? "}"
     match_const_stmt: "matchc" expr "{" (match_cases SEMICOLON*)? "}"
@@ -353,7 +354,8 @@ class Compile(Transformer):
     def cond_op_gt(self, tree): return { "if": "ScriptOpcode_IF_GT", "case": "ScriptOpcode_CASE_GT" }
     def cond_op_le(self, tree): return { "if": "ScriptOpcode_IF_LE", "case": "ScriptOpcode_CASE_LE" }
     def cond_op_ge(self, tree): return { "if": "ScriptOpcode_IF_GE", "case": "ScriptOpcode_CASE_GE" }
-    def cond_op_flag(self, tree): return { "if": "ScriptOpcode_IF_FLAG", "case": "ScriptOpcode_CASE_FLAG" }
+    def cond_op_flag(self, tree): return { "__op__": "?", "if": "ScriptOpcode_IF_FLAG", "case": "ScriptOpcode_CASE_FLAG" }
+    def cond_op_not_flag(self, tree): return { "__op__": "!?", "if": "ScriptOpcode_IF_NOT_FLAG" } # no "case" opcode: case_op reports this operator via "__op__"
 
     def match_stmt(self, tree):
         expr = tree.children[0]
@@ -393,9 +395,17 @@ class Compile(Transformer):
     def case_op(self, tree):
         if len(tree.children) == 4:
             op, expr, multi_case, block = tree.children
+
+            if "case" not in op: # operator has no CASE_* opcode (e.g. `!?`)
+                raise CompileError(f"operation `{op['__op__']}' not supported in match cases", tree.meta)
+
             return [Cmd(op["case"], expr), *multi_case, *block, Cmd("ScriptOpcode_END_CASE_MULTI")]
         else:
             op, expr, block = tree.children
+
+            if "case" not in op: # operator has no CASE_* opcode (e.g. `!?`)
+                raise CompileError(f"operation `{op['__op__']}' not supported in match cases", tree.meta)
+
             return [Cmd(op["case"], expr), *block]
     def case_range(self, tree):
         if len(tree.children) == 4:
diff --git a/tools/disasm_map.py b/tools/disasm_map.py
deleted file mode 100755
index d0ffe9b910..0000000000
--- a/tools/disasm_map.py
+++ /dev/null
@@ -1,232 +0,0 @@
-#! /usr/bin/python3
-
-import sys
-import os
-import yaml
-import json
-from struct import unpack
-
-import disasm_script
-
-def disassemble(bytes, offset, midx, symbol_map = {}, map_name = "map"):
-    out = ""
-    found_data = False
-
-    while len(midx) > 0:
-        struct = midx.pop(0)
-        name = struct["name"]
-
-        if name == "Script_Main": name = f"M(Main)"
-
-        #print(f"{offset:X} ({name}, start = {struct['start']:X}, len = {struct['length']:X})")
-
-        if struct["start"] == offset:
-            found_data = True
-
-            if struct["start"] != offset:
-                # end of data / padding
-                break
-
-            # format struct
-            if struct["type"].startswith("Script"):
-                pos = bytes.tell()
-                try:
-                    out += disasm_script.ScriptDSLDisassembler(bytes, f"M({name})", symbol_map).disassemble()
-                except disasm_script.UnsupportedScript as e:
-                    print(f"Unable to use DSL for {struct['name']}: {e}")
-
-                    bytes.seek(pos)
-                    out += disasm_script.ScriptDisassembler(bytes, f"M({name})", symbol_map).disassemble()
-            elif struct["type"] == "Padding":
-                # nops at end of file
-                bytes.seek(offset % 4, 1)
-                return out
-            elif struct["type"] == "EntryList":
-                out += f"EntryList M(entryList) = {{"
-                for i in range(0, struct["length"], 4 * 4):
-                    x,y,z,yaw = unpack(">ffff", bytes.read(4 * 4))
-                    out += f"\n {{ {x}f, {y}f, {z}f, {yaw}f }},"
-                out += f"\n}};\n"
-            elif struct["type"] == "Header":
-                out += f"MapConfig M(config) = {{\n"
-
-                bytes.read(0x10)
-
-                main,entry_list,entry_count = unpack(">IIi", bytes.read(4 * 3))
-                out += f" .main = M(Main),\n"
-                out += f" .entryList = M(entryList),\n"
-                out += f" .entryCount = ENTRY_COUNT(M(entryList)),\n"
-
-                bytes.read(0x1C)
-
-                bg,tattle = unpack(">II", bytes.read(4 * 2))
-                if bg == 0x80200000:
-                    out += f" .background = &gBackgroundImage,\n"
-                elif bg != 0:
-                    raise Exception(f"unknown MapConfig background {bg:X}")
-                out += f" .tattle = 0x{tattle:X},\n"
-
-                out += f"}};\n"
-            elif struct["type"] == "ASCII":
-                string_data = bytes.read(struct["length"]).decode("ascii")
-
-                # strip null terminator(s)
-                while string_data[-1] == "\0":
-                    string_data = string_data[:-1]
-
-                string_literal = json.dumps(string_data)
-                out += f"const char M({struct['name']})[] = {string_literal};"
-            else: # unknown type of struct
-                out += f"s32 M({name})[] = {{"
-                for i in range(0, struct["length"], 4):
-                    if (i % 0x20) == 0:
-                        out += f"\n "
-
-                    word = int.from_bytes(bytes.read(4), byteorder="big")
-
-                    if word in symbol_map:
-                        out += f" {symbol_map[word]},"
-                    else:
-                        out += f" 0x{word:08X},"
-
-                out += f"\n}};\n"
-
-            out += "\n"
-        elif found_data:
-            if struct["type"] != "Padding":
-                # put struct back on list
-                midx.insert(0, struct)
-
-            # nops at end of file
-            bytes.seek(offset % 4, 1)
-
-            return out
-
-        if struct["type"] != "Function" and not struct["type"] == "Padding" and not (struct["type"] == "Missing" and not found_data):
-            offset += struct["length"]
-
-    # end of data
-    return out
-
-def parse_midx(file, prefix = ""):
-    structs = []
-
-    for line in file.readlines():
-        s = line.split("#")
-        if len(s) == 5:
-            if s[0] == "$Start": continue
-            if s[0] == "$End": continue
-
-            structs.append({
-                "name": prefix + name_struct(s[0]),
-                "type": s[1],
-                "start": int(s[2], 16),
-                "vaddr": int(s[3], 16),
-                "length": int(s[4], 16),
-                "end": int(s[2], 16) + int(s[4], 16),
-            })
-        elif "Missing" in s:
-            start = int(s[1], 16)
-            end = int(s[2], 16)
-            vaddr = start + 0x80240000
-            structs.append({
-                "name": f"{prefix}unk_missing_{vaddr:X}",
-                "type": "Missing",
-                "start": start,
-                "vaddr": vaddr,
-                "length": end - start,
-                "end": end,
-            })
-        elif "Padding" in s:
-            start = int(s[1], 16)
-            end = int(s[2], 16)
-            vaddr = start + 0x80240000
-            structs.append({
-                "name": f"{prefix}__padding__",
-                "type": "Padding",
-                "start": start,
-                "vaddr": vaddr,
-                "length": end - start,
-                "end": end,
-            })
-
-    structs.sort(key=lambda s: s["start"])
-    return structs
-
-def name_struct(s):
-    s = s[1:].replace("???", "unk")
-
-    # use ThisCase for scripts
-    if s.startswith("Script_"):
-        s = s[7].upper() + s[8:]
-
-        # if `s` is hex, prefix it with Script_ again
-        try:
-            int(s, 16)
-            return "Script_" + s
-        except Exception:
-            pass
-
-    if s.startswith("Main"):
-        return "Main"
-
-    return s
-
-    if s.startswith("ASCII"):
-        return s
-
-    return s[0].lower() + s[1:]
-
-if __name__ == "__main__":
-    if len(sys.argv) == 1:
-        print("usage: ./disasm_map.py ")
-        print("Converts split map data into C files using a .midx file from Star Rod.")
-        exit()
-
-    map_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]
-    area_name = "area_" + map_name.split("_")[0]
-    if len(area_name) > 8:
-        area_name = area_name[:8]
-
-    with open(sys.argv[1], "r") as f:
-        midx = parse_midx(f)
-
-    symbol_map = {}
-    for struct in midx:
-        symbol_map[struct["vaddr"]] = "M(" + struct["name"] + ")"
-
-    bin_dir = f"bin/world/{area_name}/{map_name}"
-    src_dir = f"src/world/{area_name}/{map_name}"
-
-    splits = []
-    rom_start = 0
-    with open(os.path.join(os.path.dirname(__file__), "splat.yaml")) as splat:
-        splat = yaml.safe_load(splat)
-
-        for segment in splat["segments"]:
-            if type(segment) == dict and segment.get("name") == f"world/{area_name}/{map_name}/":
-                rom_start = segment.get("start", 0)
-                splits = segment.get("files", [])
-                continue
-    if len(splits) == 0:
-        print(f"unable to find {map_name} in splat.yaml")
-        exit(1)
-
-    # advance to the EntryList (start of data)
-    while midx[0]["type"] != "EntryList":
-        midx.pop(0)
-
-    for split in splits:
-        rom_addr = split[0]
-        filetype = split[1]
-
-        if filetype == "bin":
-            with open(f"{bin_dir}/{rom_addr:X}.bin", "rb") as bytes:
-                print(f"Disassembling {rom_addr:X}")
-
-                disasm = disassemble(bytes, rom_addr - rom_start, midx, symbol_map, map_name)
-
-                if len(disasm.strip()) > 0:
-                    with open(f"{src_dir}/{rom_addr:X}.bin.c", "w") as f:
-                        f.write(f'#include "{map_name}.h"\n\n')
-                        f.write(disasm.rstrip() + "\n")
diff --git a/tools/disasm_script.py b/tools/disasm_script.py
index 9c571b931c..840c674b28 100755
--- a/tools/disasm_script.py
+++ b/tools/disasm_script.py
@@ -433,11 +433,11 @@ class ScriptDSLDisassembler(ScriptDisassembler):
                 self.write_line("}")
 
         if opcode == 0x01:
-            if self.out.endswith("return\n"):
+            if self.out.endswith("return;\n"):
                 # implicit return; break
-                self.out = self.out[:-7].rstrip() + "\n"
+                self.out = self.out[:-8].rstrip() + "\n"
             else:
-                self.write_line("break")
+                self.write_line("break;")
 
             self.indent -= 1
 
@@ -446,7 +446,10 @@ class ScriptDSLDisassembler(ScriptDisassembler):
 
             self.done = True
         elif opcode == 0x02: self.write_line(f"return;")
-        elif opcode == 0x03: self.write_line(f"{self.var(argv[0])}:")
+        elif opcode == 0x03:
+            self.indent -= 1
+            self.write_line(f"{self.var(argv[0])}:")
+            self.indent += 1
         elif opcode == 0x04: self.write_line(f"goto {self.var(argv[0])};")
         elif opcode == 0x05:
             if argv[0] == 0:
@@ -481,6 +484,9 @@ class ScriptDSLDisassembler(ScriptDisassembler):
         elif opcode == 0x10:
             self.write_line(f"if ({self.var(argv[0])} ? {self.var(argv[1])}) {{")
             self.indent += 1
+        elif opcode == 0x11:
+            self.write_line(f"if ({self.var(argv[0])} !? {self.var(argv[1])}) {{")
+            self.indent += 1
         elif opcode == 0x12:
             self.indent -= 1
             self.write_line("} else {")
@@ -595,7 +601,7 @@ class ScriptDSLDisassembler(ScriptDisassembler):
         elif opcode == 0x42: self.write_line(f"{self.var(argv[0])} |=c {argv[1]:X};")
         elif opcode == 0x43:
             argv_str = ", ".join(self.var(arg) for arg in argv[1:])
-            self.write_line(f"{self.addr_ref(argv[0])}({argv_str})")
+            self.write_line(f"{self.addr_ref(argv[0])}({argv_str});")
         elif opcode == 0x44: self.write_line(f"spawn {self.addr_ref(argv[0])};")
         elif opcode == 0x45: self.write_line(f"{self.var(argv[1])} = spawn {self.addr_ref(argv[0])};")
         elif opcode == 0x46: self.write_line(f"await {self.addr_ref(argv[0])};")
diff --git a/tools/star_rod_idx_to_c.py b/tools/star_rod_idx_to_c.py
new file mode 100755
index 0000000000..c6901bea14
--- /dev/null
+++ b/tools/star_rod_idx_to_c.py
@@ -0,0 +1,198 @@
+#! /usr/bin/python3
+
+import sys
+import os
+import yaml
+import json
+from struct import unpack
+import argparse
+
+import disasm_script
+
+DIR = os.path.dirname(__file__)
+
+def disassemble(bytes, midx, symbol_map={}, comments=True):
+    out = ""
+
+    entry_list_name = None
+    main_script_name = None
+
+    while len(midx) > 0:
+        struct = midx.pop(0)
+        name = struct["name"]
+
+        if comments:
+            out += f"// {struct['start']:X}-{struct['end']:X} (VRAM: {struct['vaddr']:X})\n"
+
+        # format struct
+        if struct["type"].startswith("Script"):
+            if struct["type"] == "Script_Main":
+                main_script_name = name
+
+            pos = bytes.tell()
+            try:
+                out += disasm_script.ScriptDSLDisassembler(bytes, name, symbol_map).disassemble()
+            except disasm_script.UnsupportedScript as e:
+                out += f"// Unable to use DSL: {e}\n"
+
+                bytes.seek(pos)
+                out += disasm_script.ScriptDisassembler(bytes, name, symbol_map).disassemble()
+        elif struct["type"] == "EntryList":
+            entry_list_name = name
+            out += f"EntryList {name} = {{"
+            for i in range(0, struct["length"], 4 * 4):
+                x,y,z,yaw = unpack(">ffff", bytes.read(4 * 4))
+                out += f"\n {{ {x}f, {y}f, {z}f, {yaw}f }},"
+            out += f"\n}};\n"
+        elif struct["type"] == "Header":
+            out += f"MapConfig {name} = {{\n"
+
+            bytes.read(0x10)
+
+            main,entry_list,entry_count = unpack(">IIi", bytes.read(4 * 3))
+            out += f" .main = {main_script_name},\n"
+            out += f" .entryList = {entry_list_name},\n"
+            out += f" .entryCount = ENTRY_COUNT({entry_list_name}),\n"
+
+            bytes.read(0x1C)
+
+            bg,tattle = unpack(">II", bytes.read(4 * 2))
+            if bg == 0x80200000:
+                out += f" .background = &gBackgroundImage,\n"
+            elif bg != 0:
+                raise Exception(f"unknown MapConfig background {bg:X}")
+            out += f" .tattle = 0x{tattle:X},\n"
+
+            out += f"}};\n"
+        elif struct["type"] == "ASCII":
+            string_data = bytes.read(struct["length"]).decode("ascii")
+
+            # strip null terminator(s)
+            while string_data.endswith("\0"): # endswith() is safe once the string is empty
+                string_data = string_data[:-1]
+
+            string_literal = json.dumps(string_data)
+            out += f"const char {struct['name']}[] = {string_literal};\n"
+        elif struct["type"].startswith("Function"):
+            bytes.read(struct["length"])
+            out += f"// function: {name}\n"
+        elif struct["type"] == "FloatTable":
+            out += f"f32 {name}[] = {{"
+            for i in range(0, struct["length"], 4):
+                if (i % 0x20) == 0:
+                    out += f"\n "
+
+                word = unpack(">f", bytes.read(4))[0]
+                out += " %ff," % word
+
+            out += f"\n}};\n"
+        else: # unknown type of struct
+            out += f"s32 {name}[] = {{"
+            for i in range(0, struct["length"], 4):
+                if (i % 0x20) == 0:
+                    out += f"\n "
+
+                word = int.from_bytes(bytes.read(4), byteorder="big")
+
+                if word in symbol_map:
+                    out += f" {symbol_map[word]},"
+                else:
+                    out += f" 0x{word:08X},"
+
+            out += f"\n}};\n"
+
+        out += "\n"
+
+    # end of data
+    return out
+
+def parse_midx(file, prefix = ""):
+    structs = []
+
+    for line in file.readlines():
+        s = line.split("#")
+        if len(s) == 5:
+            if s[0] == "$Start": continue
+            if s[0] == "$End": continue
+
+            structs.append({
+                "name": prefix + name_struct(s[0]),
+                "type": s[1],
+                "start": int(s[2], 16),
+                "vaddr": int(s[3], 16),
+                "length": int(s[4], 16),
+                "end": int(s[2], 16) + int(s[4], 16),
+            })
+        elif "Missing" in s:
+            start = int(s[1], 16)
+            end = int(s[2], 16)
+            vaddr = start + 0x80240000
+            structs.append({
+                "name": f"{prefix}unk_missing_{vaddr:X}",
+                "type": "Missing",
+                "start": start,
+                "vaddr": vaddr,
+                "length": end - start,
+                "end": end,
+            })
+        elif "Padding" in s:
+            start = int(s[1], 16)
+            end = int(s[2], 16)
+            vaddr = start + 0x80240000
+            structs.append({
+                "name": f"{prefix}pad_{start:X}",
+                "type": "Padding",
+                "start": start,
+                "vaddr": vaddr,
+                "length": end - start,
+                "end": end,
+            })
+
+    structs.sort(key=lambda s: s["start"])
+    return structs
+
+def name_struct(s):
+    s = s[1:].replace("???", "unk")
+
+    """
+    # use ThisCase for scripts
+    if s.startswith("Script_"):
+        s = s[7].upper() + s[8:]
+
+        # if `s` is hex, prefix it with Script_ again
+        try:
+            int(s, 16)
+            return "Script_" + s
+        except Exception:
+            pass
+
+    if s.startswith("Main"):
+        return "Main"
+
+    return s
+    """
+
+    if s.startswith("ASCII"):
+        return s
+
+    return s[:1].lower() + s[1:] # s[:1] tolerates an empty name
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Converts split data to C using a Star Rod idx file")
+    parser.add_argument("idxfile", help="Input .*idx file from Star Rod dump")
+    parser.add_argument("offset", help="Starting ROM offset")
+    parser.add_argument("--comments", action="store_true", help="Write offset/vaddr comments")
+
+    args = parser.parse_args()
+
+    with open(args.idxfile, "r") as f:
+        midx = parse_midx(f)
+
+    symbol_map = {}
+    for struct in midx:
+        symbol_map[struct["vaddr"]] = struct["name"]
+
+    with open(os.path.join(DIR, "../baserom.z64"), "rb") as romfile:
+        romfile.seek(eval(args.offset)) # NOTE(review): eval() executes arbitrary code from argv; int(args.offset, 0) would parse 0x-prefixed offsets safely
+        disasm = disassemble(romfile, midx, symbol_map, args.comments)
+        print(disasm.rstrip())