papermario/tools/compile_dsl_macros.py

606 lines
18 KiB
Python
Raw Normal View History

2020-10-22 06:54:03 +02:00
#! /usr/bin/python3
from sys import stdin, stderr
2020-10-23 21:14:58 +02:00
from lark import Lark, exceptions, Tree, Transformer, Visitor, v_args, Token
2020-10-23 19:39:38 +02:00
from lark.visitors import Discard
import traceback
2020-10-22 06:54:03 +02:00
def eprint(*args, **kwargs):
    """Print to standard error.

    Accepts the same positional and keyword arguments as the builtin
    ``print``; the ``file`` argument is supplied here, so callers must not
    pass it.
    """
    print(*args, file=stderr, **kwargs)
def write(s):
    """Print *s* to standard output without a trailing newline."""
    print(s, end="")
2020-10-23 19:39:38 +02:00
# ANSI escape sequences used to highlight the word "error" in diagnostics
# printed to stderr.
ANSI_RED = "\033[1;31;40m"  # SGR 1;31;40: bold, red foreground, black background
ANSI_RESET = "\u001b[0m"  # SGR 0: reset all attributes
def pairs(seq):
    """Yield consecutive, non-overlapping 2-tuples from *seq*.

    ``pairs([a, b, c, d])`` yields ``(a, b)`` then ``(c, d)``.  The callers in
    this file pass flat ``[control_type, expr, control_type, expr, ...]`` child
    lists, so each yielded tuple must be one (type, expr) couple; a sliding
    window would wrongly pair an expr with the following control type.

    An empty *seq* yields nothing.  If *seq* has an odd number of items the
    final unpaired item is dropped.
    """
    it = iter(seq)
    # zip() pulls from the same iterator twice per step, giving disjoint pairs.
    yield from zip(it, it)
2020-10-22 06:54:03 +02:00
# Grammar for the script DSL found inside SCRIPT(...) macros.  The start rule
# is `block`; positions are propagated so that commands can be mapped back to
# source lines.  maybe_placeholders is pinned to False so that unmatched
# optional items (`[x]`) are simply absent from the children list regardless
# of the installed Lark version's default.
script_parser = Lark(r"""
    block: "{" NEWLINE* (stmt STMT_SEP)* NEWLINE* "}"

    ?stmt: call
         | label ":" -> label_decl
         | "goto" label -> label_goto
         | if_stmt
         | do_while_stmt
         | "return" -> return_stmt
         | "break" -> break_stmt
         | "sleep" expr -> sleep_stmt
         | "spawn" expr -> spawn_stmt
         | "await" expr -> await_stmt
         | lhs "=" "spawn" expr -> spawn_set_stmt
         | lhs set_op expr -> set_stmt
         | lhs ":=" expr -> set_const_stmt
         | bind_stmt
         | bind_set_stmt
         | "unbind" -> unbind_stmt
         | "group" expr -> set_group
         | suspend_stmt
         | resume_stmt
         | kill_stmt
         | loop_stmt

    call: CNAME "(" [expr ("," expr)* [","]] ")"

    if_stmt: "if" expr if_op expr block ["else" block]
    ?if_op: "==" -> if_op_eq
          | "!=" -> if_op_ne

    do_while_stmt: "do" block "while" expr if_op expr

    suspend_stmt: "suspend" control_type expr ("," control_type expr)* [","]
    resume_stmt: "resume" control_type expr ("," control_type expr)* [","]
    kill_stmt: "kill" control_type expr ("," control_type expr)* [","]
    ?control_type: "group" -> control_type_group
                 | "others" -> control_type_others
                 | ["script"] -> control_type_script

    bind_stmt: "bind" expr "to" expr expr
    bind_set_stmt: lhs "=" "bind" expr "to" expr expr

    loop_stmt: "loop" [expr] block

    ?expr: c_const_expr
         | ESCAPED_STRING
         | SIGNED_INT
         | DECIMAL
         | HEX_INT
         | CNAME

    ?lhs: c_const_expr

    ?set_op: "=" -> set_op_eq
           | "+=" -> set_op_add
           | "-=" -> set_op_sub
           | "*=" -> set_op_mul
           | "/=" -> set_op_div
           | "%=" -> set_op_mod

    c_const_expr: c_const_expr_internal
    c_const_expr_internal: "(" (c_const_expr_internal | NOT_PARENS)+ ")"
    NOT_PARENS: /[^()]+/

    STMT_SEP: (NEWLINE+ | ";")

    label: /[a-zA-Z0-9_]+/

    %import common.CNAME
    %import common.SIGNED_INT
    %import common.DECIMAL
    %import common.HEXDIGIT
    %import common.ESCAPED_STRING

    HEX_INT: ["+"|"-"] "0x" HEXDIGIT+

    LINE_COMMENT: "//" /[^\n]*/ NEWLINE
    %ignore LINE_COMMENT

    %import common.WS_INLINE
    %import common.NEWLINE
    %ignore WS_INLINE
""", start="block", propagate_positions=True, maybe_placeholders=False)#, parser="lalr", cache=True)
2020-10-23 21:14:58 +02:00
class BaseCmd():
    """Base class for one compiled script command.

    Attributes:
        args: tuple of operand words (strings of C source or ints).
        meta: parse-tree position info passed as the ``meta`` keyword, or
            None; the driver reads ``meta.line`` to emit line markers.
        context: enclosing-construct contexts, innermost first, always
            terminated by a ``RootCtx``.
    """

    def __init__(self, *args, **kwargs):
        self.args = args
        self.meta = kwargs.get("meta", None)
        self.context = [RootCtx()]

    def add_context(self, ctx):
        """Record that this command is nested inside *ctx*.

        The transformer works bottom-up, so inner constructs register before
        outer ones.  Each new context is therefore placed after the ones
        already present (but before the trailing RootCtx) to keep the list
        ordered innermost-first, which is the order consumers iterate in.

        Raises:
            TypeError: if *ctx* is not a ``CmdCtx``.
        """
        if not isinstance(ctx, CmdCtx):
            raise TypeError(f"expected a CmdCtx, got {type(ctx).__name__}")

        self.context.insert(len(self.context) - 1, ctx)

    # must be overloaded
    def opcode(self):
        """Return the integer opcode of this command (subclasses implement)."""
        raise NotImplementedError(f"{type(self).__name__} must implement opcode()")

    def to_bytecode(self):
        """Return the command as a flat list: opcode, operand count, operands."""
        return [ self.opcode(), len(self.args), *self.args ]

    def __str__(self):
        return f"Cmd({self.opcode():02X}, {', '.join(map(str, self.args))})"
class Cmd(BaseCmd):
    """A command whose opcode is fixed when it is constructed."""

    def __init__(self, opcode, *args, **kwargs):
        # Remaining positional arguments become the operands; keyword
        # arguments (e.g. ``meta``) are handled by BaseCmd.
        self._opcode = opcode
        super().__init__(*args, **kwargs)

    def opcode(self):
        """Return the opcode given to the constructor."""
        return self._opcode
class BreakCmd(BaseCmd):
    """A ``break`` statement; its opcode depends on the enclosing contexts."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def opcode(self):
        """Return the first truthy break opcode offered by ``self.context``.

        Contexts are asked in list order; a context may also raise (e.g. a
        CompileError) to reject the break.  Returns None if no context
        supplies an opcode.
        """
        offers = (ctx.break_opcode(self.meta) for ctx in self.context)
        return next((code for code in offers if code), None)

    def __str__(self):
        return "BreakCmd"
2020-10-23 20:35:45 +02:00
class CmdCtx():
    """Base class for a construct that can enclose commands."""

    def break_opcode(self, meta):
        """Return the opcode a ``break`` compiles to here, or None if this
        context does not handle ``break`` itself."""
        return None
class RootCtx(CmdCtx):
    """Top level of a script: ``break`` compiles to opcode 0x01."""

    def break_opcode(self, meta):
        return 0x01
2020-10-23 20:35:45 +02:00
class IfCtx(CmdCtx):
    """Body of an ``if``; inherits the default of not handling ``break``."""
class SwitchCtx(CmdCtx):
    """Switch context: ``break`` compiles to opcode 0x22."""

    def break_opcode(self, meta):
        return 0x22
2020-10-23 20:35:45 +02:00
class LoopCtx(CmdCtx):
    """Body of a ``loop``: ``break`` compiles to opcode 0x07."""

    def break_opcode(self, meta):
        return 0x07
2020-10-23 20:35:45 +02:00
class DoWhileCtx(CmdCtx):
    """Body of a ``do..while``; ``break`` is rejected with a CompileError."""

    def break_opcode(self, meta):
        message = "breaking out of a do..while loop is not supported (hint: use a label)"
        raise CompileError(message, meta)
2020-10-23 19:39:38 +02:00
class CompileError(Exception):
    """A user-facing error found while compiling a script.

    ``meta`` is the parse-tree position info of the offending construct; the
    driver reads ``meta.line`` to report the source line.
    """

    def __init__(self, message, meta):
        Exception.__init__(self, message)
        self.meta = meta
def is_fixed_var(v):
    """Return True if *v* is an int in the range (-250000000, -220000000].

    That range contains the values produced for fixed-point literals (which
    are offset by -230000000).  Anything that is not exactly an ``int``
    yields False.
    """
    if type(v) is not int:
        return False

    return -250000000 < v <= -220000000
class LabelAllocation(Visitor):
    """Visitor that assigns each label an index (its position in ``labels``)."""

    def __init__(self):
        super().__init__()
        self.labels = []

    def label_decl(self, tree):
        """Register the label declared by *tree*.

        Raises:
            CompileError: if a label with the same name was already declared.
        """
        label_node = tree.children[0]
        name = label_node.children[0]

        if name not in self.labels:
            self.labels.append(name)
            return

        raise CompileError(f"label `{name}' already declared", tree.meta)

    def gen_label(self):
        """Allocate an anonymous label and return its index."""
        index = len(self.labels)
        self.labels.append("$generated")
        return index
2020-10-23 19:39:38 +02:00
@v_args(tree=True)
class Compile(Transformer):
    """Transform a parsed script into a flat list of commands.

    Rule callbacks receive the whole subtree (``v_args(tree=True)``) and
    return either a single ``BaseCmd`` or a list of them; ``block`` flattens
    those into one list.  Expression terminals are turned into C source
    strings or ints that the driver prints verbatim.
    """

    SIGNED_INT = str
    HEX_INT = str
    NOT_PARENS = str

    def transform(self, tree):
        """Allocate label indices for *tree*, then compile it."""
        self.alloc = LabelAllocation()
        self.alloc.visit_topdown(tree)
        return super().transform(tree)

    # -- private helpers (underscore names are not treated as rule callbacks)

    def _add_context(self, commands, ctx_type):
        """Give every command in *commands* its own ``ctx_type()`` context."""
        for cmd in commands:
            if isinstance(cmd, BaseCmd):
                cmd.add_context(ctx_type())

    def _control_stmt(self, verb, tree):
        """Compile ``suspend``/``resume``/``kill``: one command per target.

        The children are a flat ``[control_type, expr, control_type, expr,
        ...]`` list; they are consumed two at a time so that each control
        type is matched with its own expression.

        Raises:
            CompileError: if a control type has no opcode for *verb*.
        """
        children = tree.children
        commands = []
        for opcodes, expr in zip(children[0::2], children[1::2]):
            if verb not in opcodes:
                raise CompileError(f"`{verb} {opcodes['__control_type__']}' not supported", meta=tree.meta)
            commands.append(Cmd(opcodes[verb], expr, meta=tree.meta))
        return commands

    # -- expressions

    def CNAME(self, name):
        return f"(Bytecode)(&{name})"

    def ESCAPED_STRING(self, str_with_quotes):
        return f"(Bytecode)({str_with_quotes})"

    def c_const_expr_internal(self, tree):
        return f"({' '.join(tree.children)})"

    def c_const_expr(self, tree):
        return f"(Bytecode){tree.children[0]}"

    def DECIMAL(self, v):
        # fixed-point: scaled by 1024 and offset by -230000000
        return int((float(v) * 1024) - 230000000)

    # -- statements

    def block(self, tree):
        """Flatten the block's statements into a single list of commands."""
        flat = []
        for node in tree.children:
            if isinstance(node, list):
                flat += node
            elif isinstance(node, BaseCmd):
                flat.append(node)
            elif isinstance(node, Token) and (node.value.startswith(("\n", "\r")) or node.value == ";"):
                # statement separators carry no code
                pass
            else:
                raise Exception(f"block statment {type(node)} is not a BaseCmd: {node}")
        return flat

    def call(self, tree):
        # TODO: type checking etc
        # An absent argument list may surface as None placeholders; drop them.
        args = [child for child in tree.children if child is not None]
        return Cmd(0x43, *args, meta=tree.meta)

    def if_stmt(self, tree):
        """Compile ``if a op b { ... }`` with an optional ``else { ... }``."""
        a, op, b, block, *rest = tree.children
        else_block = rest[0] if rest else None

        self._add_context(block, IfCtx)
        commands = [ Cmd(op, a, b, meta=tree.meta), *block ]

        if else_block is not None:
            self._add_context(else_block, IfCtx)
            # 0x12 is the ELSE opcode (between the IF_* opcodes and END_IF)
            commands.append(Cmd(0x12))
            commands.extend(else_block)

        commands.append(Cmd(0x13)) # end if
        return commands

    def if_op_eq(self, tree): return 0x0A
    def if_op_ne(self, tree): return 0x0B

    def loop_stmt(self, tree):
        """Compile ``loop [count] { ... }``; a missing count compiles as 0."""
        *count, block = tree.children
        expr = count[0] if count and count[0] is not None else 0

        self._add_context(block, LoopCtx)
        return [ Cmd(0x05, expr, meta=tree.meta), *block, Cmd(0x06) ]

    # do..while pseudoinstruction
    def do_while_stmt(self, tree):
        block, a, op, b = tree.children
        self._add_context(block, DoWhileCtx)

        label = self.alloc.gen_label()

        return [
            Cmd(0x03, label, meta=tree.meta), # label:
            *block,
            Cmd(op, a, b, meta=tree.meta), # if a op b
            Cmd(0x04, label, meta=tree.meta), # goto label
            Cmd(0x13, meta=tree.meta), # end if
        ]

    def return_stmt(self, tree):
        return Cmd(0x02, meta=tree.meta)

    def break_stmt(self, tree):
        return BreakCmd(meta=tree.meta)

    def set_group(self, tree):
        return Cmd(0x4D, tree.children[0], meta=tree.meta)

    def suspend_stmt(self, tree):
        return self._control_stmt("suspend", tree)

    def resume_stmt(self, tree):
        return self._control_stmt("resume", tree)

    def kill_stmt(self, tree):
        return self._control_stmt("kill", tree)

    def control_type_group(self, tree):
        return {
            "__control_type__": "group",
            "suspend": 0x4F,
            "resume": 0x50,
        }

    def control_type_others(self, tree):
        return {
            "__control_type__": "others",
            "suspend": 0x51,
            "resume": 0x52,
        }

    def control_type_script(self, tree):
        return {
            "__control_type__": "script",
            "suspend": 0x53,
            "resume": 0x54,
            "kill": 0x49,
        }

    def sleep_stmt(self, tree):
        return Cmd(0x08, tree.children[0], meta=tree.meta)

    def bind_stmt(self, tree):
        script, trigger, target = tree.children
        return Cmd(0x47, script, trigger, target, 1, 0, meta=tree.meta)

    def bind_set_stmt(self, tree):
        ret, script, trigger, target = tree.children
        return Cmd(0x47, script, trigger, target, 1, ret, meta=tree.meta)

    def unbind_stmt(self, tree):
        return Cmd(0x48, meta=tree.meta)

    def spawn_stmt(self, tree):
        return Cmd(0x44, tree.children[0], meta=tree.meta)

    def spawn_set_stmt(self, tree):
        lhs, script = tree.children
        return Cmd(0x45, script, lhs, meta=tree.meta)

    def await_stmt(self, tree):
        return Cmd(0x46, tree.children[0], meta=tree.meta)

    def set_stmt(self, tree):
        """Compile ``lhs op= rhs`` using the float opcode for fixed-point
        right-hand sides and the int opcode otherwise.

        Raises:
            CompileError: if the operator has no opcode for that kind.
        """
        lhs, opcodes, rhs = tree.children

        kind = "float" if is_fixed_var(rhs) else "int"
        opcode = opcodes.get(kind, None)
        if not opcode:
            raise CompileError(f"operation `{opcodes['__op__']}' not supported for {kind}s", tree.meta)

        # meta is passed so that the driver emits a line marker, as it does
        # for every other statement.
        return Cmd(opcode, lhs, rhs, meta=tree.meta)

    def set_const_stmt(self, tree):
        lhs, rhs = tree.children
        return Cmd(0x25, lhs, rhs, meta=tree.meta)

    def set_op_eq(self, tree):
        return {
            "__op__": "=",
            "int": 0x24,
            "float": 0x26,
        }

    def set_op_add(self, tree):
        return {
            "__op__": "+",
            "int": 0x27,
            "float": 0x2C,
        }

    def set_op_sub(self, tree):
        return {
            "__op__": "-",
            "int": 0x28,
            "float": 0x2D,
        }

    def set_op_mul(self, tree):
        return {
            "__op__": "*",
            "int": 0x29,
            "float": 0x2E,
        }

    def set_op_div(self, tree):
        return {
            "__op__": "/",
            "int": 0x2A,
            "float": 0x2F,
        }

    def set_op_mod(self, tree):
        return {
            "__op__": "%",
            "int": 0x2B,
        }

    # -- labels

    def label_decl(self, tree):
        label = tree.children[0]
        return Cmd(0x03, label, meta=tree.meta)

    def label_goto(self, tree):
        label = tree.children[0]
        return Cmd(0x04, label, meta=tree.meta)

    def label(self, tree):
        """Resolve a label name to the index allocated for it.

        Raises:
            CompileError: if the label was never declared.
        """
        name = tree.children[0]
        if name in self.alloc.labels:
            return self.alloc.labels.index(name)
        raise CompileError(f"label `{name}' is undeclared", tree.meta)
2020-10-22 06:54:03 +02:00
def compile_script(s):
    """Parse and compile script source *s* into a list of commands.

    Parser and transformer exceptions propagate to the caller.
    """
    tree = script_parser.parse(s)
    commands = Compile().transform(tree)

    # add RETURN END if no explicit END (top-level `break') was given
    has_end = any(cmd.opcode() == 0x01 for cmd in commands)
    if not has_end:
        commands += (Cmd(0x02), Cmd(0x01))

    return commands
2020-10-22 06:54:03 +02:00
def read_until_closing_paren(depth=1, lex_strings=False):
    """Read stdin up to the ``)`` that brings the nesting depth to zero.

    *depth* is the number of parentheses already open.  The closing
    parenthesis is consumed but not returned.  With *lex_strings*, parentheses
    inside double-quoted strings are not counted and a backslash inside a
    string escapes the following character.  At EOF the text read so far is
    returned.
    """
    pieces = []
    inside_string = False
    escaped = False

    while True:
        ch = stdin.read(1)
        if not ch:
            # EOF
            break

        if escaped:
            # this character was escaped; take it literally
            escaped = False
        elif not inside_string and ch == "(":
            depth += 1
        elif not inside_string and ch == ")":
            depth -= 1
            if depth == 0:
                break
        elif lex_strings and ch == '"':
            inside_string = not inside_string
        elif inside_string and ch == "\\":
            escaped = True

        pieces.append(ch)

    return "".join(pieces)
def read_line():
    """Read the rest of the current line from stdin.

    The terminating newline is consumed but not included in the result.  At
    EOF whatever was read (possibly the empty string) is returned.
    """
    line = stdin.readline()
    if line.endswith("\n"):
        return line[:-1]
    return line
2020-10-23 19:39:38 +02:00
def gen_line_map(source, source_line_no = 1):
    """Strip ``#`` marker lines from *source* and map output lines to input lines.

    Lines starting with ``#`` are treated as line markers of the form
    ``# <number> ...``: they are removed and set the line number of the line
    that follows.  Returns ``(output, line_map)`` where *output* is *source*
    without the marker lines and *line_map* maps each 1-based output line
    number to its source line number.  *source_line_no* is the source line
    number of the first line.
    """
    line_map = {}
    kept = []

    for line in source.splitlines(True):
        if line.startswith("#"):
            # skip "# " and the trailing newline; the first field is the number
            source_line_no = int(line[2:-1].split(" ")[0])
            continue

        line_map[len(kept) + 1] = source_line_no
        kept.append(line)
        source_line_no += 1

    return "".join(kept), line_map
2020-10-22 06:54:03 +02:00
# Expects output from C preprocessor on stdin.
#
# The input is copied to stdout character by character, except that every
# SCRIPT(...) macro invocation is replaced by a brace-enclosed list of
# bytecode words, interleaved with cpp line markers pointing back at the
# script source.  Exits with status 1 if any script failed to compile.
if __name__ == "__main__":
    def source_line(meta, line_map, fallback):
        """Map a parse-tree meta to an input line number.

        Returns *fallback* when *meta* is None, carries no ``line`` (e.g. a
        rule that matched no positioned children), or is not in *line_map*.
        """
        return line_map.get(getattr(meta, "line", None), fallback)

    line_no = 1
    char_no = 1
    file_info = []
    error = False
    macro_name = "" # captures recent UPPER_CASE identifier
    prev_char = ""

    while True:
        char = stdin.read(1)

        if len(char) == 0:
            # EOF: flush any identifier still buffered, then report status
            write(macro_name)
            raise SystemExit(1 if error else 0)

        if char == "#" and (prev_char == "\n" or prev_char == ""):
            # cpp line/file marker ("# <line> <file> <flags...>"), or some
            # other directive cpp passed through (e.g. #pragma)
            line = read_line()
            fields = line.split()

            if fields and fields[0].isdigit():
                line_no = int(fields[0])
                if len(fields) > 1:
                    file_info = fields[1:]
            else:
                # not a line marker: it just occupies one line
                line_no += 1

            write("#" + line + "\n")

            # read_line() consumed the newline, so the next character starts
            # a new line; without this, back-to-back markers are missed.
            char_no = 1
            prev_char = "\n"
            continue
        elif char == "(":
            # SCRIPT(...)
            if macro_name == "SCRIPT":
                # quoted file name as it appears in markers; placeholder if
                # no marker has been seen yet
                file_marker = file_info[0] if file_info else '"<stdin>"'
                filename = file_marker[1:-1]

                script_source, line_map = gen_line_map(read_until_closing_paren(lex_strings=True), source_line_no=line_no)

                try:
                    commands = compile_script(script_source)

                    # Build the replacement text first so that nothing is
                    # emitted for a script that fails part-way through.
                    out = ["{\n"]
                    for command in commands:
                        marker_line = source_line(command.meta, line_map, None)
                        if marker_line is not None:
                            out.append(f"# {marker_line} {file_marker}\n")

                        out.append("    ")
                        for word in command.to_bytecode():
                            if isinstance(word, str):
                                out.append(word)
                            elif isinstance(word, int):
                                out.append(f"0x{word & 0xFFFFFFFF:X}")
                            else:
                                raise Exception(f"{command}.to_bytecode() gave {type(word)} {word}")
                            out.append(", ")
                        out.append("\n")
                    out.append("}")

                    write("".join(out))
                except exceptions.UnexpectedEOF as e:
                    eprint(f"{filename}:{line_no}: {ANSI_RED}error{ANSI_RESET}: unterminated SCRIPT(...) macro")
                    error = True
                except exceptions.UnexpectedCharacters as e:
                    # use distinct names: `char` must keep holding the
                    # character read from stdin for the bookkeeping below
                    err_line = line_map.get(e.line, line_no)
                    bad_char = script_source[e.pos_in_stream]
                    allowed = e.allowed or ()

                    eprint(f"{filename}:{err_line}: {ANSI_RED}script parse error{ANSI_RESET}: unexpected `{bad_char}', expected {' or '.join(allowed)}")
                    eprint(e.get_context(script_source))
                    error = True
                except exceptions.UnexpectedToken as e:
                    err_line = line_map.get(e.line, line_no)

                    eprint(f"{filename}:{err_line}: {ANSI_RED}script parse error{ANSI_RESET}: unexpected `{e.token}'")
                    eprint(e.get_context(script_source))
                    error = True
                except exceptions.VisitError as e:
                    if isinstance(e.orig_exc, CompileError):
                        err_line = source_line(e.orig_exc.meta, line_map, line_no)
                        eprint(f"{filename}:{err_line}: {ANSI_RED}script compile error{ANSI_RESET}: {e.orig_exc}")
                    else:
                        eprint(f"{filename}:{line_no}: {ANSI_RED}internal script transform error{ANSI_RESET}")
                        traceback.print_exc()
                    error = True
                except CompileError as e:
                    err_line = source_line(e.meta, line_map, line_no)
                    eprint(f"{filename}:{err_line}: {ANSI_RED}script compile error{ANSI_RESET}: {e}")
                    error = True
                except Exception as e:
                    eprint(f"{filename}:{line_no}: {ANSI_RED}internal script compilation error{ANSI_RESET}")
                    traceback.print_exc()
                    error = True

                # resynchronise line numbering after the consumed macro body
                line_no += script_source.count("\n")
                write(f"\n# {line_no} {file_marker}\n")
            else:
                # leave non-macro in source
                write(macro_name + char)

            macro_name = ""
        elif char == "_" or (char >= 'A' and char <= 'Z'):
            macro_name += char
        else:
            write(macro_name + char)
            macro_name = ""

        if char == "\n":
            char_no = 0
            line_no += 1

        char_no += 1
        prev_char = char