From feb6980d4f4880587cc9c64457e37f7d2b9d7d61 Mon Sep 17 00:00:00 2001 From: "Reid D. 'arrdem' McKenzie" Date: Tue, 29 Mar 2022 01:29:18 -0600 Subject: [PATCH] A bytecode VM sketch --- projects/shogoth/BUILD | 36 +- .../shogoth/src/python/shogoth/vm/NOTES.md | 11 + .../shogoth/src/python/shogoth/vm/__init__.py | 3 + .../shogoth/src/python/shogoth/vm/impl.py | 325 ++++++++++++++++++ .../python/shogoth/vm/test_interpreter.py | 167 +++++++++ 5 files changed, 510 insertions(+), 32 deletions(-) create mode 100644 projects/shogoth/src/python/shogoth/vm/NOTES.md create mode 100644 projects/shogoth/src/python/shogoth/vm/__init__.py create mode 100644 projects/shogoth/src/python/shogoth/vm/impl.py create mode 100644 projects/shogoth/test/python/shogoth/vm/test_interpreter.py diff --git a/projects/shogoth/BUILD b/projects/shogoth/BUILD index 5159119..7f56c63 100644 --- a/projects/shogoth/BUILD +++ b/projects/shogoth/BUILD @@ -1,35 +1,7 @@ -py_library( - name = "lib", - srcs = glob(["src/python/**/*"]), - imports = [ - "src/python" - ], - deps = [ +py_project( + name = "shogoth", + main = "src/python/shogoth/repl/__main__.py", + lib_deps = [ py_requirement("lark"), ], ) - -zapp_binary( - name = "shogothd", - main = "src/python/shogoth/server/__main__.py", - deps = [ - ":lib", - ], -) - -zapp_binary( - name = "shogoth", - main = "src/python/shogoth/client/__main__.py", - deps = [ - ":lib", - ], -) - -zapp_binary( - name = "repl", - main = "src/python/shogoth/repl/__main__.py", - shebang = "/usr/bin/env python3.10", - deps = [ - ":lib", - ] -) diff --git a/projects/shogoth/src/python/shogoth/vm/NOTES.md b/projects/shogoth/src/python/shogoth/vm/NOTES.md new file mode 100644 index 0000000..bf97a1e --- /dev/null +++ b/projects/shogoth/src/python/shogoth/vm/NOTES.md @@ -0,0 +1,11 @@ +# VM notes + +## Papers + +10.1145/3486606.3488073 +10.1145/3486606.3486783 +10.1145/3427765.3432355 +10.1145/3281287.3281295 +10.1145/2542142.2542144 +10.1145/1941054.1941059 +10.1145/1941054.1941058 
diff --git a/projects/shogoth/src/python/shogoth/vm/__init__.py b/projects/shogoth/src/python/shogoth/vm/__init__.py new file mode 100644 index 0000000..b69cc11 --- /dev/null +++ b/projects/shogoth/src/python/shogoth/vm/__init__.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python3 + +from .impl import Interpreter, Opcode, BOOTSTRAP, AND, OR, NOT, XOR diff --git a/projects/shogoth/src/python/shogoth/vm/impl.py b/projects/shogoth/src/python/shogoth/vm/impl.py new file mode 100644 index 0000000..b5a786e --- /dev/null +++ b/projects/shogoth/src/python/shogoth/vm/impl.py @@ -0,0 +1,325 @@ +#!/usr/bin/env python3.10 + +"""The Shogoth VM implementation. + +The whole point of shogoth is that program executions are checkpointable and restartable. This requires that rather than +using a traditional recursive interpreter which is difficult to snapshot, interpretation in shogoth occur within a +context (a virtual machine) which DOES have an easily introspected and serialized representation. + +## The Shogoth VM Architecture + + +- NOT [bool] -> [bool] +- IF [then: addr, else: addr, cond: bool] -> [] +- CALL [procedure, n, ...] -> [...] +- RETURN [n, ...] 
+ +""" + +from random import Random +from typing import NamedTuple + +class Module(NamedTuple): + opcodes: list = [] + functions: dict = {} + types: dict = {} + constants: dict = {} + + rand: Random = Random() + + def copy(self): + return Module( + self.opcodes.copy(), + self.functions.copy(), + self.types.copy(), + self.constants.copy(), + ) + + @staticmethod + def translate(offset: int, i: "Opcode"): + match i: + case Opcode.IF(t): + return Opcode.IF(t + offset) + case Opcode.GOTO(t, anywhere=False): + return Opcode.GOTO(t + offset) + case _: + return i + + def define_function(self, name, opcodes): + start = len(self.opcodes) + self.functions[name] = start + for op in opcodes: + self.opcodes.append(self.translate(start, op)) + return name + + +class Opcode: + class TRUE(NamedTuple): + """() -> (bool) + Push the constant TRUE onto the stack. + """ + + class FALSE(NamedTuple): + """() -> (bool) + Push the constant FALSE onto the stack. + """ + + class IF(NamedTuple): + """(bool) -> () + Branch to another point if the top item of the stack is TRUE. + Otherwise fall through. + """ + + target: int + + # not, and, or, xor etc. can all be functions given if. + + class DUP(NamedTuple): + """(A, B, ...) -> (A, B, ...) + Duplicate the top N items of the stack. + """ + + nargs: int = 1 + + class ROT(NamedTuple): + """(A, B, ... Z) -> (Z, A, B, ...) + Rotate the top N elements of the stack. + """ + + nargs: int = 2 + + class DROP(NamedTuple): + """(*) -> () + Drop the top N items of the stack. + """ + + nargs: int = 1 + + class CALL(NamedTuple): + """(*) -> () + Branch to `target` pushing the current point onto the call stack. + The callee will see a stack containg only the provided `nargs`. + A subsequent RETURN will return execution to the next point. + """ + + funref: str + + class RETURN(NamedTuple): + """(*) -> () + Return to the source of the last `CALL`. + The returnee will see the top `nargs` values of the present stack appended to theirs. 
def rotate(l):
    """Return a new list in which the last element of ``l`` has moved to the front.

    ``l`` itself is not modified. An empty ``l`` raises ``IndexError``.
    """
    last = [l[-1]]
    rest = l[:-1]
    return last + rest


class FunctionSignature(NamedTuple):
    """The parsed form of a ``"type_params;name;args;sig"`` function name."""

    type_params: list
    name: str
    args: list
    sig: list

    @staticmethod
    def parse_list(l):
        """Split a comma separated string, discarding empty entries."""
        return list(filter(None, l.split(",")))

    @classmethod
    def parse(cls, name: str):
        """Build a signature from its string form.

        The string must consist of exactly four ``;`` separated sections
        (type parameters, name, argument list, result list); otherwise the
        tuple unpacking below raises ``ValueError``.
        """
        raw_params, fn_name, raw_args, raw_sig = name.split(";")
        return cls(
            type_params=cls.parse_list(raw_params),
            name=fn_name,
            args=cls.parse_list(raw_args),
            sig=cls.parse_list(raw_sig),
        )


class Stackframe(object):
    """One activation record: a value stack, a name, an instruction pointer and a parent frame.

    The value stack is a plain list whose index 0 is the top of the stack.
    """

    def __init__(self, stack=None, name=None, ip=None, parent=None):
        # Falsy arguments (None, empty list, 0, "") fall back to the defaults.
        self.stack = stack if stack else []
        self.name = name if name else ";unknown;;"
        self.ip = ip if ip else 0
        self.parent = parent

    def push(self, obj):
        """Place ``obj`` on top of the stack (in place)."""
        self.stack[:0] = [obj]

    def pop(self):
        """Remove and return the top of the stack."""
        top = self.stack.pop(0)
        return top

    def call(self, signature, ip):
        """Create the callee frame for ``signature`` starting at ``ip``.

        As many values as ``signature.args`` has entries are moved from the top
        of this frame's stack into the new frame, which gets this frame as its
        parent.
        """
        argc = len(signature.args)
        passed = self.stack[:argc]
        self.stack = self.stack[argc:]
        callee = Stackframe(passed, signature.name, ip, self)
        return callee

    def ret(self, nargs):
        """Prepend the top ``nargs`` values of this frame to the parent's stack.

        Returns the parent frame.
        """
        caller = self.parent
        caller.stack = self.stack[:nargs] + caller.stack
        return caller

    def dup(self, nargs):
        """Duplicate the top ``nargs`` values, keeping their order."""
        top = self.stack[:nargs]
        self.stack = top + self.stack

    def drop(self, nargs):
        """Discard the top ``nargs`` values."""
        remaining = self.stack[nargs:]
        self.stack = remaining

    def rot(self, nargs):
        """Rotate the top ``nargs`` values with :func:`rotate`; the rest is untouched."""
        head, tail = self.stack[:nargs], self.stack[nargs:]
        self.stack = rotate(head) + tail
# Module holding the built-in boolean functions defined below. Interpreter.run
# copies the module it was constructed with and appends the program being run.
#
# Reading the listings:
# - Function names use the "type_params;name;args;sig" layout that
#   FunctionSignature.parse splits on ";". The number of entries in the args
#   section is how many stack values CALL moves into the callee's frame.
# - Branch targets are written relative to the start of each function;
#   Module.define_function relocates them when the code is appended.
# - As implemented in Interpreter.run, IF pops the top of the stack and jumps to
#   its target when that value is falsy; a truthy value falls through to the
#   next instruction.
BOOTSTRAP = Module()

NOT = ";/lang/shogoth/v0/bootstrap/not;bool;bool"
BOOTSTRAP.define_function(
    NOT,
    [
        Opcode.IF(target=3),  # 0: pop the argument; falsy -> 3
        Opcode.FALSE(),  # 1: argument was truthy, result is False
        Opcode.RETURN(1),  # 2
        Opcode.TRUE(),  # 3: argument was falsy, result is True
        Opcode.RETURN(1),  # 4
    ],
)

OR = ";/lang/shogoth/v0/bootstrap/or;bool,bool;bool"
BOOTSTRAP.define_function(
    OR,
    [
        Opcode.IF(target=3),  # 0: pop the first argument; falsy -> 3
        Opcode.TRUE(),  # 1: first argument was truthy, result is True
        Opcode.RETURN(1),  # 2
        Opcode.IF(target=6),  # 3: pop the second argument; falsy -> 6
        Opcode.TRUE(),  # 4: second argument was truthy, result is True
        Opcode.RETURN(1),  # 5
        Opcode.FALSE(),  # 6: both arguments were falsy, result is False
        Opcode.RETURN(1)  # 7
    ],
)

AND = ";/lang/shogoth/v0/bootstrap/and;bool,bool;bool"
BOOTSTRAP.define_function(
    AND,
    [
        Opcode.IF(target=3),  # 0: pop the first argument; falsy -> 3
        Opcode.IF(target=3),  # 1: pop the second argument; falsy -> 3
        Opcode.GOTO(target=5),  # 2: both were truthy, skip the False branch
        Opcode.FALSE(),  # 3: at least one argument was falsy
        Opcode.RETURN(1),  # 4: returns only the top value, i.e. the False just pushed
        Opcode.TRUE(),  # 5
        Opcode.RETURN(1),  # 6
    ],
)
vm = Interpreter(BOOTSTRAP)


def _const(flag):
    """Return the opcode that pushes the boolean ``flag``."""
    return Opcode.TRUE() if flag else Opcode.FALSE()


def _run(*program):
    """Run ``program`` on the shared interpreter and return the resulting values."""
    return vm.run(list(program))


def _check_binary(funref, truth_table):
    """Call ``funref`` once per ``(first_pushed, second_pushed, expected)`` row."""
    for first, second, expected in truth_table:
        outcome = _run(
            _const(first),
            _const(second),
            Opcode.CALL(funref),
            Opcode.RETURN(1),
        )
        assert outcome == [expected]


def test_true():
    assert _run(Opcode.TRUE(), Opcode.RETURN(1)) == [True]


def test_false():
    assert _run(Opcode.FALSE(), Opcode.RETURN(1)) == [False]


def test_return():
    # RETURN(n) yields the top n values, most recently pushed first.
    assert _run(Opcode.FALSE(), Opcode.RETURN(0)) == []
    assert _run(Opcode.TRUE(), Opcode.FALSE(), Opcode.RETURN(1)) == [False]
    assert _run(Opcode.TRUE(), Opcode.FALSE(), Opcode.RETURN(2)) == [False, True]


def test_dup():
    single = _run(Opcode.TRUE(), Opcode.FALSE(), Opcode.DUP(1), Opcode.RETURN(3))
    assert single == [False, False, True]

    pair = _run(Opcode.TRUE(), Opcode.FALSE(), Opcode.DUP(2), Opcode.RETURN(4))
    assert pair == [False, True, False, True]


def test_rot():
    two = _run(Opcode.TRUE(), Opcode.FALSE(), Opcode.ROT(2), Opcode.RETURN(2))
    assert two == [True, False]

    three = _run(
        Opcode.FALSE(),
        Opcode.TRUE(),
        Opcode.FALSE(),
        Opcode.ROT(3),
        Opcode.RETURN(3),
    )
    assert three == [False, False, True]


def test_drop():
    left = _run(Opcode.TRUE(), Opcode.FALSE(), Opcode.DROP(1), Opcode.RETURN(1))
    assert left == [True]


def test_not():
    for operand, expected in ((True, False), (False, True)):
        outcome = _run(_const(operand), Opcode.CALL(NOT), Opcode.RETURN(1))
        assert outcome == [expected]


def test_or():
    _check_binary(
        OR,
        [
            (False, False, False),
            (True, False, True),
            (False, True, True),
            (True, True, True),
        ],
    )


def test_and():
    _check_binary(
        AND,
        [
            (False, False, False),
            (True, False, False),
            (False, True, False),
            (True, True, True),
        ],
    )


def test_xor():
    _check_binary(
        XOR,
        [
            (False, False, False),
            (True, False, True),
            (False, True, True),
            (True, True, False),
        ],
    )