From 298699c1d47f50c3025bc73f2dbc9347d8e7af78 Mon Sep 17 00:00:00 2001 From: "Reid D. 'arrdem' McKenzie" Date: Tue, 12 Apr 2022 01:49:12 -0600 Subject: [PATCH] Get the Shogoth VM to a bit better state; py3.10 Ba'azel --- WORKSPACE | 6 +- .../shogoth/src/python/shogoth/reader/impl.py | 8 +- .../shogoth/src/python/shogoth/vm/__init__.py | 1 - .../src/python/shogoth/vm/bootstrap.py | 70 +++++ .../shogoth/src/python/shogoth/vm/impl.py | 278 ++++-------------- projects/shogoth/src/python/shogoth/vm/isa.py | 158 ++++++++++ .../python/shogoth/vm/test_interpreter.py | 50 +++- tools/python/BUILD | 2 +- tools/python/requirements.txt | 1 + 9 files changed, 337 insertions(+), 237 deletions(-) create mode 100644 projects/shogoth/src/python/shogoth/vm/bootstrap.py create mode 100644 projects/shogoth/src/python/shogoth/vm/isa.py diff --git a/WORKSPACE b/WORKSPACE index 1f73504..806a371 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -40,8 +40,8 @@ bazel_skylib_workspace() git_repository( name = "rules_python", remote = "https://github.com/bazelbuild/rules_python.git", - tag = "0.4.0", - # commit = "...", + # tag = "0.4.0", + commit = "888fa20176cdcaebb33f968dc7a8112fb678731d", ) register_toolchains("//tools/python:python3_toolchain") @@ -53,7 +53,7 @@ load("@rules_python//python:pip.bzl", "pip_parse") pip_parse( name = "arrdem_source_pypi", requirements_lock = "//tools/python:requirements.txt", - python_interpreter = "/usr/bin/python3.9", + python_interpreter = "/usr/bin/python3.10" ) # Load the starlark macro which will define your dependencies. 
diff --git a/projects/shogoth/src/python/shogoth/reader/impl.py b/projects/shogoth/src/python/shogoth/reader/impl.py index 7b465c6..2c118bc 100644 --- a/projects/shogoth/src/python/shogoth/reader/impl.py +++ b/projects/shogoth/src/python/shogoth/reader/impl.py @@ -4,13 +4,7 @@ import re from typing import Any -from lark import ( - Lark, - Token, - Transformer, - Tree, - v_args, -) +from lark import Token, Tree from shogoth.parser import parse from shogoth.types import Keyword, Symbol diff --git a/projects/shogoth/src/python/shogoth/vm/__init__.py b/projects/shogoth/src/python/shogoth/vm/__init__.py index b69cc11..63f77b6 100644 --- a/projects/shogoth/src/python/shogoth/vm/__init__.py +++ b/projects/shogoth/src/python/shogoth/vm/__init__.py @@ -1,3 +1,2 @@ #!/usr/bin/env python3 -from .impl import Interpreter, Opcode, BOOTSTRAP, AND, OR, NOT, XOR diff --git a/projects/shogoth/src/python/shogoth/vm/bootstrap.py b/projects/shogoth/src/python/shogoth/vm/bootstrap.py new file mode 100644 index 0000000..58d9de7 --- /dev/null +++ b/projects/shogoth/src/python/shogoth/vm/bootstrap.py @@ -0,0 +1,70 @@ +"""Shogoth bootstrap code. + +Some utterly trivial functions and types that allow me to begin testing the VM. +Hopefully no "real" interpreter ever uses this code, since it's obviously replaceable. 
+""" + +from .isa import Module, Opcode + + +BOOTSTRAP = Module() + +NOT = BOOTSTRAP.define_function( + ";/lang/shogoth/v0/bootstrap/not;bool;bool", + [ + Opcode.IF(target=3), + Opcode.FALSE(), + Opcode.RETURN(1), + Opcode.TRUE(), + Opcode.RETURN(1), + ], +) + +OR = BOOTSTRAP.define_function( + ";/lang/shogoth/v0/bootstrap/or;bool,bool;bool", + [ + Opcode.IF(target=3), + Opcode.TRUE(), + Opcode.RETURN(1), + Opcode.IF(target=6), + Opcode.TRUE(), + Opcode.RETURN(1), + Opcode.FALSE(), + Opcode.RETURN(1) + ], +) + +AND = BOOTSTRAP.define_function( + ";/lang/shogoth/v0/bootstrap/and;bool,bool;bool", + [ + Opcode.IF(target=3), + Opcode.IF(target=3), + Opcode.GOTO(target=5), + Opcode.FALSE(), + Opcode.RETURN(1), + Opcode.TRUE(), + Opcode.RETURN(1), + ], +) + +XOR = BOOTSTRAP.define_function( + ";/lang/shogoth/v0/bootstrap/xor;bool,bool;bool", + [ + Opcode.DUP(nargs=2), + # !A && B + Opcode.CALL(NOT), + Opcode.CALL(AND), + Opcode.IF(target=6), + Opcode.TRUE(), + Opcode.RETURN(1), + # !B && A + Opcode.ROT(2), + Opcode.CALL(NOT), + Opcode.CALL(AND), + Opcode.IF(target=12), + Opcode.TRUE(), + Opcode.RETURN(1), + Opcode.FALSE(), + Opcode.RETURN(1), + ], +) diff --git a/projects/shogoth/src/python/shogoth/vm/impl.py b/projects/shogoth/src/python/shogoth/vm/impl.py index b5a786e..2393267 100644 --- a/projects/shogoth/src/python/shogoth/vm/impl.py +++ b/projects/shogoth/src/python/shogoth/vm/impl.py @@ -16,153 +16,15 @@ context (a virtual machine) which DOES have an easily introspected and serialize """ -from random import Random -from typing import NamedTuple +from copy import deepcopy -class Module(NamedTuple): - opcodes: list = [] - functions: dict = {} - types: dict = {} - constants: dict = {} - - rand: Random = Random() - - def copy(self): - return Module( - self.opcodes.copy(), - self.functions.copy(), - self.types.copy(), - self.constants.copy(), - ) - - @staticmethod - def translate(offset: int, i: "Opcode"): - match i: - case Opcode.IF(t): - return Opcode.IF(t + offset) 
- case Opcode.GOTO(t, anywhere=False): - return Opcode.GOTO(t + offset) - case _: - return i - - def define_function(self, name, opcodes): - start = len(self.opcodes) - self.functions[name] = start - for op in opcodes: - self.opcodes.append(self.translate(start, op)) - return name - - -class Opcode: - class TRUE(NamedTuple): - """() -> (bool) - Push the constant TRUE onto the stack. - """ - - class FALSE(NamedTuple): - """() -> (bool) - Push the constant FALSE onto the stack. - """ - - class IF(NamedTuple): - """(bool) -> () - Branch to another point if the top item of the stack is TRUE. - Otherwise fall through. - """ - - target: int - - # not, and, or, xor etc. can all be functions given if. - - class DUP(NamedTuple): - """(A, B, ...) -> (A, B, ...) - Duplicate the top N items of the stack. - """ - - nargs: int = 1 - - class ROT(NamedTuple): - """(A, B, ... Z) -> (Z, A, B, ...) - Rotate the top N elements of the stack. - """ - - nargs: int = 2 - - class DROP(NamedTuple): - """(*) -> () - Drop the top N items of the stack. - """ - - nargs: int = 1 - - class CALL(NamedTuple): - """(*) -> () - Branch to `target` pushing the current point onto the call stack. - The callee will see a stack containg only the provided `nargs`. - A subsequent RETURN will return execution to the next point. - """ - - funref: str - - class RETURN(NamedTuple): - """(*) -> () - Return to the source of the last `CALL`. - The returnee will see the top `nargs` values of the present stack appended to theirs. - All other values on the stack will be discarded. - If the call stack is empty, `RETURN` will exit the interpreter. - """ - - nargs: int - - class GOTO(NamedTuple): - """() -> () - Branch to another point within the same bytecode segment. - """ - - target: int - anywhere: bool = False - - class STRUCT(NamedTuple): - """(*) -> (T) - Consume the top N items of the stack, producing a struct. 
- """ - - nargs: int - structref: str - - class FIELD(NamedTuple): - """(A) -> (B) - Consume the struct reference at the top of the stack, producing the value of the referenced field. - """ - - fieldref: str +from .isa import FunctionSignature, Opcode def rotate(l): return [l[-1]] + l[:-1] -class FunctionSignature(NamedTuple): - type_params: list - name: str - args: list - sig: list - - @staticmethod - def parse_list(l): - return [e for e in l.split(",") if e] - - @classmethod - def parse(cls, name: str): - vars, name, args, sig = name.split(";") - return cls( - cls.parse_list(vars), - name, - cls.parse_list(args), - cls.parse_list(sig) - ) - - class Stackframe(object): def __init__(self, stack=None, name=None, ip=None, parent=None): self.stack = stack or [] @@ -176,12 +38,13 @@ class Stackframe(object): def pop(self): return self.stack.pop(0) - def call(self, signature, ip): + def call(self, signature: FunctionSignature, ip): + print(signature) nargs = len(signature.args) args, self.stack = self.stack[:nargs], self.stack[nargs:] return Stackframe( stack=args, - name=signature.name, + name=signature.raw, ip=ip, parent=self ) @@ -199,8 +62,24 @@ class Stackframe(object): def rot(self, nargs): self.stack = rotate(self.stack[:nargs]) + self.stack[nargs:] + def __getitem__(self, key): + return self.stack.__getitem__(key) + + def __len__(self): + return len(self.stack) + + +class InterpreterError(Exception): + """An error raised by the interpreter when something goes awry.""" + + def __init__(self, module, stack, message=None): + self.module = module + self.stack = stack + super().__init__(message) + class Interpreter(object): + """A shit simple instruction pointer based interpreter.""" def __init__(self, bootstrap_module): self.bootstrap = bootstrap_module @@ -212,9 +91,13 @@ class Interpreter(object): mod.define_function(";;;", opcodes) stack.ip = mod.functions[";;;"] + def _error(msg=None): + # Note this is pretty expensive because we have to snapshot the stack 
BEFORE we do anything + # And the stack object isn't immutable or otherwise designed for cheap snapshotting + raise InterpreterError(mod, deepcopy(stack), msg) + while True: op = mod.opcodes[stack.ip] - print(stack.ip, op, stack.stack) match op: case Opcode.TRUE(): stack.push(True) @@ -223,32 +106,66 @@ class Interpreter(object): stack.push(False) case Opcode.IF(target): - if not stack.pop(): + if len(stack) < 1: + _error("Stack size violation") + + val = stack.pop() + if val not in [True, False]: + _error("Type violation") + + if val is False: stack.ip = target continue case Opcode.DUP(n): + if (n > len(stack)): + _error("Stack size violation") + stack.dup(n) case Opcode.ROT(n): + if (n > len(stack)): + _error("Stack size violation") + stack.rot(n) case Opcode.DROP(n): + if (n > len(stack)): + _error("Stack size violation") + stack.drop(n) case Opcode.CALL(dest): - sig = FunctionSignature.parse(dest) - ip = mod.functions[dest] + try: + sig = FunctionSignature.parse(dest) + except: + _error("Invalid target") + + try: + ip = mod.functions[dest] + except KeyError: + _error("Unknown target") + stack = stack.call(sig, ip) continue case Opcode.RETURN(n): + if (n > len(stack)): + _error("Stack size violation") + if stack.parent: + sig = FunctionSignature.parse(stack.name) + if (len(sig.ret) != n): + _error("Signature violation") + stack = stack.ret(n) else: - return stack.stack[:n] + return stack[:n] case Opcode.GOTO(n, _): + if (n < 0): + _error("Illegal branch target") + stack.ip = n continue @@ -256,70 +173,3 @@ class Interpreter(object): raise Exception(f"Unhandled interpreter state {op}") stack.ip += 1 - - -BOOTSTRAP = Module() - -NOT = ";/lang/shogoth/v0/bootstrap/not;bool;bool" -BOOTSTRAP.define_function( - NOT, - [ - Opcode.IF(target=3), - Opcode.FALSE(), - Opcode.RETURN(1), - Opcode.TRUE(), - Opcode.RETURN(1), - ], -) - -OR = ";/lang/shogoth/v0/bootstrap/or;bool,bool;bool" -BOOTSTRAP.define_function( - OR, - [ - Opcode.IF(target=3), - Opcode.TRUE(), - 
Opcode.RETURN(1), - Opcode.IF(target=6), - Opcode.TRUE(), - Opcode.RETURN(1), - Opcode.FALSE(), - Opcode.RETURN(1) - ], -) - -AND = ";/lang/shogoth/v0/bootstrap/and;bool,bool;bool" -BOOTSTRAP.define_function( - AND, - [ - Opcode.IF(target=3), - Opcode.IF(target=3), - Opcode.GOTO(target=5), - Opcode.FALSE(), - Opcode.RETURN(1), - Opcode.TRUE(), - Opcode.RETURN(1), - ], -) - -XOR = ";/lang/shogoth/v0/bootstrap/xor;bool,bool;bool" -BOOTSTRAP.define_function( - XOR, - [ - Opcode.DUP(nargs=2), - # !A && B - Opcode.CALL(";/lang/shogoth/v0/bootstrap/not;bool;bool"), - Opcode.CALL(";/lang/shogoth/v0/bootstrap/and;bool,bool;bool"), - Opcode.IF(target=6), - Opcode.TRUE(), - Opcode.RETURN(1), - # !B && A - Opcode.ROT(2), - Opcode.CALL(";/lang/shogoth/v0/bootstrap/not;bool;bool"), - Opcode.CALL(";/lang/shogoth/v0/bootstrap/and;bool,bool;bool"), - Opcode.IF(target=12), - Opcode.TRUE(), - Opcode.RETURN(1), - Opcode.FALSE(), - Opcode.RETURN(1), - ], -) diff --git a/projects/shogoth/src/python/shogoth/vm/isa.py b/projects/shogoth/src/python/shogoth/vm/isa.py new file mode 100644 index 0000000..c449bc1 --- /dev/null +++ b/projects/shogoth/src/python/shogoth/vm/isa.py @@ -0,0 +1,158 @@ +"""The instruction set for Shogoth.""" + + +from typing import NamedTuple + + +class Opcode: + class TRUE(NamedTuple): + """() -> (bool) + Push the constant TRUE onto the stack. + """ + + class FALSE(NamedTuple): + """() -> (bool) + Push the constant FALSE onto the stack. + """ + + class IF(NamedTuple): + """(bool) -> () + Branch to another point if the top item of the stack is TRUE. + Otherwise fall through. + """ + + target: int + + # not, and, or, xor etc. can all be functions given if. + + class DUP(NamedTuple): + """(A, B, ...) -> (A, B, ...) + Duplicate the top N items of the stack. + """ + + nargs: int = 1 + + class ROT(NamedTuple): + """(A, B, ... Z) -> (Z, A, B, ...) + Rotate the top N elements of the stack. 
+ """ + + nargs: int = 2 + + class DROP(NamedTuple): + """(*) -> () + Drop the top N items of the stack. + """ + + nargs: int = 1 + + class CALL(NamedTuple): + """(*) -> () + Branch to `target` pushing the current point onto the call stack. + The callee will see a stack containing only the provided `nargs`. + A subsequent RETURN will return execution to the next point. + """ + + funref: str + + class RETURN(NamedTuple): + """(*) -> () + Return to the source of the last `CALL`. + The returnee will see the top `nargs` values of the present stack appended to theirs. + All other values on the stack will be discarded. + If the call stack is empty, `RETURN` will exit the interpreter. + """ + + nargs: int + + class GOTO(NamedTuple): + """() -> () + Branch to another point within the same bytecode segment. + """ + + target: int + anywhere: bool = False + + class STRUCT(NamedTuple): + """(*) -> (T) + Consume the top N items of the stack, producing a struct. + """ + + nargs: int + structref: str + + class FIELD(NamedTuple): + """(A) -> (B) + Consume the struct reference at the top of the stack, producing the value of the referenced field. 
+ """ + + fieldref: str + + +class FunctionSignature(NamedTuple): + raw: str + type_params: list + name: str + args: list + ret: list + + @staticmethod + def parse_list(l): + return [e for e in l.split(",") if e] + + @classmethod + def parse(cls, raw: str): + vars, name, args, ret = raw.split(";") + return cls( + raw, + cls.parse_list(vars), + name, + cls.parse_list(args), + cls.parse_list(ret) + ) + + +class Module(NamedTuple): + opcodes: list = [] + functions: dict = {} + types: dict = {} + constants: dict = {} + + def copy(self): + return Module( + self.opcodes.copy(), + self.functions.copy(), + self.types.copy(), + self.constants.copy(), + ) + + @staticmethod + def translate(offset: int, i: "Opcode"): + match i: + case Opcode.IF(t): + return Opcode.IF(t + offset) + case Opcode.GOTO(t, anywhere=False): + return Opcode.GOTO(t + offset) + case _: + return i + + def define_function(self, name, opcodes): + # FIXME: This is way way WAAAAAAY too minimal. Lots of other stuff goes on a "function." + # For instance how to install handlers? + # How to consume capabilities? 
+ + try: + sig = FunctionSignature.parse(name) + assert sig.name + except: + raise ValueError("Illegal name provided") + + start = len(self.opcodes) + self.functions[name] = start + for op in opcodes: + self.opcodes.append(self.translate(start, op)) + return name + + def define_struct(self, name, signature): + # FIXME: What in TARNATION is this going to do + pass diff --git a/projects/shogoth/test/python/shogoth/vm/test_interpreter.py b/projects/shogoth/test/python/shogoth/vm/test_interpreter.py index c1aff81..551f788 100644 --- a/projects/shogoth/test/python/shogoth/vm/test_interpreter.py +++ b/projects/shogoth/test/python/shogoth/vm/test_interpreter.py @@ -2,26 +2,30 @@ Tests coverign the VM interpreter """ +import pytest from shogoth.vm import * -vm = Interpreter(BOOTSTRAP) + +@pytest.fixture +def vm(): + return Interpreter(BOOTSTRAP) -def test_true(): +def test_true(vm): assert vm.run([Opcode.TRUE(), Opcode.RETURN(1)]) == [True] -def test_false(): +def test_false(vm): assert vm.run([Opcode.FALSE(), Opcode.RETURN(1)]) == [False] -def test_return(): +def test_return(vm): assert vm.run([Opcode.FALSE(), Opcode.RETURN(0)]) == [] assert vm.run([Opcode.TRUE(), Opcode.FALSE(), Opcode.RETURN(1)]) == [False] assert vm.run([Opcode.TRUE(), Opcode.FALSE(), Opcode.RETURN(2)]) == [False, True] -def test_dup(): +def test_dup(vm): assert vm.run([ Opcode.TRUE(), Opcode.FALSE(), @@ -37,7 +41,7 @@ def test_dup(): ]) == [False, True, False, True] -def test_rot(): +def test_rot(vm): assert vm.run([ Opcode.TRUE(), Opcode.FALSE(), @@ -54,7 +58,7 @@ def test_rot(): ]) == [False, False, True] -def test_drop(): +def test_drop(vm): assert vm.run([ Opcode.TRUE(), Opcode.FALSE(), @@ -63,7 +67,7 @@ def test_drop(): ]) == [True] -def test_not(): +def test_not(vm): assert vm.run([ Opcode.TRUE(), Opcode.CALL(NOT), @@ -77,7 +81,7 @@ def test_not(): ]) == [True] -def test_or(): +def test_or(vm): assert vm.run([ Opcode.FALSE(), Opcode.FALSE(), @@ -107,7 +111,7 @@ def test_or(): ]) == [True] -def 
test_and(): +def test_and(vm): assert vm.run([ Opcode.FALSE(), Opcode.FALSE(), @@ -137,7 +141,7 @@ def test_and(): ]) == [True] -def test_xor(): +def test_xor(vm): assert vm.run([ Opcode.FALSE(), Opcode.FALSE(), @@ -165,3 +169,27 @@ def test_xor(): Opcode.CALL(XOR), Opcode.RETURN(1) ]) == [False] + + +def test_dup_too_many(vm): + with pytest.raises(InterpreterError): + vm.run([Opcode.DUP(1)]) + + with pytest.raises(InterpreterError): + vm.run([Opcode.FALSE(), Opcode.DUP(2)]) + + +def test_rot_too_many(vm): + with pytest.raises(InterpreterError): + vm.run([Opcode.ROT(1)]) + + with pytest.raises(InterpreterError): + vm.run([Opcode.TRUE(), Opcode.ROT(2)]) + + +def test_drop_too_many(vm): + with pytest.raises(InterpreterError): + vm.run([Opcode.DROP(1)]) + + with pytest.raises(InterpreterError): + vm.run([Opcode.TRUE(), Opcode.DROP(2)]) diff --git a/tools/python/BUILD b/tools/python/BUILD index 29ea757..5aca2e3 100644 --- a/tools/python/BUILD +++ b/tools/python/BUILD @@ -17,7 +17,7 @@ exports_files([ py_runtime( name = "python3_runtime", files = [], - interpreter_path = "/usr/bin/python3.9", + interpreter_path = "/usr/bin/python3.10", python_version = "PY3", visibility = ["//visibility:public"], ) diff --git a/tools/python/requirements.txt b/tools/python/requirements.txt index 0e7345f..3214edf 100644 --- a/tools/python/requirements.txt +++ b/tools/python/requirements.txt @@ -15,6 +15,7 @@ click==7.1.2 colored==1.4.3 commonmark==0.9.1 coverage==6.2 +dataclasses Deprecated==1.2.13 docutils==0.17.1 ExifRead==2.3.2