A bytecode VM sketch
This commit is contained in:
parent
977b6d8677
commit
feb6980d4f
5 changed files with 510 additions and 32 deletions
|
@ -1,35 +1,7 @@
|
|||
py_library(
|
||||
name = "lib",
|
||||
srcs = glob(["src/python/**/*"]),
|
||||
imports = [
|
||||
"src/python"
|
||||
],
|
||||
deps = [
|
||||
py_project(
|
||||
name = "shogoth",
|
||||
main = "src/python/shogoth/repl/__main__.py",
|
||||
lib_deps = [
|
||||
py_requirement("lark"),
|
||||
],
|
||||
)
|
||||
|
||||
zapp_binary(
|
||||
name = "shogothd",
|
||||
main = "src/python/shogoth/server/__main__.py",
|
||||
deps = [
|
||||
":lib",
|
||||
],
|
||||
)
|
||||
|
||||
zapp_binary(
|
||||
name = "shogoth",
|
||||
main = "src/python/shogoth/client/__main__.py",
|
||||
deps = [
|
||||
":lib",
|
||||
],
|
||||
)
|
||||
|
||||
zapp_binary(
|
||||
name = "repl",
|
||||
main = "src/python/shogoth/repl/__main__.py",
|
||||
shebang = "/usr/bin/env python3.10",
|
||||
deps = [
|
||||
":lib",
|
||||
]
|
||||
)
|
||||
|
|
11
projects/shogoth/src/python/shogoth/vm/NOTES.md
Normal file
11
projects/shogoth/src/python/shogoth/vm/NOTES.md
Normal file
|
@ -0,0 +1,11 @@
|
|||
# VM notes
|
||||
|
||||
## Papers
|
||||
|
||||
10.1145/3486606.3488073
|
||||
10.1145/3486606.3486783
|
||||
10.1145/3427765.3432355
|
||||
10.1145/3281287.3281295
|
||||
10.1145/2542142.2542144
|
||||
10.1145/1941054.1941059
|
||||
10.1145/1941054.1941058
|
3
projects/shogoth/src/python/shogoth/vm/__init__.py
Normal file
3
projects/shogoth/src/python/shogoth/vm/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
from .impl import Interpreter, Opcode, BOOTSTRAP, AND, OR, NOT, XOR
|
325
projects/shogoth/src/python/shogoth/vm/impl.py
Normal file
325
projects/shogoth/src/python/shogoth/vm/impl.py
Normal file
|
@ -0,0 +1,325 @@
|
|||
#!/usr/bin/env python3.10
|
||||
|
||||
"""The Shogoth VM implementation.
|
||||
|
||||
The whole point of shogoth is that program executions are checkpointable and restartable. This requires that rather than
|
||||
using a traditional recursive interpreter which is difficult to snapshot, interpretation in shogoth occurs within a
|
||||
context (a virtual machine) which DOES have an easily introspected and serialized representation.
|
||||
|
||||
## The Shogoth VM Architecture
|
||||
|
||||
|
||||
- NOT [bool] -> [bool]
|
||||
- IF [then: addr, else: addr, cond: bool] -> []
|
||||
- CALL [procedure, n, ...] -> [...]
|
||||
- RETURN [n, ...]
|
||||
|
||||
"""
|
||||
|
||||
from random import Random
|
||||
from typing import NamedTuple
|
||||
|
||||
class Module(NamedTuple):
|
||||
opcodes: list = []
|
||||
functions: dict = {}
|
||||
types: dict = {}
|
||||
constants: dict = {}
|
||||
|
||||
rand: Random = Random()
|
||||
|
||||
def copy(self):
|
||||
return Module(
|
||||
self.opcodes.copy(),
|
||||
self.functions.copy(),
|
||||
self.types.copy(),
|
||||
self.constants.copy(),
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def translate(offset: int, i: "Opcode"):
|
||||
match i:
|
||||
case Opcode.IF(t):
|
||||
return Opcode.IF(t + offset)
|
||||
case Opcode.GOTO(t, anywhere=False):
|
||||
return Opcode.GOTO(t + offset)
|
||||
case _:
|
||||
return i
|
||||
|
||||
def define_function(self, name, opcodes):
|
||||
start = len(self.opcodes)
|
||||
self.functions[name] = start
|
||||
for op in opcodes:
|
||||
self.opcodes.append(self.translate(start, op))
|
||||
return name
|
||||
|
||||
|
||||
class Opcode:
    """Namespace for the VM's instruction types.

    Each instruction is a small NamedTuple so that it can be destructured
    with `match`. Stack effects are written `(before) -> (after)`.
    """

    class TRUE(NamedTuple):
        """() -> (bool)

        Push the constant TRUE onto the stack.
        """

    class FALSE(NamedTuple):
        """() -> (bool)

        Push the constant FALSE onto the stack.
        """

    class IF(NamedTuple):
        """(bool) -> ()

        Pop the top item of the stack and branch to `target` if it is FALSE.
        Otherwise fall through to the next instruction.
        """

        target: int

    # not, and, or, xor etc. can all be functions given if.

    class DUP(NamedTuple):
        """(A, B, ...) -> (A, B, ..., A, B, ...)

        Duplicate the top N items of the stack.
        """

        nargs: int = 1

    class ROT(NamedTuple):
        """(A, B, ... Z) -> (Z, A, B, ...)

        Rotate the top N elements of the stack.
        """

        nargs: int = 2

    class DROP(NamedTuple):
        """(*) -> ()

        Drop the top N items of the stack.
        """

        nargs: int = 1

    class CALL(NamedTuple):
        """(*) -> ()

        Branch to the function named by `funref`, pushing the current point onto the call stack.
        The callee will see a stack containing only its arguments; the argument count is taken from
        the signature encoded in `funref`.
        A subsequent RETURN will return execution to the next point.
        """

        funref: str

    class RETURN(NamedTuple):
        """(*) -> ()

        Return to the source of the last `CALL`.
        The returnee will see the top `nargs` values of the present stack appended to theirs.
        All other values on the stack will be discarded.
        If the call stack is empty, `RETURN` will exit the interpreter.
        """

        nargs: int

    class GOTO(NamedTuple):
        """() -> ()

        Branch to another point within the same bytecode segment.
        Unless `anywhere` is set, `target` is relative to the start of the function being defined
        and is rebased when the function is added to a module.
        """

        target: int
        anywhere: bool = False

    class STRUCT(NamedTuple):
        """(*) -> (T)

        Consume the top N items of the stack, producing a struct.
        """

        nargs: int
        structref: str

    class FIELD(NamedTuple):
        """(A) -> (B)

        Consume the struct reference at the top of the stack, producing the value of the referenced field.
        """

        fieldref: str
|
||||
|
||||
|
||||
def rotate(l):
    """Return a copy of `l` with its last element moved to the front.

    Slicing is used on both halves, so an empty sequence rotates to an empty
    sequence and the input is never modified.
    """
    return l[-1:] + l[:-1]
|
||||
|
||||
|
||||
class FunctionSignature(NamedTuple):
    """A function name decoded into its parts.

    The encoded form is four `;`-separated fields,
    `type_params;name;args;sig`, where `type_params`, `args` and `sig` are
    comma-separated lists that may be empty.
    """

    type_params: list
    name: str
    args: list
    sig: list

    @staticmethod
    def parse_list(l):
        """Split a comma-separated string, dropping empty entries."""
        return [e for e in l.split(",") if e]

    @classmethod
    def parse(cls, name: str):
        """Decode an encoded signature string.

        Raises:
          ValueError: if `name` does not consist of exactly four
            `;`-separated fields.
        """
        parts = name.split(";")
        if len(parts) != 4:
            raise ValueError(
                f"Malformed function signature {name!r}: "
                f"expected 4 ';'-separated fields, got {len(parts)}"
            )
        type_params, fn_name, args, sig = parts
        return cls(
            cls.parse_list(type_params),
            fn_name,
            cls.parse_list(args),
            cls.parse_list(sig),
        )
|
||||
|
||||
|
||||
class Stackframe(object):
    """One activation record: a value stack, an instruction pointer and a parent link.

    The top of the value stack is index 0 of `self.stack`.
    """

    def __init__(self, stack=None, name=None, ip=None, parent=None):
        self.stack = stack or []
        self.name = name or ";unknown;;"
        self.ip = ip or 0
        self.parent = parent

    def __repr__(self):
        return f"Stackframe(name={self.name!r}, ip={self.ip!r}, stack={self.stack!r})"

    def push(self, obj):
        """Place `obj` on top of the stack."""
        self.stack.insert(0, obj)

    def pop(self):
        """Remove and return the top of the stack."""
        return self.stack.pop(0)

    def call(self, signature, ip):
        """Create the callee's frame.

        One value per entry in `signature.args` is moved from this frame's
        stack to the new frame, which starts executing at `ip` and has this
        frame as its parent.
        """
        nargs = len(signature.args)
        args, self.stack = self.stack[:nargs], self.stack[nargs:]
        return Stackframe(
            stack=args,
            name=signature.name,
            ip=ip,
            parent=self,
        )

    def ret(self, nargs):
        """Hand the top `nargs` values to the parent frame and return that frame.

        Raises:
          RuntimeError: if this frame has no parent to return to.
        """
        if self.parent is None:
            raise RuntimeError(f"Cannot return from root stack frame {self.name!r}")
        self.parent.stack = self.stack[:nargs] + self.parent.stack
        return self.parent

    def dup(self, nargs):
        """Duplicate the top `nargs` values."""
        self.stack = self.stack[:nargs] + self.stack

    def drop(self, nargs):
        """Discard the top `nargs` values."""
        self.stack = self.stack[nargs:]

    def rot(self, nargs):
        """Rotate the top `nargs` values, leaving the rest untouched."""
        self.stack = rotate(self.stack[:nargs]) + self.stack[nargs:]
|
||||
|
||||
|
||||
class Interpreter(object):
|
||||
def __init__(self, bootstrap_module):
|
||||
self.bootstrap = bootstrap_module
|
||||
|
||||
def run(self, opcodes):
|
||||
"""Directly interpret some opcodes in the configured environment."""
|
||||
|
||||
stack = Stackframe()
|
||||
mod = self.bootstrap.copy()
|
||||
mod.define_function(";<entry>;;", opcodes)
|
||||
stack.ip = mod.functions[";<entry>;;"]
|
||||
|
||||
while True:
|
||||
op = mod.opcodes[stack.ip]
|
||||
print(stack.ip, op, stack.stack)
|
||||
match op:
|
||||
case Opcode.TRUE():
|
||||
stack.push(True)
|
||||
|
||||
case Opcode.FALSE():
|
||||
stack.push(False)
|
||||
|
||||
case Opcode.IF(target):
|
||||
if not stack.pop():
|
||||
stack.ip = target
|
||||
continue
|
||||
|
||||
case Opcode.DUP(n):
|
||||
stack.dup(n)
|
||||
|
||||
case Opcode.ROT(n):
|
||||
stack.rot(n)
|
||||
|
||||
case Opcode.DROP(n):
|
||||
stack.drop(n)
|
||||
|
||||
case Opcode.CALL(dest):
|
||||
sig = FunctionSignature.parse(dest)
|
||||
ip = mod.functions[dest]
|
||||
stack = stack.call(sig, ip)
|
||||
continue
|
||||
|
||||
case Opcode.RETURN(n):
|
||||
if stack.parent:
|
||||
stack = stack.ret(n)
|
||||
else:
|
||||
return stack.stack[:n]
|
||||
|
||||
case Opcode.GOTO(n, _):
|
||||
stack.ip = n
|
||||
continue
|
||||
|
||||
case _:
|
||||
raise Exception(f"Unhandled interpreter state {op}")
|
||||
|
||||
stack.ip += 1
|
||||
|
||||
|
||||
# The bootstrap module: boolean operators built from IF alone.
# IF branches when the popped value is False and falls through when it is True.
# Targets are relative to the start of each function.
BOOTSTRAP = Module()

NOT = ";/lang/shogoth/v0/bootstrap/not;bool;bool"
_not_body = [
    Opcode.IF(3),      # 0: input is False -> 3
    Opcode.FALSE(),    # 1: input was True
    Opcode.RETURN(1),  # 2
    Opcode.TRUE(),     # 3: input was False
    Opcode.RETURN(1),  # 4
]
BOOTSTRAP.define_function(NOT, _not_body)

OR = ";/lang/shogoth/v0/bootstrap/or;bool,bool;bool"
_or_body = [
    Opcode.IF(3),      # 0: first operand False -> 3
    Opcode.TRUE(),     # 1
    Opcode.RETURN(1),  # 2
    Opcode.IF(6),      # 3: second operand False -> 6
    Opcode.TRUE(),     # 4
    Opcode.RETURN(1),  # 5
    Opcode.FALSE(),    # 6: both False
    Opcode.RETURN(1),  # 7
]
BOOTSTRAP.define_function(OR, _or_body)

AND = ";/lang/shogoth/v0/bootstrap/and;bool,bool;bool"
_and_body = [
    Opcode.IF(3),      # 0: first operand False -> 3
    Opcode.IF(3),      # 1: second operand False -> 3
    Opcode.GOTO(5),    # 2: both True
    Opcode.FALSE(),    # 3
    Opcode.RETURN(1),  # 4
    Opcode.TRUE(),     # 5
    Opcode.RETURN(1),  # 6
]
BOOTSTRAP.define_function(AND, _and_body)

XOR = ";/lang/shogoth/v0/bootstrap/xor;bool,bool;bool"
_xor_body = [
    Opcode.DUP(2),     # 0: keep a copy of both operands for the second test
    # !A && B
    Opcode.CALL(NOT),  # 1
    Opcode.CALL(AND),  # 2
    Opcode.IF(6),      # 3: False -> try the other way round
    Opcode.TRUE(),     # 4
    Opcode.RETURN(1),  # 5
    # !B && A
    Opcode.ROT(2),     # 6
    Opcode.CALL(NOT),  # 7
    Opcode.CALL(AND),  # 8
    Opcode.IF(12),     # 9: False -> 12
    Opcode.TRUE(),     # 10
    Opcode.RETURN(1),  # 11
    Opcode.FALSE(),    # 12
    Opcode.RETURN(1),  # 13
]
BOOTSTRAP.define_function(XOR, _xor_body)
|
167
projects/shogoth/test/python/shogoth/vm/test_interpreter.py
Normal file
167
projects/shogoth/test/python/shogoth/vm/test_interpreter.py
Normal file
|
@ -0,0 +1,167 @@
|
|||
"""
|
||||
Tests covering the VM interpreter
|
||||
"""
|
||||
|
||||
from shogoth.vm import *
|
||||
|
||||
vm = Interpreter(BOOTSTRAP)
|
||||
|
||||
|
||||
def test_true():
    """TRUE pushes True, which RETURN(1) hands back."""
    program = [Opcode.TRUE(), Opcode.RETURN(1)]
    result = vm.run(program)
    assert result == [True]
|
||||
|
||||
|
||||
def test_false():
    """FALSE pushes False, which RETURN(1) hands back."""
    program = [Opcode.FALSE(), Opcode.RETURN(1)]
    result = vm.run(program)
    assert result == [False]
|
||||
|
||||
|
||||
def test_return():
    """RETURN(n) yields the top n values, top of stack first."""
    cases = [
        ([Opcode.FALSE(), Opcode.RETURN(0)], []),
        ([Opcode.TRUE(), Opcode.FALSE(), Opcode.RETURN(1)], [False]),
        ([Opcode.TRUE(), Opcode.FALSE(), Opcode.RETURN(2)], [False, True]),
    ]
    for program, expected in cases:
        assert vm.run(program) == expected
|
||||
|
||||
|
||||
def test_dup():
    """DUP(n) copies the top n values onto the stack."""
    cases = [
        # (DUP count, RETURN count, expected result)
        (1, 3, [False, False, True]),
        (2, 4, [False, True, False, True]),
    ]
    for count, returned, expected in cases:
        program = [
            Opcode.TRUE(),
            Opcode.FALSE(),
            Opcode.DUP(count),
            Opcode.RETURN(returned),
        ]
        assert vm.run(program) == expected
|
||||
|
||||
|
||||
def test_rot():
    """ROT(n) rotates the top n values of the stack."""
    two_items = [
        Opcode.TRUE(),
        Opcode.FALSE(),
        Opcode.ROT(2),
        Opcode.RETURN(2),
    ]
    assert vm.run(two_items) == [True, False]

    three_items = [
        Opcode.FALSE(),
        Opcode.TRUE(),
        Opcode.FALSE(),
        Opcode.ROT(3),
        Opcode.RETURN(3),
    ]
    assert vm.run(three_items) == [False, False, True]
|
||||
|
||||
|
||||
def test_drop():
    """DROP(1) discards the most recently pushed value."""
    program = [
        Opcode.TRUE(),
        Opcode.FALSE(),
        Opcode.DROP(1),
        Opcode.RETURN(1),
    ]
    result = vm.run(program)
    assert result == [True]
|
||||
|
||||
|
||||
def test_not():
    """The bootstrap NOT function inverts its argument."""
    cases = [
        (Opcode.TRUE(), False),
        (Opcode.FALSE(), True),
    ]
    for operand, expected in cases:
        program = [operand, Opcode.CALL(NOT), Opcode.RETURN(1)]
        assert vm.run(program) == [expected]
|
||||
|
||||
|
||||
def test_or():
    """The bootstrap OR function over its full truth table."""
    push = {True: Opcode.TRUE(), False: Opcode.FALSE()}
    truth_table = [
        # (first pushed, second pushed, expected)
        (False, False, False),
        (True, False, True),
        (False, True, True),
        (True, True, True),
    ]
    for first, second, expected in truth_table:
        program = [push[first], push[second], Opcode.CALL(OR), Opcode.RETURN(1)]
        assert vm.run(program) == [expected]
|
||||
|
||||
|
||||
def test_and():
    """The bootstrap AND function over its full truth table."""
    push = {True: Opcode.TRUE(), False: Opcode.FALSE()}
    truth_table = [
        # (first pushed, second pushed, expected)
        (False, False, False),
        (True, False, False),
        (False, True, False),
        (True, True, True),
    ]
    for first, second, expected in truth_table:
        program = [push[first], push[second], Opcode.CALL(AND), Opcode.RETURN(1)]
        assert vm.run(program) == [expected]
|
||||
|
||||
|
||||
def test_xor():
    """The bootstrap XOR function over its full truth table."""
    push = {True: Opcode.TRUE(), False: Opcode.FALSE()}
    truth_table = [
        # (first pushed, second pushed, expected)
        (False, False, False),
        (True, False, True),
        (False, True, True),
        (True, True, False),
    ]
    for first, second, expected in truth_table:
        program = [push[first], push[second], Opcode.CALL(XOR), Opcode.RETURN(1)]
        assert vm.run(program) == [expected]
|
Loading…
Reference in a new issue