Sketching towards a prototype lisp

This commit is contained in:
Reid 'arrdem' McKenzie 2023-05-03 22:46:26 -06:00
parent 6ce0e888b9
commit 55efcc6115
14 changed files with 729 additions and 0 deletions

View file

@ -1,3 +1,8 @@
# Build target for the Hydra interpreter package.
py_project(
    name = "hydra",
    lib_deps = [
        # attrs backs the model classes; click backs the command line interface.
        py_requirement("attrs"),
        py_requirement("click"),
    ],
    # Entry point module for running the package as a program.
    main = "src/python/hydra/__main__.py",
)

View file

@ -1,12 +1,21 @@
#!/usr/bin/env python3.10
from pathlib import Path
import click
from hydra.interpreter import run_script
# Root click command group; subcommands register themselves with @cli.command().
# Kept docstring-free on purpose: click would surface a docstring as help text.
@click.group()
def cli():
    pass
@cli.command()
@click.argument(
    "script",
    # Let click validate the argument up front so that a missing or unreadable
    # script is reported as a usage error rather than a traceback from open().
    type=click.Path(exists=True, dir_okay=False, readable=True, path_type=Path),
)
def run(script):
    """Run SCRIPT under the Hydra interpreter."""
    run_script(script)
# Script entry point: hand control to the click group, reporting the program
# name as "hydra" in usage and help output.
if __name__ == "__main__":
    cli.main(prog_name="hydra")

View file

@ -0,0 +1,54 @@
#!/usr/bin/env python3
"""The Hydra interpreter.
Hydra is an odd implementation of Python interpretation. Control is modeled in
terms of continuations, each continuation having a classical stack and a control
pointer. However rather than converting Python ASTs to a bytecode format
suitable for sequential indexing as in the CPython VM, Hydra simply maintains a
"cursor" into the AST. This allows for simultaneous implementation of direct AST
interpretation (for simplicity) and continuations.
"""
import ast
from pathlib import Path
from typing import Dict, List, Optional
from hydra.models import Cursor, Continuation, Module, Runtime, Namespace
from hydra.syntax import validate_mod
def run_script(
    script: Path,
    pypath: Optional[List[Path]] = None,
    pyenv: Optional[Dict[str, str]] = None,
):
    """Given a script, start a thread and run it to 'completion'.

    Mostly a stub designed to get me rolling.

    :param script: Path of the Python source file to load as ``__main__``.
    :param pypath: Path list handed to the runtime; defaults to an empty list.
    :param pyenv: Environment mapping handed to the runtime; defaults to an
        empty dict.
    :raises SyntaxError: If the source does not parse, or if the validator
        reports statements that Hydra cannot run.
    """

    pypath = pypath or []
    pyenv = pyenv or {}

    with open(script, "r", encoding="utf-8") as fp:
        # Passing the filename makes parse errors point at the script.
        mod_ast: ast.Module = ast.parse(fp.read(), filename=str(script))

    # Locate unsupported syntax, if any. The validator's findings used to be
    # thrown away; refuse to continue when it reports something. A validator
    # that reports nothing (empty or missing result) lets the script through.
    problems = validate_mod(mod_ast)
    if problems:
        where = ", ".join(
            f"{type(node).__name__} at line {getattr(node, 'lineno', '?')}"
            for node in problems
        )
        raise SyntaxError(f"{script}: unsupported syntax: {where}")

    # Create the module object
    mod = Module("__main__", mod_ast)

    # Create the main continuation
    main = Continuation(entrypoint=Cursor(module="__main__", path=[0]), stack=[])

    # Create the runtime, handing over the path and environment the caller
    # supplied (previously these were computed and then dropped).
    rt = Runtime(env=pyenv, path=pypath, modules=Namespace({"__main__": mod}))

    # Register the main continuation
    rt.conts[rt.main] = main

    # Stub: evaluation is not implemented yet, so just show the statements.
    for stmt in mod_ast.body:
        print(stmt)

View file

@ -0,0 +1,52 @@
#!/usr/bin/env python3
import ast
from pathlib import Path
from typing import Any, Dict, List, Optional
from attrs import define, field
from uuid import UUID, uuid4
@define
class Cursor:
    """A position inside a module's syntax tree.

    ``module`` is the name of the module being pointed into and ``path`` is a
    list of integer indices (presumably child positions walked from the module
    root - confirm once the interpreter actually consumes cursors).
    """

    module: str = field()
    path: List[int] = field()
@define
class Continuation:
    """A thread of control: an entry cursor, a stack of cursors and a parent.

    ``parent`` is optional and defaults to ``None``.
    """

    entrypoint: Cursor = field()
    stack: List[Cursor] = field()
    parent: Optional["Continuation"] = field(default=None)
@define
class Namespace:
    """Namespaces name ... values.

    Modules, classes and other forms of scopes are all Namespaces.
    """

    # `factory=` builds a fresh dict per instance. `default=dict` would store
    # the `dict` class object itself as the attribute value.
    d: Dict[str, Any] = field(factory=dict)
@define
class Module:
    """A 'module' maps names to code objects such as constants, classes and functions (also classes)."""

    # Name the module is registered under (e.g. "__main__").
    name: str
    # Parsed syntax tree of the module's source.
    ast: ast.Module
    # Name -> value bindings. `factory=` builds a fresh dict per instance;
    # `default=dict` would store the `dict` class object itself.
    d: Dict[str, Any] = field(factory=dict)
@define
class Runtime:
    """A Runtime represents a loaded and potentially runnable program.

    Every field is built by a factory so each Runtime gets its own fresh
    containers and its own main-continuation id. With ``default=`` the
    attribute would hold the callable itself (``dict``, ``list``, ``uuid4``,
    ``Namespace``) instead of a value produced by calling it.
    """

    # String-to-string environment mapping.
    env: Dict[str, str] = field(factory=dict)
    # List of filesystem paths (presumably the module search path - confirm).
    path: List[Path] = field(factory=list)
    # Key under which the main continuation is registered in `conts`.
    main: UUID = field(factory=uuid4)
    # All continuations known to the runtime, keyed by id.
    conts: Dict[UUID, Continuation] = field(factory=dict)
    # Loaded modules.
    modules: Namespace = field(factory=Namespace)

View file

@ -0,0 +1,154 @@
#!/usr/bin/env python3
"""The syntax checker.
The Python AST module will gladly parse a variety of meaningless statements such
as top-level `pass` and `return` statements. These are, technically, illegal and
cannot be evaluated.
Furthermore, Hydra currently doesn't support a lot of Python's syntax. Match
and async, for instance, are right out.
The syntax module exists to provide "validation" that a given AST tree is
"legitimate".
"""
import ast
from typing import Union
# 'stmt' from the AST def. (3.11)
# Type alias enumerating the statement node classes; used to annotate the
# statement validator's argument.
# NOTE(review): the 3.11 grammar also defines `TryStar`, which is not listed.
Statement = Union[
    ast.FunctionDef,
    ast.AsyncFunctionDef,
    ast.ClassDef,
    ast.Return,
    ast.Delete,
    ast.Assign,
    ast.AugAssign,
    ast.AnnAssign,
    ast.For,
    ast.AsyncFor,
    ast.While,
    ast.If,
    ast.With,
    ast.AsyncWith,
    ast.Match,
    ast.Raise,
    ast.Try,
    ast.Assert,
    ast.Import,
    ast.ImportFrom,
    ast.Global,
    ast.Nonlocal,
    ast.Expr,
    ast.Pass,
    ast.Break,
    ast.Continue,
]
# 'expr' from the AST def. (3.11)
# Type alias enumerating the expression node classes. Not to be confused with
# `ast.Expression`, which is the root node produced by parsing in "eval" mode.
Expression = Union[
    ast.BoolOp,
    ast.NamedExpr,
    ast.BinOp,
    ast.UnaryOp,
    ast.Lambda,
    ast.IfExp,
    ast.Dict,
    ast.Set,
    ast.ListComp,
    ast.SetComp,
    ast.DictComp,
    ast.GeneratorExp,
    ast.Await,
    ast.Yield,
    ast.YieldFrom,
    ast.Compare,
    ast.Call,
    ast.FormattedValue,
    ast.JoinedStr,
    ast.Constant,
    ast.Attribute,
    ast.Subscript,
    ast.Starred,
    ast.Name,
    ast.List,
    ast.Tuple,
    ast.Slice,
]
def collect(ctor):
    """Coerce a generator style function into returning a collection.

    :param ctor: Callable applied to the wrapped function's result, typically
        a collection type such as ``list`` or ``tuple``.
    :returns: A decorator. The decorated function takes the same arguments as
        the original and returns ``ctor(original(*args, **kwargs))``.
    """

    # Imported locally so this helper does not depend on the module header.
    from functools import wraps

    def decorator(f):
        # Keep the wrapped function's name and docstring; without this every
        # decorated function would introspect as "helper".
        @wraps(f)
        def helper(*args, **kwargs):
            return ctor(f(*args, **kwargs))

        return helper

    return decorator
def validate_expr(expr: ast.AST):
    """Yield the nodes inside `expr` that Hydra cannot evaluate.

    Async is unsupported throughout, so `await` expressions are reported.
    Everything else is currently let through.
    """

    for node in ast.walk(expr):
        if isinstance(node, ast.Await):
            yield node


def _nested_statements(stmt):
    """Yield the statements directly nested in a compound statement.

    Blocks are visited in source order: body, exception handlers, `else`,
    `finally`. Blocks a node type does not have are skipped.
    """

    yield from stmt.body
    for handler in getattr(stmt, "handlers", None) or []:
        yield from handler.body
    yield from getattr(stmt, "orelse", None) or []
    yield from getattr(stmt, "finalbody", None) or []


@collect(list)
def validate_stmt(stmt: Statement, top=False):
    """Return the list of unsupported nodes found in `stmt`.

    :param stmt: The statement to check.
    :param top: True when `stmt` sits directly at module level, where some
        otherwise acceptable statements are meaningless.
    :returns: A list (via `collect`) of offending AST nodes; empty when the
        statement is acceptable.
    """

    if isinstance(stmt, (ast.AsyncFunctionDef, ast.AsyncFor, ast.AsyncWith, ast.Match)):
        # Stuff we don't support anywhere
        yield stmt

    elif top and isinstance(
        stmt,
        (
            ast.Break,
            ast.Continue,
            ast.Nonlocal,
            ast.Pass,
            ast.Return,
        ),
    ):
        # Statements that are specifically meaningless at the top level.
        # (The async forms are already rejected by the branch above.)
        yield stmt

    elif isinstance(
        stmt,
        (
            ast.FunctionDef,
            ast.ClassDef,
            ast.If,
            ast.Try,
            ast.With,
            ast.While,
            ast.For,
        ),
    ):
        # Recur into every nested block, including the `else:` of loops.
        # Nested statements are never "top level".
        for it in _nested_statements(stmt):
            yield from validate_stmt(it)

    elif isinstance(stmt, ast.Expr):
        # Expression statements are `ast.Expr`; `ast.Expression` is the root
        # node of an "eval" mode parse and never appears in a statement list.
        yield from validate_expr(stmt.value)
def validate_mod(mod: ast.Module):
    """Validate every top-level statement of a module.

    :param mod: The parsed module to check.
    :returns: A list of the offending AST nodes, in source order; empty when
        nothing unsupported was found.
    """

    problems = []
    for stmt in mod.body:
        problems.extend(validate_stmt(stmt, top=True))

    # Previously the findings were collected and then dropped on the floor.
    return problems

View file

@ -0,0 +1,6 @@
# Build target for the Milkshake Lisp reader.
py_project(
    name = "milkshake",
    lib_deps = [
        # lark provides the parser the reader is built on.
        py_requirement("lark"),
    ],
)

View file

@ -0,0 +1,45 @@
"""A quick and dirty Lisp reader"""
from importlib.resources import files
from lark import Lark, Tree, Token, Transformer, v_args
# Load the grammar text that ships inside this package, once, at import time.
GRAMMAR = files(__package__).joinpath("grammar.lark").read_text(encoding="utf-8")
@v_args(tree=True)
class T(Transformer):
    """A prepackaged transformer that cleans up the quoting details.

    The grammar uses a parallel set of ``qq_*`` rules inside quasiquotes. This
    transformer renames those nodes to their plain counterparts and strips the
    leading quote-marker token from the quoting forms.
    """

    def un_qq(self, obj):
        """Return a copy of `obj` whose rule name has its ``qq_`` prefix removed."""
        return Tree(
            Token(obj.data.type, obj.data.value.replace("qq_", "", 1)), obj.children
        )

    qq_list = un_qq
    qq_tuple = un_qq
    # Was misspelled `qq_vecotor`, which never matched the grammar's
    # `qq_vector` rule and so left those nodes un-renamed.
    qq_vector = un_qq
    qq_map = un_qq
    qq_set = un_qq
    qq_atom = un_qq
    qq_symbol = un_qq

    def qq_quote(self, obj):
        """Rename a ``qq_quote`` node to ``quote``, then strip its marker."""
        return self.quote(self.un_qq(obj))

    def quote(self, obj):
        """Drop the first child (the quote-marker token), keeping the rest."""
        return Tree(obj.data, obj.children[1:])

    quasiquote = quote
    unquote = quote
    unquote_splicing = quote
# Build the parser once at import time. Both "module" and "expr" are registered
# as start rules; callers choose one per parse.
PARSER = Lark(GRAMMAR, start=["module", "expr"])
def slurp(text: str, start="module") -> Tree:
    """Parse `text` from the `start` rule and return the cleaned-up tree."""
    cleaner = T()
    raw_tree = PARSER.parse(text, start=start)
    return cleaner.transform(raw_tree)

View file

@ -0,0 +1,77 @@
// Grammar for the Milkshake Lisp reader.
//
// Inside a quasiquote a parallel family of `qq_*` rules is used so that
// unquote and unquote-splicing are only legal there.

start: module

module: expr+

?expr: quote
     | quasiquote
     | list
     | tuple
     | vector
     | set
     | map
     | atom

quote: QUOTE expr

quasiquote: QUASIQUOTE qq_expr
unquote: UNQUOTE qq_expr
unquote_splicing: UNQUOTE_SPLICING qq_expr
qq_quote: QUOTE qq_expr

?qq_expr: qq_quote
        | unquote
        | unquote_splicing
        | qq_list
        | qq_tuple
        | qq_vector
        | qq_set
        | qq_map
        | qq_atom

list: "(" expr* ")"
qq_list: "(" qq_expr* ")"

// Tuples and vectors hold any number of elements, like the other collections.
// They used to require exactly one, which rejected `[]` and `[x y]`.
tuple: "[" expr* "]"
qq_tuple: "[" qq_expr* "]"

vector: "#[" expr* "]"
qq_vector: "#[" qq_expr* "]"

map: "{" (expr expr)* "}"
qq_map: "{" (qq_expr qq_expr)* "}"

set: "#{" expr* "}"
qq_set: "#{" qq_expr* "}"

?atom: string
     | pattern
     | number
     | symbol

?qq_atom: string
        | pattern
        | number
        | qq_symbol

// A body character is either a backslash escape (backslash plus any character)
// or anything that is neither the delimiter nor a backslash. Listing the
// escape explicitly lets \" and \/ appear inside the literal. The empty
// string "" is also accepted.
string: /"([^"\\]|\\.)*"/

pattern: /\/([^\/\\]|\\.)+\//

number: /[+-]?(\d+r)?(\d[\d,_\.]*)([\.,][\d,_\.]*)?(e[+-]?\d+)?/

// Note that we're demoting Symbol from the default parse priority of 0 to -1
// This because _anything more specific_ should be chosen over symbol
// NOTE(review): the character class excludes digits everywhere in a symbol,
// not just in the leading position, so a name such as `add2` is not a symbol.
symbol.-1: /[^\d\s'"`()\[\]:]+/

// Note that qq symbols explicitly forbid leading ~, unlike normal symbols
qq_symbol.-1: /[^\d\s'"`()\[\]:~]+/

QUOTE: /'/
QUASIQUOTE: /`/
UNQUOTE: /~/
UNQUOTE_SPLICING: /~@/

// A comment runs to the end of the line. The newline itself is left to
// WHITESPACE so that a comment on the last line of input still matches.
COMMENT: /;[^\n]*/
WHITESPACE: /\s+/

%ignore COMMENT
%ignore WHITESPACE

View file

@ -0,0 +1,101 @@
#!/usr/bin/env python3
from milkshake import slurp
from lark import Tree, Token
import pytest
def _sym(name):
    """Expected tree for a bare symbol."""
    return Tree("symbol", [name])


def _num(text):
    """Expected tree for a numeric literal, kept as its source text."""
    return Tree("number", [text])


def _quoted_empty_list():
    """Expected tree for `'()`: a quote wrapping an empty list."""
    return Tree("quote", [Tree("list", [])])


nil = _sym("nil")

# (source text, expected tree) pairs, in the order they are reported by pytest.
_EXAMPLES = [
    ("()", Tree("list", [])),
    ("nil", nil),
    ("(nil nil nil)", Tree("list", [nil, nil, nil])),
    # Punctuation that must read as symbols
    (
        "(/ + - * % ^ \\ & # @ ! = |)",
        Tree(
            "list",
            [_sym(s) for s in ("/", "+", "-", "*", "%", "^", "\\", "&", "#", "@", "!", "=", "|")],
        ),
    ),
    # Numbers, with assorted signs, separators and exponents
    *[
        (text, _num(text))
        for text in (
            "1.0",
            "+1.0",
            "-1.0",
            "-10,000,000.0",
            "-10.000.000,0",
            "-10_000_000,0",
            "-10_000_000.0",
            "1e50",
            "-1e-50",
            "-1e+50",
        )
    ],
    # Things that look numeric-ish but are symbols
    (
        "(+inf -inf inf nan +nan -nan)",
        Tree(
            "list",
            [_sym(s) for s in ("+inf", "-inf", "inf", "nan", "+nan", "-nan")],
        ),
    ),
    # Quoting forms
    ("'()", _quoted_empty_list()),
    (
        "`(nil ~'() ~@'())",
        Tree(
            "quasiquote",
            [
                Tree(
                    "list",
                    [
                        _sym("nil"),
                        Tree("unquote", [_quoted_empty_list()]),
                        Tree("unquote_splicing", [_quoted_empty_list()]),
                    ],
                ),
            ],
        ),
    ),
    # Regex-style pattern literal
    (r"/foo\S+/", Tree("pattern", [r"/foo\S+/"])),
]


@pytest.mark.parametrize("input, val", _EXAMPLES)
def test_examples(input, val):
    """Each example must read, as a single expression, to the expected tree."""
    parsed = slurp(input, start="expr")
    assert parsed == val

7
components/typhon/BUILD Normal file
View file

@ -0,0 +1,7 @@
# Build target for the Typhon prototype interpreter.
py_project(
    name = "typhon",
    lib_deps = [
        # In-repo dependency on the Milkshake reader component.
        "//components/milkshake",
        py_requirement("attrs"),
    ]
)

View file

@ -0,0 +1,51 @@
# Notes
Natural gradient; interpreter -> semi-VM -> tracing JIT/translator -> optimizing JIT/translator
-> abstract interpreter -> static compiler/translator
A semi-VM which demand translates AST nodes into a stack of demanded evaluation
terms and then walks the evaluation stack as if it were a bytecode or
semi-bytecode evaluator. The advantage of this strategy is that the demanded
operation / control stack and paired data stack eliminate the need to leverage
the system control stack. This gets you serializable stacks for Flowmetal. But
you write two half interpreters.
Now the natural question is why write a hosted VM to get serializable stacks
when Python has a perfectly good bytecode VM already? Sticking one VM atop
another is ... a bit silly especially since the goal of doing so is to be able
to "drop down" from the one to the other to ensure compatibility.
Is there a lens through which the serialization requirements of Flowmetal can
be satisfied from "normal" Python using the "normal" Python bytecode
interpreter?
Consider - function call points and function return points are in a sense
language safe points. Rather than trying to capture the entire evaluation
"image" at some point, one could instead track the call/return evaluation log
for replay. Such a scheme would allow Flowmetal to be implemented using static
rewrites of Python ASTs. Any function call becomes a checkpoint as does
receiving the return result.
Any `__call__` invocation needs to be evaluated as something like
x = runtime.call(const_gen_call_id, instance, args)
This tactic specifically leans on `yield` being a statement _with a return
value_. This pattern would let the "runtime" as the root evaluation routine
'continue' any given call site with the return result. `runtime.call` would be
some incantation for producing a sentinel value to the runtime noting that a
function call had been requested - and that its result should either be computed
or replayed from a log.
There are a lot of opportunities for optimization here. Not every function call
needs its value persisted into the log. Most function calls depend only on the
direct live state of the program. Exceptions are things like interacting with
file descriptors/sockets and clocks. But strictly data-dependent operations like
dictionary mutations are entirely safe under replay. They're only path
dependent. So really we only need to "de-optimize" or spy on "key" function
calls which occur against _unsafe_ operations. Or which occur against captured
function/method instances which cannot be statically identified.
There may be games to be played with yield/coroutines here, but that could play
heck with normal generators. Intercepting "normal" calls with "normal" calls is
probably the easy strategy.

View file

@ -0,0 +1,147 @@
#!/usr/bin/env python3
"""
A prototype 'flat' interpreter for a continuation based Lisp.
Not intended to be anything real, just intended to serve as a prototype for how
to implement a flat, stackless, serializable interpreter.
"""
import operator
import typing as t
import attrs
@attrs.define()
class Vm:
    """
    Whole-machine state for the prototype interpreter.

    :field log: All evaluated expressions, in order
    :field mod_ns: A map from symbols to currently bound expressions
    :field continuations: A list of coroutines/continuations of control
    """

    log: t.List[list] = attrs.field()
    mod_ns: t.Dict[str, t.Any] = attrs.Factory(dict)
    continuations: t.List["Cont"] = attrs.Factory(list)
@attrs.define()
class Cont:
    """
    A continuation: one sequence of evaluation.

    :field frames: A list of call frames; a fresh empty list by default
    """

    frames: t.List["Frame"] = attrs.Factory(list)
@attrs.define()
class Frame:
    """
    A call frame; frames mark function call boundaries.

    :field pc: Program points within the AST being interpreted; ``[0]`` by default
    :field frame_ns: Frame-local bindings
    :field op_stack: List of pending operations, empty by default
    :field data_stack: List of data values, empty by default
    """

    pc: list = attrs.Factory(lambda: [0])
    frame_ns: dict = attrs.Factory(dict)
    op_stack: list = attrs.Factory(list)
    data_stack: list = attrs.Factory(list)
def chain(its):
    """Lazily yield every item of every iterable in `its`, in order."""
    for inner in its:
        for item in inner:
            yield item
def get_in(idxable, idxs):
    """Follow `idxs` one index at a time into a nested indexable value.

    An empty `idxs` returns `idxable` itself.
    """
    node = idxable
    for key in idxs:
        node = operator.getitem(node, key)
    return node
class Operator:
    """Stack operators.

    Currently an empty placeholder; `Task.Apply` names this class as the type
    of its `operator` field.
    """
class Task:
    """Namespace grouping the task record types."""

    @attrs.define()
    class Complete:
        """Signals program termination."""

    @attrs.define()
    class Eval:
        """Evaluate the current term."""

    @attrs.define()
    class Apply:
        """Apply an operator to the stack."""

        operator: "Operator"

    @attrs.define()
    class Next:
        """Advance the program counter to the 'next' statement."""

        pc: list = attrs.field()
def step(vm: Vm, cont: Cont, frame: Frame):
"""Step moves the VM forwards one "cycle".
This VM is built as a semi-VM.
Interpretation occurs at two levels - the first over a direct syntax tree.
This allows the interpreter to forego a formal VM or any compilation step.
This model however poses a challenge for AST nodes with data dependencies.
Sub-expressions are the most obivous example. For these, we need semi-VM
behavior where the AST node is translated into a sequence of concrete
sub-step demanded operations which can actually be executed. Note that this
provides a neat natural definition of a tracing JIT or optimizing compiler.
This makes the execution mode well bimodal.
- On the one hand if we have demanded operations on the stack (`op_stack`)
then we want to execute the "top" demanded operation.
- On the other, if we have no demanded operations we want to
"""
match expr:
case ["native", native_fn, arg_exprs, vararg_expr]:
pass
case ["invoke", fn_expr, arg_exprs, vararg_expr]:
pass
case ["var", name]:
pass
case int() | float() | str() | bool():
pass
def run(program, state=None):
vm = state or Vm([], {}, [])
replc = Cont([Frame([0], {})])
vm.continuations.append(replc)
for form in program:
# Enter the form into the log
vm.log.append(form)
# Set the REPL continuation's point to the new form
replc.frames = [Frame([len(vm.log) - 1])]
# Evaluate the continuation to completion (or something)
while True:
match (state := step(vm, replc, replc.frames[-1])):
case _:
pass

View file

@ -0,0 +1 @@
#!/usr/bin/env python3

View file

@ -0,0 +1,20 @@
; -*- mode: scheme -*-
; Binary addition, delegated to a host Python lambda through the `py` form.
; The `py` form is given the Python source, a vector of positional arguments
; and a final (here empty) vector.
(define +
  (lambda (x y)
    (py "lambda x, y: x + y" [x y] [])))
; printf-style formatting, delegated to Python's `%` operator.
; `fmt` has to be a named parameter: the positional vector `[fmt]` refers to
; it, and with only `&args` in the parameter list it had no binding.
(define format
  (lambda (fmt &args)
    (py "lambda fmt, *args: fmt % args" [fmt] &args)))
; Print all arguments by handing them, as the trailing `&args` operand, to
; Python's `print`.
(define print
  (lambda (&args)
    (py "print" [] &args)))
; Add two to `x`.
; The arguments go directly to `+`; the previous `(+ (2 x))` tried to call `2`
; as a function and passed `+` a single argument.
(define add2
  (lambda (x)
    (+ 2 x)))
; Smoke test for the definitions above: format the result of (add2 2) with
; "%d" and print it.
(print
  (format "%d" (add2 2)))