[NO TESTS] WIP

Initial venv snapshot
Import the upstream Python unit tests covering tokenize and ast
2023-03-15 00:45:32 -06:00 · 2023-03-08 15:31:04 -07:00 · 2023-03-08 15:28:56 -07:00 · 2023-03-08 15:28:34 -07:00 · 2023-03-08 15:15:50 -07:00 · 2023-03-08 15:15:50 -07:00
26 changed files with 946 additions and 103 deletions
--- a/.bazelignore
+++ b/.bazelignore
@ -0,0 +1 @@
+.git
--- a/.bazelrc
+++ b/.bazelrc
@ -0,0 +1,2 @@
+test --test_output=errors
+build --keep_going
--- a/.bazelversion
+++ b/.bazelversion
@ -0,0 +1 @@
+6.0.0
--- a/.envrc
+++ b/.envrc
@ -0,0 +1 @@
+export VIRTUAL_ENV=/home/arrdem/.virtualenvs/flowmetal
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,4 @@
 /**/__pycache__
 /**/*.egg-info
+scratch
+bazel-*
--- a/LICENSE.md
+++ b/LICENSE.md
@ -1,7 +0,0 @@
-Copyright 2019 Reid 'arrdem' McKenzie
-
-Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/README.md
+++ b/README.md
@ -33,4 +33,4 @@ This centering of evented communication makes Flowmetal ideal for **coordination

 ## License

-Published under the MIT license. See [LICENSE.md](LICENSE.md)
+Copyright © 2023 Reid D. 'arrdem' McKenzie, all rights reserved.
--- a/52
+++ b/52
@ -0,0 +1,52 @@
+workspace(
+    name = "flowmetal"
+)
+
+# http_archive is the only repository rule needed from bazel_tools; load it once.
+load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
+
+http_archive(
+    name = "bazel_skylib",
+    sha256 = "b8a1527901774180afc798aeb28c4634bdccf19c4d98e7bdd1ce79d1fe9aaad7",
+    urls = [
+        "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.4.1/bazel-skylib-1.4.1.tar.gz",
+        "https://github.com/bazelbuild/bazel-skylib/releases/download/1.4.1/bazel-skylib-1.4.1.tar.gz",
+    ],
+)
+
+load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
+
+bazel_skylib_workspace()
+
+rules_python_version = "c504355672223144cefb2cbf3f69e2d38e7e2726"
+
+http_archive(
+    name = "rules_python",
+    sha256 = "3f12b492dbf7d56b0e3deed81f21d56c3241babaa52d7eb525cb7c657bba9125",
+    strip_prefix = "rules_python-{}".format(rules_python_version),
+    url = "https://github.com/bazelbuild/rules_python/archive/{}.zip".format(rules_python_version),
+)
+
+load("@rules_python//python:repositories.bzl", "python_register_toolchains")
+
+python_register_toolchains(
+    name = "python3_10",
+    python_version = "3.10",
+)
+
+load("@python3_10//:defs.bzl", python3_10="interpreter")
+
+load("@rules_python//python:pip.bzl", "pip_parse")
+
+# Create a central repo that knows about the dependencies needed from
+# requirements_lock.txt.
+pip_parse(
+   name = "pypi",
+   python_interpreter_target = python3_10,
+   requirements_lock = "//tools/python:requirements_lock.txt",
+)
+
+load("@pypi//:requirements.bzl", "install_deps")
+
+install_deps()
--- a/components/hydra/BUILD
+++ b/components/hydra/BUILD
@ -0,0 +1,3 @@
+py_project(
+    name = "hydra",
+)
--- a/components/hydra/README.md
+++ b/components/hydra/README.md
@ -0,0 +1,13 @@
+# Hydra; a multi-headed Python-on-Python interpreter.
+
+Hydra draws extensively on the OCaml and Scheme prior art for coroutine-based
+interpreters to produce a Python interpreter along the same lines.
+
+At the top of the interpreter we see an extensible `while` loop. The interpreter
+performs one "step" - evaluating a single statement - producing a new state and
+a "next PC"; a path into the AST identifying the next statement to be executed.
+
+This model enables single stepping, stack analysis and, most importantly,
+interruptions such as snapshotting or suspending.
+
+Hydra is the basis for the Flowmetal interpreter.
--- a/components/hydra/src/python/hydra/init.py
+++ b/components/hydra/src/python/hydra/init.py
@ -0,0 +1,201 @@
+#!/usr/bin/env python3.10
+
+"""Hydra; the multi-headed Python interpreter.
+
+> Chop off one head and two more grow back in its place.
+
+
+
+"""
+
+import ast
+import builtins
+import logging
+import os
+import sys
+from typing import Optional, Type, Union, List, Callable
+from pathlib import Path
+from importlib import __import__
+
+from attrs import Factory, define, field
+
+log = logging.getLogger(__name__)
+
+
+@define
+class ANamespace:
+    node: ast.AST
+    d: dict = {}
+    parent: Optional[Type["ANamespace"]] = None
+
+    def __getitem__(self, k):
+        return self.d[k]
+
+    def get(self, k, default=None):
+        return self.d.get(k, default)
+
+    def __setitem__(self, k, v):
+        self.d[k] = v
+
+    def __delitem__(self, k):
+        del self.d[k]
+
+    def __contains__(self, k):
+        return k in self.d
+
+    def __str__(self):
+        return "<{} {}>".format(self.__class__.__name__, self.d)
+
+
+@define
+class ModuleNS(ANamespace):
+    """Namespace subtype for module scope; `Module` builds one around its tree by default."""
+
+    # parent: Optional["ModuleNS"] = None
+    pass
+
+
+@define
+class FunctionNS(ANamespace):
+    """Namespace subtype for function scope; adds no fields or behaviour to ANamespace."""
+
+    pass
+
+
+@define
+class ClassNS(ANamespace):
+    """Namespace subtype for a class body."""
+
+    # The class object associated with this namespace, None until assigned.
+    # NOTE(review): presumably filled in after the class is constructed - confirm
+    # against the interpreter that consumes it.
+    cls: Optional[type] = None
+
+
+# TODO (arrdem 2023-03-08):
+#   This interpreter works well enough to import `requests` and many other libraries and do some
+#   work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
+#   direct execution, there's really no way to jam breakpoints or checkpoints or resume points into
+#   program execution. Which is kinda the goal of the whole project.
+#
+#   This interpreter, while complete, needs to get refactored into probably a `yield` based
+#   coroutine structure wherein individual operations explicitly `yield` to an outer state
+#   management loop which effectively trampolines single statements together with state management
+#   logic.
+#
+#   The outer interpreter needs to be able to check the "step budget" and decide if it's time for
+#   the program to suspend.
+#
+#   Individual steps (workflow calls/function calls) may also cause the program to suspend.
+#
+#   Suspending requires signaling the top level loop, and the top level loop needs both the
+#   namespace tree and some sort of cursor or address into the AST under interpretation
+#   representing where to resume. The logical equivalent of a program counter, but a tree path.
+
+
+@define
+class Module:
+    fname: Union[Path, str]
+    tree: ast.AST
+    ns: "ANamespace" = field()
+
+    @ns.default
+    def _ns_default(self):
+        return ModuleNS(self.tree)
+
+@define
+class Pc:
+    """A 'program counter' as a list of AST indices."""
+
+    # Index path into the AST; starts out as a fresh empty list per instance.
+    idxs: List[int] = Factory(list)
+
+
+@define
+class Frame:
+    """An 'execution frame' as a PC, AST and namespace."""
+    # Position within `ast`, expressed as a Pc index path.
+    pc: Pc
+    # The AST this frame refers to. Within the class body this field name
+    # shadows the `ast` module for any annotation written after it.
+    ast: ast.AST
+    # The namespace associated with this frame.
+    ns: ANamespace
+
+
+@define
+class Cont:
+    """A 'Continuation' (thread/coroutine) of execution."""
+    id: int
+    entry: Frame
+    stack: List[Frame] = field()
+
+    @stack.default
+    def _stack_default(self):
+        return [self.entry]
+
+
+@define
+class Vm:
+    """A bag of shared state.
+
+    :attribute path: The equivalent of sys.path
+    :attribute modules: The equivalent of sys.modules
+    :attribute conts: All interpreter continuations
+    :attribute log: A log of all statements executed by any continuation
+
+    It should be possible to reconstruct the VM's state simply by replaying the
+    log in statement order, as a fallback for dealing with C-extension state,
+    connections and soforth.
+
+    """
+
+    path: list = Factory(lambda: list(sys.path))
+    modules: dict = Factory(dict)
+    conts: dict = Factory(dict)
+    log: list = Factory(list)
+
+    def handle_import(
+        self, thread, name, globals=None, locals=None, fromlist=(), level=0
+    ):
+        log.debug("  Attempting to import '{}'".format(name))
+        if name not in self.modules:
+            if name in sys.modules:
+                # FIXME: Need to hack sys, os and several other built-in packages here
+                log.debug("Short-circuited loading %r from bootstrap sys.modules", name)
+                self.modules[name] = sys.modules[name]
+
+            else:
+                name = name.replace(".", os.path.sep)
+                for e in self.path:
+                    for ext in [
+                        ".py",
+                    ]:
+                        if os.path.isdir(e):
+                            f = os.path.join(e, name + ext)
+                            log.debug("  Checking {}".format(f))
+                            if os.path.exists(f):
+                                mod = self.execute_load(thread, f, name)
+                                self.modules[name] = mod.ns
+                                break
+
+                        elif os.path.isfile(e):
+                            # FIXME (arrdem 2021-05-31)
+                            raise RuntimeError(
+                                "Import from .zip/.whl/.egg archives aren't supported yet"
+                            )
+
+                else:
+                    log.debug("Falling back to native import for %r", name)
+                    self.modules[name] = __import__(
+                        name, globals, locals, fromlist, level
+                    )
+
+        return self.modules[name]
+
+    def execute_module(self, module: Module):
+        """Execute all the Expressions and Statements in a given Module sequentially, as in a single thread."""
+
+    def execute_load(self, fname, name):
+        """Execute the given file as if it were an imported module."""
+
+        # FIXME: Choose encoding here
+        with open(fname) as f:
+            tree = ast.parse(f.read())
+
+        mod = Module(fname, tree)
+        mod.ns["__name__"] = name
+        self.modules[name] = mod.ns
+        self.execute_module(mod)
+        return mod
+
+    def execute_dunder_main(self, fname):
+        """Execute the given file as if it were a script entrypoint."""
+
+        return self.execute_load(fname, "__main__")
--- a/components/hydra/src/python/hydra/main.py
+++ b/components/hydra/src/python/hydra/main.py
@ -0,0 +1,12 @@
+#!/usr/bin/env python3.10
+
+import click
+
+
+@click.group()
+def cli():
+    # Root command group. No subcommands are attached to it in this file.
+    pass
+
+
+if __name__ == "__main__":
+    # Run the group as a program, passing "hydra" as the program name.
+    cli.main(prog_name="hydra")
--- a/examples/timeout.flow
+++ b/examples/timeout.flow
@ -2,7 +2,7 @@

 from datetime import timedelta
 from time import sleep
-from flowmetal import workflow, timeout, CancelledError, TimeoutError, Task
+from flowmetal import workflow, CancelledError, TimeoutError, Task


 def cancellable_activity():
--- a/pyproject.toml
+++ b/pyproject.toml
@ -0,0 +1,8 @@
+[tool.isort]
+py_version=311
+line_length=100
+skip_glob = [
+   ".git/*",
+   ".bazel/*",
+   "bazel-*",
+]
--- a/scratch/astinterp.py
+++ b/scratch/astinterp.py
@ -1,47 +1,17 @@
 # flake8: noqa: all

-# Python AST interpreter written in Python
+# A Python AST interpreter written in Python
 #
-# This module is part of the Pycopy https://github.com/pfalcon/pycopy
-# project.
+# This module is part of the Pycopy https://github.com/pfalcon/pycopy project.
 #
-# Copyright (c) 2019 Paul Sokolovsky
-#
-# The MIT License
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# Modified by Reid D. 'ardem' Mckenzie in 2021 to be a bit more fully-featured
-# and usable for running 'real' code as part of an experiment in implementing a
-# durable Python interpreter atop the original pycopy substrate.
+# Copyright (c) 2019 Paul Sokolovsky, published under the MIT License

 import ast
+import builtins
 import logging
 import os
 import sys
-
-
-if sys.version_info < (3, 0, 0):
-    builtins = __builtins__
-else:
-    import builtins
-
+from typing import Optional, Type

 log = logging.getLogger(__name__)

@ -55,7 +25,7 @@ class StrictNodeVisitor(ast.NodeVisitor):
 class ANamespace:
    def __init__(self, node):
        self.d = {}
-        self.parent = None
+        self.parent: Optional[Type["ANamespace"]] = None
        # Cross-link namespace to AST node. Note that we can't do the
        # opposite, because for one node, there can be different namespaces.
        self.node = node
@ -80,6 +50,7 @@ class ANamespace:


 class ModuleNS(ANamespace):
+    # parent: Optional["ModuleNS"] = None
    pass


@ -88,7 +59,7 @@ class FunctionNS(ANamespace):


 class ClassNS(ANamespace):
-    pass
+    cls: Optional[type] = None


 # Pycopy by default doesn't support direct slice construction, use helper
@ -102,16 +73,10 @@ slice_getter = SliceGetter()


 def arg_name(arg):
-    if sys.version_info < (3, 0, 0):
-        return arg.id
-    else:
    return arg.arg


 def kwarg_defaults(args):
-    if sys.version_info < (3, 0, 0):
-        return args.defaults
-    else:
    return args.kw_defaults


@ -154,11 +119,12 @@ class InterpFuncWrap:
        return self.interp.call_func(self.node, self, *args, **kwargs)


-# Python don't fully treat objects, even those defining __call__() special method, as a true callable. For example, such
-# objects aren't automatically converted to bound methods if looked up as another object's attributes. As we want our
-# "interpreted functions" to behave as close as possible to real functions, we just wrap function object with a real
-# function. An alternative might have been to perform needed checks and explicitly bind a method using
-# types.MethodType() in visit_Attribute (but then maybe there would be still other cases of "callable object" vs
+# Python doesn't fully treat objects, even those defining __call__() special method, as a true
+# callable. For example, such objects aren't automatically converted to bound methods if looked up
+# as another object's attributes. As we want our "interpreted functions" to behave as closely as
+# possible to real functions, we just wrap function object with a real function. An alternative
+# might have been to perform needed checks and explicitly bind a method using types.MethodType() in
+# visit_Attribute (but then maybe there would be still other cases of "callable object" vs
 # "function" discrepancies).
 def InterpFunc(fun):
    def func(*args, **kwargs):
@ -195,26 +161,50 @@ class InterpModule:
        return list(self.ns.d.keys())


+# TODO (arrdem 2023-03-08):
+#   This interpreter works well enough to import `requests` and many other libraries and do some
+#   work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
+#   direct execution, there's really no way to jam breakpoints or checkpoints or resume points into
+#   program execution. Which is kinda the goal of the whole project.
+#
+#   This interpreter, while complete, needs to get refactored into probably a `yield` based
+#   coroutine structure wherein individual operations explicitly `yield` to an outer state
+#   management loop which effectively trampolines single statements together with state management
+#   logic.
+#
+#   The outer interpreter needs to be able to check the "step budget" and decide if it's time for
+#   the program to suspend.
+#
+#   Individual steps (workflow calls/function calls) may also cause the program to suspend.
+#
+#   Suspending requires signaling the top level loop, and the top level loop needs both the
+#   namespace tree and some sort of cursor or address into the AST under interpretation
+#   representing where to resume. The logical equivalent of a program counter, but a tree path.
+
+
 class ModuleInterpreter(StrictNodeVisitor):
    """An interpreter specific to a single module."""

    def __init__(self, system, fname, node):
        self.system = system
        self.fname = fname
-        self.ns = self.module_ns = ModuleNS(node)
+        self.module_ns: ModuleNS = ModuleNS(node)
+        self.ns: ANamespace = self.module_ns

        # Call stack (in terms of function AST nodes).
        self.call_stack = []

-        # To implement "store" operation, we need to arguments: location and value to store. The operation itself is
-        # handled by a node visitor (e.g. visit_Name), and location is represented by AST node, but there's no support
-        # to pass additional arguments to a visitor (likely, because it would be a burden to explicit pass such
-        # additional arguments thru the chain of visitors). So instead, we store this value as field. As interpretation
-        # happens sequentially, there's no risk that it will be overwritten "concurrently".
+        # To implement "store" operation, we need two arguments: location and value to store. The
+        # operation itself is handled by a node visitor (e.g. visit_Name), and location is
+        # represented by AST node, but there's no support to pass additional arguments to a visitor
+        # (likely, because it would be a burden to explicitly pass such additional arguments thru
+        # the chain of visitors). So instead, we store this value as field. As interpretation
+        # happens sequentially, there's no risk that it will be overwritten "concurrently".
        self.store_val = None

-        # Current active exception, for bare "raise", which doesn't work across function boundaries (and that's how we
-        # have it - exception would be caught in visit_Try, while re-rasing would happen in visit_Raise).
+        # Current active exception, for bare "raise", which doesn't work across function boundaries
+        # (and that's how we have it - exception would be caught in visit_Try, while re-raising
+        # would happen in visit_Raise).
        self.cur_exc = []

    def push_ns(self, new_ns):
@ -222,6 +212,7 @@ class ModuleInterpreter(StrictNodeVisitor):
        self.ns = new_ns

    def pop_ns(self):
+        assert self.ns is not None
        self.ns = self.ns.parent

    def stmt_list_visit(self, lst):
@ -247,13 +238,13 @@ class ModuleInterpreter(StrictNodeVisitor):
        return self.visit(node.body)

    def visit_ClassDef(self, node):
-        self.push_ns(ClassNS(node))
+        ns: ClassNS = ClassNS(node)
+        self.push_ns(ns)
        try:
            self.stmt_list_visit(node.body)
        except Exception:
            self.pop_ns()
            raise
-        ns = self.ns
        self.pop_ns()
        cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d)
        cls = self.wrap_decorators(cls, node)
@ -266,8 +257,7 @@ class ModuleInterpreter(StrictNodeVisitor):
        return self.prepare_func(node)

    def visit_FunctionDef(self, node):
-        # Defaults are evaluated at function definition time, so we
-        # need to do that now.
+        # Defaults are evaluated at function definition time, so we need to do that now.
        func = self.prepare_func(node)
        func = self.wrap_decorators(func, node)
        self.ns[node.name] = func
@ -290,11 +280,10 @@ class ModuleInterpreter(StrictNodeVisitor):
            all_args.add(arg_name(a))
            if v is not None:
                d[arg_name(a)] = self.visit(v)
-        # We can store cached argument names of a function in its node -
-        # it's static.
+        # We can store cached argument names of a function in its node - it's static.
        node.args.all_args = all_args
-        # We can't store the values of default arguments - they're dynamic,
-        # may depend on the lexical scope.
+        # We can't store the values of default arguments - they're dynamic, may depend on the
+        # lexical scope.
        func.defaults_dict = d

        return InterpFunc(func)
@ -308,9 +297,8 @@ class ModuleInterpreter(StrictNodeVisitor):
            )

        argspec = node.args
-        # If there's vararg, either offload surplus of args to it, or init
-        # it to empty tuple (all in one statement). If no vararg, error on
-        # too many args.
+        # If there's vararg, either offload surplus of args to it, or init it to empty tuple (all in
+        # one statement). If no vararg, error on too many args.
        #
        # Note that we have to do the .posonlyargs dance
        if argspec.vararg:
@ -329,9 +317,8 @@ class ModuleInterpreter(StrictNodeVisitor):
            for a, value in zip(argspec.posonlyargs, args):
                self.ns[arg_name(a)] = value

-        # Process incoming keyword arguments, putting them in namespace if
-        # actual arg exists by that name, or offload to function's kwarg
-        # if any. All make needed checks and error out.
+        # Process incoming keyword arguments, putting them in namespace if actual arg exists by that
+        # name, or offload to function's kwarg if any. All make needed checks and error out.
        func_kwarg = {}
        for k, v in kwargs.items():
            if k in argspec.all_args:
@ -351,9 +338,8 @@ class ModuleInterpreter(StrictNodeVisitor):
        if argspec.kwarg:
            self.ns[arg_name(argspec.kwarg)] = func_kwarg

-        # Finally, overlay default values for arguments not yet initialized.
-        # We need to do this last for "multiple values for the same arg"
-        # check to work.
+        # Finally, overlay default values for arguments not yet initialized. We need to do this last
+        # for "multiple values for the same arg" check to work.
        for k, v in interp_func.defaults_dict.items():
            if k not in self.ns:
                self.ns[k] = v
@ -376,8 +362,8 @@ class ModuleInterpreter(StrictNodeVisitor):

    def call_func(self, node, interp_func, *args, **kwargs):
        self.call_stack.append(node)
-        # We need to switch from dynamic execution scope to lexical scope
-        # in which function was defined (then switch back on return).
+        # We need to switch from dynamic execution scope to lexical scope in which function was
+        # defined (then switch back on return).
        dyna_scope = self.ns
        self.ns = interp_func.lexical_scope
        self.push_ns(FunctionNS(node))
@ -508,9 +494,9 @@ class ModuleInterpreter(StrictNodeVisitor):

    def visit_AugAssign(self, node):
        assert isinstance(node.target.ctx, ast.Store)
-        # Not functional style, oops. Node in AST has store context, but we
-        # need to read its value first. To not construct a copy of the entire
-        # node with load context, we temporarily patch it in-place.
+        # Not functional style, oops. Node in AST has store context, but we need to read its value
+        # first. To not construct a copy of the entire node with load context, we temporarily patch
+        # it in-place.
        save_ctx = node.target.ctx
        node.target.ctx = ast.Load()
        var_val = self.visit(node.target)
@ -518,12 +504,11 @@ class ModuleInterpreter(StrictNodeVisitor):

        rval = self.visit(node.value)

-        # As augmented assignment is statement, not operator, we can't put them
-        # all into map. We could instead directly lookup special inplace methods
-        # (__iadd__ and friends) and use them, with a fallback to normal binary
-        # operations, but from the point of view of this interpreter, presence
-        # of such methods is an implementation detail of the object system, it's
-        # not concerned with it.
+        # As augmented assignment is statement, not operator, we can't put them all into map. We
+        # could instead directly lookup special inplace methods (__iadd__ and friends) and use them,
+        # with a fallback to normal binary operations, but from the point of view of this
+        # interpreter, presence of such methods is an implementation detail of the object system,
+        # it's not concerned with it.
        op = type(node.op)
        if op is ast.Add:
            var_val += rval
@ -682,10 +667,11 @@ class ModuleInterpreter(StrictNodeVisitor):
        if func is builtins.super and not args:
            if not self.ns.parent or not isinstance(self.ns.parent, ClassNS):
                raise RuntimeError("super(): no arguments")
-            # As we're creating methods dynamically outside of class, super() without argument won't work, as that
-            # requires __class__ cell. Creating that would be cumbersome (Pycopy definitely lacks enough introspection
-            # for that), so we substitute 2 implied args (which argumentless super() would take from cell and 1st arg to
-            # func). In our case, we take them from prepared bookkeeping info.
+            # As we're creating methods dynamically outside of class, super() without argument won't
+            # work, as that requires __class__ cell. Creating that would be cumbersome (Pycopy
+            # definitely lacks enough introspection for that), so we substitute 2 implied args
+            # (which argumentless super() would take from cell and 1st arg to func). In our case, we
+            # take them from prepared bookkeeping info.
            args = (self.ns.parent.cls, self.ns["self"])

        return func(*args, **kwargs)
@ -732,7 +718,7 @@ class ModuleInterpreter(StrictNodeVisitor):
            ast.Div: lambda x, y: x / y,
            ast.FloorDiv: lambda x, y: x // y,
            ast.Mod: lambda x, y: x % y,
-            ast.Pow: lambda x, y: x ** y,
+            ast.Pow: lambda x, y: x**y,
            ast.LShift: lambda x, y: x << y,
            ast.RShift: lambda x, y: x >> y,
            ast.BitAnd: lambda x, y: x & y,
@ -901,7 +887,7 @@ class ModuleInterpreter(StrictNodeVisitor):

    def visit_Print(self, node):
        # In Py2k only
-        raise NotImplementedError("Absolutely not. Use __future__.")
+        raise SyntaxError("Absolutely not. Use __future__.")

    def visit_Str(self, node):
        return node.s
--- a/scratch/test.py
+++ b/scratch/test.py
@ -19,7 +19,6 @@ print(a.baz)

 import random

-
 for _ in range(10):
    print(random.randint(0, 1024))

@ -30,5 +29,4 @@ def bar(a, b, **bs):

 import requests

-
 print(len(requests.get("https://pypi.org/pypi/requests/json").text))
--- a/tools/build_rules/BUILD
+++ b/tools/build_rules/BUILD
@ -0,0 +1,3 @@
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
--- a/tools/build_rules/prelude_bazel
+++ b/tools/build_rules/prelude_bazel
@ -0,0 +1,15 @@
+# -*- mode: bazel -*-
+# A global prelude for all BUILD[.bazel] files
+
+load("//tools/python:defs.bzl",
+     "py_library",
+     "py_binary",
+     "py_unittest",
+     "py_pytest",
+     "py_resources",
+     "py_project",
+)
+
+load("@pypi//:requirements.bzl",
+     py_requirement="requirement"
+)
--- a/tools/python/BUILD
+++ b/tools/python/BUILD
@ -0,0 +1,45 @@
+load("@rules_python//python:defs.bzl",
+     "py_runtime_pair",
+)
+
+load("@pypi//:requirements.bzl", "all_requirements")
+
+package(default_visibility = ["//visibility:public"])
+
+licenses(["notice"])
+
+exports_files([
+    "defs.bzl",
+    "bzl_pytest_shim.py",
+    "bzl_unittest_shim.py",
+    "pythonshim",
+    "requirements_lock.txt",
+])
+
+py_runtime(
+    name = "python3_runtime",
+    files = [],
+    interpreter = ":pythonshim",
+    python_version = "PY3",
+    visibility = ["//visibility:public"],
+)
+
+py_runtime_pair(
+    name = "python_runtime",
+    py2_runtime = None,
+    py3_runtime = ":python3_runtime",
+)
+
+toolchain(
+    name = "python3_toolchain",
+    toolchain = ":python_runtime",
+    toolchain_type = "@bazel_tools//tools/python:toolchain_type",
+)
+
+py_pytest(
+    name = "test_licenses",
+    srcs = [
+        "test_licenses.py",
+    ],
+    deps = all_requirements,
+)
--- a/tools/python/bzl_pytest_shim.py
+++ b/tools/python/bzl_pytest_shim.py
@ -0,0 +1,10 @@
+"""A shim for executing pytest."""
+
+import sys
+
+import pytest
+
+if __name__ == "__main__":
+    cmdline = ["--ignore=external"] + sys.argv[1:]
+    print(cmdline, file=sys.stderr)
+    sys.exit(pytest.main(cmdline))
--- a/tools/python/bzl_unittest_shim.py
+++ b/tools/python/bzl_unittest_shim.py
@ -0,0 +1,66 @@
+"""Universal launcher for unit tests"""
+
+import argparse
+import logging
+import os
+import sys
+import unittest
+
+
+def main():
+    """Parse args, collect tests and run them"""
+    # Disable *.pyc files
+    sys.dont_write_bytecode = True
+
+    # Add ".." to module search path
+    cur_dir = os.path.dirname(os.path.realpath(__file__))
+    top_dir = os.path.abspath(os.path.join(cur_dir, os.pardir))
+    sys.path.append(top_dir)
+
+    # Parse command line arguments
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="count",
+        default=0,
+        help="verbosity level, use: [-v | -vv | -vvv]",
+    )
+    parser.add_argument(
+        "-s", "--start-directory", default=None, help="directory to start discovery"
+    )
+    parser.add_argument(
+        "-p",
+        "--pattern",
+        default="test*.py",
+        help="pattern to match test files ('test*.py' default)",
+    )
+    parser.add_argument(
+        "test", nargs="*", help="test specs (e.g. module.TestCase.test_func)"
+    )
+    args = parser.parse_args()
+
+    if not args.start_directory:
+        args.start_directory = cur_dir
+
+    if args.verbose > 2:
+        logging.basicConfig(level=logging.DEBUG, format="DEBUG: %(message)s")
+
+    loader = unittest.TestLoader()
+    if args.test:
+        # Add particular tests
+        for test in args.test:
+            suite = unittest.TestSuite()
+            suite.addTests(loader.loadTestsFromName(test))
+    else:
+        # Find all tests
+        suite = loader.discover(args.start_directory, args.pattern)
+
+    runner = unittest.TextTestRunner(verbosity=args.verbose)
+    result = runner.run(suite)
+    return result.wasSuccessful()
+
+
+if __name__ == "__main__":
+    # NOTE: True(success) -> 0, False(fail) -> 1
+    exit(not main())
--- a/tools/python/defs.bzl
+++ b/tools/python/defs.bzl
@ -0,0 +1,237 @@
+load("@pypi//:requirements.bzl",
+     _py_requirement = "requirement"
+)
+
+load("@rules_python//python:defs.bzl",
+     "py_runtime",
+     "py_runtime_pair",
+     _py_binary = "py_binary",
+     _py_test = "py_test",
+     _py_library = "py_library",
+)
+
+load("@bazel_skylib//lib:sets.bzl", "sets")
+
+
+def py_requirement(*args, **kwargs):
+    """Forward all arguments to requirement() from @pypi//:requirements.bzl and return its result."""
+    resolved = _py_requirement(*args, **kwargs)
+    return resolved
+
+
+def py_test(python_version=None, **kwargs):
+    """Wrap the upstream py_test rule, always building for Python 3.
+
+    Args:
+      python_version: optional; the build fails if this is set to anything other than "PY3".
+      **kwargs: handed through to the upstream py_test unchanged.
+    """
+
+    is_py3 = not python_version or python_version == "PY3"
+    if not is_py3:
+        fail("py3k only!")
+
+    return _py_test(python_version = "PY3", **kwargs)
+
+
+def py_pytest(name, srcs, deps, main=None, python_version=None, args=None, **kwargs):
+    """Define a py_test target that is launched through the pytest shim.
+
+    Args:
+      name: name of the generated test target.
+      srcs: test sources; the shim is prepended to them.
+      deps: extra dependencies, merged (without duplicates) after the pytest requirements.
+      main: accepted but not used; the shim is always the entry point.
+      python_version: optional; the build fails if this is set to anything other than "PY3".
+      args: passed through as the test's `args`.
+      **kwargs: passed through to py_test.
+    """
+
+    is_py3 = not python_version or python_version == "PY3"
+    if not is_py3:
+        fail("py3k only!")
+
+    shim = "//tools/python:bzl_pytest_shim.py"
+
+    # pytest and its plugins are always depended on; going through a set drops
+    # any of them the caller listed as well.
+    pytest_deps = [
+        py_requirement(pkg)
+        for pkg in ["pytest", "pytest-pudb", "pytest-cov", "pytest-timeout"]
+    ]
+    all_deps = sets.to_list(sets.make(pytest_deps + deps))
+    all_srcs = [shim] + srcs
+
+    py_test(
+        name = name,
+        srcs = all_srcs,
+        main = shim,
+        args = args,
+        python_version = "PY3",
+        deps = all_deps,
+        **kwargs
+    )
+
+    # zapp_test(
+    #   name = name + ".zapp",
+    #   main = f,
+    #   args = args,
+    #   srcs = srcs,
+    #   deps = deps,
+    #   test = True,
+    #   zip_safe = False,
+    #   **kwargs,
+    # )
+
+    # FIXME (arrdem 2020-09-27):
+    #   Generate a py_image_test.
+    #   Not clear how to achieve that.
+
+
+def py_unittest(srcs=[], **kwargs):
+    """Define a py_test whose entry point is the unittest launcher shim.
+
+    Args:
+      srcs: test sources; the shim is prepended to them.
+      **kwargs: passed through to py_test.
+    """
+
+    shim = "//tools/python:bzl_unittest_shim.py"
+    all_srcs = [shim] + srcs
+    return py_test(main = shim, srcs = all_srcs, **kwargs)
+
+
+def py_binary(python_version=None, main=None, srcs=None, **kwargs):
+    """A re-export of py_binary() which pins Python 3 and keeps `main` among `srcs`.
+
+    Args:
+      python_version: optional; the build fails if this is set to anything other than "PY3".
+      main: optional entry point. When given and not already in `srcs`, it is prepended to them.
+      srcs: optional list of sources; defaults to an empty list.
+      **kwargs: passed through to the upstream py_binary.
+    """
+
+    if python_version and python_version != "PY3":
+        fail("py3k only!")
+
+    srcs = srcs or []
+    # Only inject `main` when one was actually given: with the default of
+    # None, `None not in srcs` is true and None would end up inside srcs.
+    if main and main not in srcs:
+        srcs = [main] + srcs
+
+    return _py_binary(
+        python_version = "PY3",
+        main = main,
+        srcs = srcs,
+        **kwargs
+    )
+
+
+def py_library(srcs_version=None, **kwargs):
+    """Wrap the upstream py_library rule, always declaring Python 3 sources.
+
+    Args:
+      srcs_version: optional; the build fails if this is set to anything other than "PY3".
+      **kwargs: handed through to the upstream py_library unchanged.
+    """
+
+    is_py3 = not srcs_version or srcs_version == "PY3"
+    if not is_py3:
+        fail("py3k only!")
+
+    return _py_library(srcs_version = "PY3", **kwargs)
+
+
+# Provider returned by the py_resources rule alongside PyInfo.
+# Its single field, `srcs`, is filled from the rule's `srcs` attribute.
+ResourceGroupInfo = provider(
+    fields = {
+        "srcs": "files to use from Python",
+    },
+)
+
+
+def _resource_impl(ctx):
+    """Implementation of py_resources.
+
+    Flattens the files of every target in `srcs` into one depset and exposes
+    it as the transitive sources of a Python-3-only PyInfo, next to a
+    ResourceGroupInfo carrying the `srcs` attribute itself.
+    """
+    files = [
+        f
+        for target in ctx.attr.srcs
+        for f in target.files.to_list()
+    ]
+
+    group_info = ResourceGroupInfo(srcs = ctx.attr.srcs)
+    py_info = PyInfo(
+        has_py2_only_sources = False,
+        has_py3_only_sources = True,
+        uses_shared_libraries = False,
+        transitive_sources = depset(direct = files),
+    )
+    return [group_info, py_info]
+
+# Rule that takes arbitrary files via `srcs` (required, but may be empty) and,
+# through _resource_impl, returns them as ResourceGroupInfo and PyInfo.
+py_resources = rule(
+    implementation = _resource_impl,
+    attrs = {
+        "srcs": attr.label_list(
+            allow_empty = True,
+            mandatory = True,
+            allow_files = True,
+            doc = "Files to hand through to Python",
+        ),
+    },
+)
+
+def py_project(name=None,
+               main=None,
+               main_deps=None,
+               shebang=None,
+               lib_srcs=None,
+               lib_deps=None,
+               lib_data=None,
+               test_srcs=None,
+               test_deps=None,
+               test_data=None):
+    """
+    A helper for defining conventionally-formatted python project.
+
+    Assumes that there's a {src,test}/{resources,python} where src/ is a library and test/ is local tests only.
+
+    Each test_*.py source generates its own implicit test target. This allows for automatic test parallelism. Non
+    test_*.py files are implicitly srcs for the generated test targets. This is the same as making them implicitly a
+    testonly lib.
+
+    Args:
+      name: name of the library target, or of the binary target when `main` is given (the library is then "lib").
+      main: optional entry point; when set, a py_binary named `name` is generated on top of the library.
+      main_deps: extra deps of the binary; the library is always appended.
+      shebang: accepted but not referenced by this macro.
+      lib_srcs: library sources; defaults to every *.py under src/python.
+      lib_deps: deps of the library.
+      lib_data: library data; defaults to every non-*.py file under src/resources and src/python.
+      test_srcs: test sources; defaults to every *.py under test/python.
+      test_deps: extra deps of the generated tests; the library is always first.
+      test_data: test data; defaults to every non-*.py file under test/resources and test/python.
+
+    """
+
+    # An explicitly passed empty list is falsy, so it also falls back to the glob.
+    lib_srcs = lib_srcs or native.glob(["src/python/**/*.py"],
+                                       exclude=[
+                                           "**/*.pyc",
+                                       ])
+    lib_data = lib_data or native.glob(["src/resources/**/*",
+                                        "src/python/**/*"],
+                                       exclude=[
+                                           "**/*.py",
+                                           "**/*.pyc",
+                                       ])
+    test_srcs = test_srcs or native.glob(["test/python/**/*.py"],
+                                         exclude=[
+                                             "**/*.pyc",
+                                         ])
+    test_data = test_data or native.glob(["test/resources/**/*",
+                                          "test/python/**/*"],
+                                         exclude=[
+                                             "**/*.py",
+                                             "**/*.pyc",
+                                         ])
+
+    # With a binary in play the binary takes `name`, so the library becomes "lib".
+    lib_name = name if not main else "lib"
+
+    py_library(
+        name=lib_name,
+        srcs=lib_srcs,
+        deps=lib_deps,
+        data=lib_data,
+        imports=[
+            "src/python",
+            "src/resources",
+        ],
+        visibility = [
+            "//visibility:public",
+        ],
+    )
+
+    if main:
+        py_binary(
+            name=name,
+            main=main,
+            deps=(main_deps or []) + [lib_name],
+            imports=[
+                "src/python",
+                "src/resources",
+            ],
+            visibility = [
+                "//visibility:public",
+            ],
+        )
+
+    # One pytest target per test file. NOTE(review): "test_" is matched as a
+    # substring of the whole path, so a file in a directory such as
+    # test_helpers/ also counts as a test - confirm this is intended.
+    for src in test_srcs:
+        if "test_" in src:
+            py_pytest(
+                # The target is named after the file name, extension included.
+                name=src.split("/")[-1],
+                # Every non-test file rides along as a shared helper source.
+                srcs=[src] + [f for f in test_srcs if "test_" not in f],
+                deps=[lib_name] + (test_deps or []),
+                data=test_data,
+                imports=[
+                    "test/python",
+                    "test/resources",
+                ],
+            )
--- a/tools/python/pythonshim
+++ b/tools/python/pythonshim
@ -0,0 +1,21 @@
+#!/bin/sh
+
+# Bazel STRONGLY disapproves of linking dynamically to a Python interpreter.
+# But ... that's exactly what we want to do.
+# So this script exists to find a 'compliant' Python install and use that.
+
+PYTHONREV="3.10"
+CMD="python${PYTHONREV}"
+
+# Resolve the interpreter once, with the POSIX `command -v`, and exec exactly
+# the path that was probed (no second lookup through the non-standard `which`).
+PYTHON="$(command -v "$CMD")"
+
+if [ -x "$PYTHON" ]; then
+    exec "$PYTHON" "$@"
+fi
+
+case "$(uname)" in
+    Darwin)
+        # Fall back to the Homebrew location, but only if it really exists:
+        # a failed exec would end the script before the error below is printed.
+        if [ -x "/opt/homebrew/bin/$CMD" ]; then
+            exec "/opt/homebrew/bin/$CMD" "$@"
+        fi
+        ;;
+esac
+
+echo "Error: Unable to find a viable Python executable" >&2
+exit 1
--- a/tools/python/requirements.in
+++ b/tools/python/requirements.in
@ -0,0 +1,9 @@
+attrs
+black
+cattrs
+hypothesis
+pudb
+pytest
+pytest-cov
+pytest-pudb
+pytest-timeout
--- a/tools/python/requirements_lock.txt
+++ b/tools/python/requirements_lock.txt
@ -0,0 +1,28 @@
+attrs==22.2.0
+autoflake8==0.4.0
+black==23.1.0
+cattrs==22.2.0
+click==8.1.3
+coverage==7.2.1
+exceptiongroup==1.1.0
+hypothesis==6.68.2
+iniconfig==2.0.0
+isort==5.12.0
+jedi==0.18.2
+mypy-extensions==1.0.0
+packaging==23.0
+parso==0.8.3
+pathspec==0.11.0
+platformdirs==3.1.0
+pluggy==1.0.0
+pudb==2022.1.3
+pyflakes==3.0.1
+Pygments==2.14.0
+pytest==7.2.2
+pytest-cov==4.0.0
+pytest-pudb==0.7.0
+pytest-timeout==2.1.0
+sortedcontainers==2.4.0
+tomli==2.0.1
+urwid==2.1.2
+urwid-readline==0.13
--- a/tools/python/test_licenses.py
+++ b/tools/python/test_licenses.py
@ -0,0 +1,136 @@
+"""
+Validate 3rdparty library licenses as approved.
+"""
+
+import re
+
+import pytest
+from pkg_resources import DistInfoDistribution, working_set
+
+# Licenses approved as representing non-copyleft and not precluding commercial usage.
+# This is all easy, there's a good schema here.
+# Each entry is a trove classifier string; the walrus assignments also bind
+# every classifier to a module-level name (MIT, APACHE, ...) for use below.
+APPROVED_LICENSES = [
+    MIT := "License :: OSI Approved :: MIT License",
+    APACHE := "License :: OSI Approved :: Apache Software License",
+    BSD := "License :: OSI Approved :: BSD License",
+    MPL10 := "License :: OSI Approved :: Mozilla Public License 1.0 (MPL)",
+    MPL11 := "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)",
+    MPL20 := "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
+    PSFL := "License :: OSI Approved :: Python Software Foundation License",
+    LGPL := "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
+    LGPL3 := "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
+    ISCL := "License :: OSI Approved :: ISC License (ISCL)",
+]
+
+# Classifiers that are recognised but deliberately not approved.
+UNAPPROVED_LICENSES = [
+    GPL1 := "License :: OSI Approved :: GNU General Public License",
+    GPL2 := "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
+    GPL3 := "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
+]
+
+# Lookup from a normalized, lowercased free-form license name to its classifier.
+# This data is messy: free-form names follow no schema.
+LICENSES_BY_LOWERNAME = {
+    "apache 2.0": APACHE,
+    "apache": APACHE,
+    "http://www.apache.org/licenses/license-2.0": APACHE,
+    "bsd 3": BSD,
+    "bsd": BSD,
+    "gpl": GPL1,
+    "gpl2": GPL2,
+    "gpl3": GPL3,
+    "lgpl": LGPL,
+    "lgpl3": LGPL3,
+    "isc": ISCL,
+    "mit": MIT,
+    "mpl": MPL10,
+    "mpl 2.0": MPL20,
+    "psf": PSFL,
+}
+
+# Also accept the lowercased last segment of every approved classifier
+# (e.g. "mit license") as a name for that classifier.
+LICENSES_BY_LOWERNAME.update(
+    {lic.split(" :: ")[-1].lower(): lic for lic in APPROVED_LICENSES}
+)
+
+# As a workaround for packages which don't have correct metadata on PyPI, hand-verified packages
+APPROVED_PACKAGES = []
+
+
+def bash_license(ln):
+    """Normalize a free-form license name and map it to a known classifier.
+
+    The name is lowercased and stripped of punctuation and filler words
+    ("version", "license", "clause", "or later", a "v" before a digit) over
+    and over until nothing changes any more. The result is looked up in
+    LICENSES_BY_LOWERNAME; names without an entry come back in normalized form.
+    """
+    noise = r"[(),]|( version)|( license)|( ?v(?=\d))|([ -]clause)|(or later)"
+
+    current = ln
+    cleaned = re.sub(noise, "", current.lower())
+    # Iterate to a fixpoint: removing one fragment can expose another.
+    while cleaned != current:
+        current = cleaned
+        cleaned = re.sub(noise, "", current.lower())
+
+    return LICENSES_BY_LOWERNAME.get(current, current)
+
+
+# Pairs of (free-form license spelling, classifier it must normalize to).
+@pytest.mark.parametrize(
+    "a,b",
+    [
+        ("MIT", MIT),
+        ("mit", MIT),
+        ("BSD", BSD),
+        ("BSD 3-clause", BSD),
+        ("BSD 3 clause", BSD),
+        ("GPL3", GPL3),
+        ("GPL v3", GPL3),
+        ("GPLv3", GPL3),
+    ],
+)
+def test_bash_license(a, b):
+    """Check that bash_license maps the spelling `a` to the classifier `b`."""
+    assert bash_license(a) == b
+
+
+def licenses(dist: DistInfoDistribution):
+    """Return the licenses an installed distribution declares in its metadata.
+
+    The distribution's local PKG_INFO metadata is read; nothing is fetched
+    from PyPI, so the package has to be installed.
+
+    Args:
+        dist: the installed distribution to inspect.
+
+    Returns:
+        The distribution's `License ::` trove classifiers if it has any.
+        Otherwise a one-element list holding its free-form `License:` header
+        ("UNKNOWN" when there is no such header) as normalized by bash_license.
+    """
+
+    print(dist.project_name, dist.version, type(dist))
+
+    meta = dist.get_metadata(dist.PKG_INFO).split("\n")
+    classifiers = [
+        line.replace("Classifier: ", "", 1)
+        for line in meta
+        if line.startswith("Classifier: ")
+    ]
+
+    # Classifiers follow a fixed schema, so they win over the free-form header.
+    lics = [c for c in classifiers if c.startswith("License ::")]
+    if lics:
+        return lics
+
+    header = next(
+        (line for line in meta if line.startswith("License:")), "License: UNKNOWN"
+    )
+    return [bash_license(header.replace("License: ", "", 1))]
+
+
+# One test case per distribution in the working set whose location contains
+# "arrdem_source_pypi" - presumably the Bazel repository holding the pip
+# dependencies; TODO confirm. Cases are labelled with the project name.
+@pytest.mark.parametrize(
+    "dist",
+    (w for w in working_set if w.location.find("arrdem_source_pypi") != -1),
+    ids=lambda dist: dist.project_name,
+)
+def test_approved_license(dist: DistInfoDistribution):
+    """Ensure that a given package is either allowed by name or uses an approved license.
+
+    Passes when the project is listed in APPROVED_PACKAGES or at least one of
+    its licenses is in APPROVED_LICENSES.
+    """
+
+    _licenses = licenses(dist)
+    print(dist.location)
+    assert dist.project_name in APPROVED_PACKAGES or any(
+        lic in APPROVED_LICENSES for lic in _licenses
+    ), f"{dist.project_name} ({dist.location}) was not approved and its license(s) were unknown {_licenses!r}"
Author	SHA1	Message	Date
Reid 'arrdem' McKenzie	6ce0e888b9	[NO TESTS] WIP	2023-03-15 00:45:32 -06:00
Reid 'arrdem' McKenzie	48ae28f7d6	Initial venv snapshot	2023-03-08 15:31:04 -07:00
Reid 'arrdem' McKenzie	c27af42dfb	Import the upstream Python unit tests covering tokenize and ast	2023-03-08 15:28:56 -07:00
Reid 'arrdem' McKenzie	90df10f3a8	Avoid wildcard import	2023-03-08 15:28:34 -07:00
Reid 'arrdem' McKenzie	eef3a17e55	Remove unused import	2023-03-08 15:15:50 -07:00
Reid 'arrdem' McKenzie	d086487a94	Start sketching at Hydra	2023-03-08 15:15:50 -07:00
Reid 'arrdem' McKenzie	c4930e0be8	Vendor in ast and utokenize from pycopy	2023-03-08 15:15:36 -07:00
Reid 'arrdem' McKenzie	471af02d9b	Import a bunch of my Bazel infrastructure	2023-03-08 15:11:51 -07:00
Reid 'arrdem' McKenzie	c0749cdcbf	Update licensure	2023-03-08 10:55:09 -07:00
Reid 'arrdem' McKenzie	0a75d08b5a	Write up where this is at	2023-03-08 10:53:54 -07:00
				`@ -0,0 +1 @@`
				`export VIRTUAL_ENV=/home/arrdem/.virtualenvs/flowmetal`