Compare commits

...

10 commits

26 changed files with 946 additions and 103 deletions

1
.bazelignore Normal file
View file

@ -0,0 +1 @@
.git

2
.bazelrc Normal file
View file

@ -0,0 +1,2 @@
test --test_output=errors
build --keep_going

1
.bazelversion Normal file
View file

@ -0,0 +1 @@
6.0.0

1
.envrc Normal file
View file

@ -0,0 +1 @@
export VIRTUAL_ENV=/home/arrdem/.virtualenvs/flowmetal

2
.gitignore vendored
View file

@ -1,2 +1,4 @@
/**/__pycache__ /**/__pycache__
/**/*.egg-info /**/*.egg-info
scratch
bazel-*

View file

@ -1,7 +0,0 @@
Copyright 2019 Reid 'arrdem' McKenzie
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -33,4 +33,4 @@ This centering of evented communication makes Flowmetal ideal for **coordination
## License ## License
Published under the MIT license. See [LICENSE.md](LICENSE.md) Copyright © 2023 Reid D. 'arrdem' McKenzie, all rights reserved.

52
WORKSPACE Normal file
View file

@ -0,0 +1,52 @@
workspace(
name = "flowmetal"
)
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
http_archive(
name = "bazel_skylib",
sha256 = "b8a1527901774180afc798aeb28c4634bdccf19c4d98e7bdd1ce79d1fe9aaad7",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.4.1/bazel-skylib-1.4.1.tar.gz",
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.4.1/bazel-skylib-1.4.1.tar.gz",
],
)
load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
bazel_skylib_workspace()
rules_python_version = "c504355672223144cefb2cbf3f69e2d38e7e2726"
http_archive(
name = "rules_python",
sha256 = "3f12b492dbf7d56b0e3deed81f21d56c3241babaa52d7eb525cb7c657bba9125",
strip_prefix = "rules_python-{}".format(rules_python_version),
url = "https://github.com/bazelbuild/rules_python/archive/{}.zip".format(rules_python_version),
)
load("@rules_python//python:repositories.bzl", "python_register_toolchains")
python_register_toolchains(
name = "python3_10",
python_version = "3.10",
)
load("@python3_10//:defs.bzl", python3_10="interpreter")
load("@rules_python//python:pip.bzl", "pip_parse")
# Create a central repo that knows about the dependencies needed from
# requirements_lock.txt.
pip_parse(
name = "pypi",
python_interpreter_target = python3_10,
requirements_lock = "//tools/python:requirements_lock.txt",
)
load("@pypi//:requirements.bzl", "install_deps")
install_deps()

3
components/hydra/BUILD Normal file
View file

@ -0,0 +1,3 @@
py_project(
name = "hydra",
)

View file

@ -0,0 +1,13 @@
# Hydra; a multi-headed Python-on-Python interpreter.
Hydra draws extensively on the OCaml and Scheme prior art for coroutine-based
interpreters to produce a Python interpreter along the same lines.
At the top of the interpreter we see an extensible `while` loop. The interpreter
performs one "step" - evaluating a single statement - producing a new state and
a "next PC"; a path into the AST identifying the next statement to be executed.
This model enables single stepping, stack analysis and, most importantly,
interruptions such as snapshotting or suspending.
Hydra is the basis for the Flowmetal interpreter.

View file

@ -0,0 +1,201 @@
#!/usr/bin/env python3.10
"""Hydra; the multi-headed Python interpreter.
> Chop off one head and two more grow back in its place.
"""
import ast
import builtins
import logging
import os
import sys
from typing import Optional, Type, Union, List, Callable
from pathlib import Path
from importlib import __import__
from attrs import Factory, define, field
log = logging.getLogger(__name__)
@define
class ANamespace:
    """A dict-backed namespace cross-linked to an AST node.

    Item access, assignment, deletion and membership tests are forwarded to the
    backing dict `d`.

    :attribute node: The AST node this namespace is associated with.
    :attribute d: The name -> value bindings held by this namespace.
    :attribute parent: The enclosing namespace instance, or None.
    """

    node: ast.AST
    # Every instance needs its own dict. attrs does not copy default values, so a literal
    # `{}` default would be one dict shared by every namespace ever created.
    d: dict = Factory(dict)
    # The parent is a namespace *instance*, not a namespace class.
    parent: Optional["ANamespace"] = None

    def __getitem__(self, k):
        """Return the value bound to `k`; raises KeyError if unbound."""
        return self.d[k]

    def get(self, k, default=None):
        """Return the value bound to `k`, or `default` if unbound."""
        return self.d.get(k, default)

    def __setitem__(self, k, v):
        """Bind `k` to `v` in this namespace."""
        self.d[k] = v

    def __delitem__(self, k):
        """Remove the binding for `k`; raises KeyError if unbound."""
        del self.d[k]

    def __contains__(self, k):
        """Return whether `k` is bound directly in this namespace."""
        return k in self.d

    def __str__(self):
        return "<{} {}>".format(self.__class__.__name__, self.d)
@define
class ModuleNS(ANamespace):
    """The namespace used for a module's top level."""

    # parent: Optional["ModuleNS"] = None
@define
class FunctionNS(ANamespace):
    """The namespace used for a function."""
@define
class ClassNS(ANamespace):
    """The namespace used for a class body.

    :attribute cls: A class object associated with this namespace, if any; None by default.
    """

    cls: Optional[type] = field(default=None)
# TODO (arrdem 2023-03-08):
# This interpreter works well enough to import `requests` and many other libraries and do some
# work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
# direct execution, there's really no way to jam breakpoints or checkpoints or resume points into
# program execution. Which is kinda the goal of the whole project.
#
# This interpreter, while complete, needs to get refactored into probably a `yield` based
# coroutine structure wherein individual operations explicitly `yield` to an outer state
# management loop which effectively trampolines single statements together with state management
# logic.
#
# The outer interpreter needs to be able to check the "step budget" and decide if it's time for
# the program to suspend.
#
# Individual steps (workflow calls/function calls) may also cause the program to suspend.
#
# Suspending requires signaling the top level loop, and the top level loop needs both the
# namespace tree and some sort of cursor or address into the AST under interpretation
# representing where to resume. The logical equivalent of a program counter, but a tree path.
@define
class Module:
    """A parsed source file paired with its namespace.

    :attribute fname: The file name (or path) the tree is associated with.
    :attribute tree: The parsed AST.
    :attribute ns: The module's namespace; defaults to a fresh ModuleNS over `tree`.
    """

    fname: Union[Path, str]
    tree: ast.AST
    # The default depends on another field, so the factory receives the half-built instance.
    ns: "ANamespace" = Factory(lambda self: ModuleNS(self.tree), takes_self=True)
@define
class Pc:
    """A 'program counter', represented as a list of AST indices.

    :attribute idxs: The index path; a fresh empty list per instance by default.
    """

    idxs: List[int] = field(factory=list)
@define
class Frame:
    """An 'execution frame' as a PC, AST and namespace."""

    # The program counter for this frame.
    pc: Pc
    # The AST for this frame -- presumably the tree `pc` indexes into; TODO confirm.
    ast: ast.AST
    # The namespace for this frame.
    ns: ANamespace
@define
class Cont:
    """A 'Continuation' (thread/coroutine) of execution.

    :attribute id: An identifier for this continuation.
    :attribute entry: The frame this continuation starts from.
    :attribute stack: The frame stack; defaults to a new list holding only `entry`.
    """

    id: int
    entry: Frame
    # The default depends on `entry`, so the factory receives the half-built instance.
    stack: List[Frame] = Factory(lambda self: [self.entry], takes_self=True)
@define
class Vm:
    """A bag of shared state.

    :attribute path: The equivalent of sys.path
    :attribute modules: The equivalent of sys.modules
    :attribute conts: All interpreter continuations
    :attribute log: A log of all statements executed by any continuation

    It should be possible to reconstruct the VM's state simply by replaying the
    log in statement order, as a fallback for dealing with C-extension state,
    connections and so forth.
    """

    path: list = Factory(lambda: list(sys.path))
    modules: dict = Factory(dict)
    conts: dict = Factory(dict)
    # NOTE: inside the methods below, the bare name `log` is the module-level logger; this
    # field is only reachable as `self.log`.
    log: list = Factory(list)

    def handle_import(
        self, thread, name, globals=None, locals=None, fromlist=(), level=0
    ):
        """Resolve `name` to a module, loading it if this VM has not seen it yet.

        Resolution order: this VM's module cache, the host's `sys.modules`, a `.py` file
        found under one of the directories in `self.path`, and finally the host's native
        import machinery.

        :param thread: Accepted for the caller's benefit; not used by the lookup itself.
        :param name: The dotted module name to import.
        :param globals, locals, fromlist, level: Forwarded to `__import__` on fallback.
        :returns: The cached entry for `name` -- a namespace for interpreted modules, or a
            host module object otherwise.
        :raises RuntimeError: If an entry of `self.path` is a file (archive imports are
            unsupported).
        """
        log.debug(" Attempting to import '{}'".format(name))
        if name in self.modules:
            return self.modules[name]

        if name in sys.modules:
            # FIXME: Need to hack sys, os and several other built-in packages here
            log.debug("Short-circuited loading %r from bootstrap sys.modules", name)
            self.modules[name] = sys.modules[name]
            return self.modules[name]

        # Keep the dotted `name` as the cache key and `__name__`; only the filesystem
        # lookup uses the path-separated form.
        relpath = name.replace(".", os.path.sep)
        for e in self.path:
            if os.path.isdir(e):
                for ext in [
                    ".py",
                ]:
                    f = os.path.join(e, relpath + ext)
                    log.debug(" Checking {}".format(f))
                    if os.path.exists(f):
                        mod = self.execute_load(f, name)
                        self.modules[name] = mod.ns
                        # Return right away so the search stops at the first hit and the
                        # native fallback below cannot overwrite the loaded module.
                        return self.modules[name]

            elif os.path.isfile(e):
                # FIXME (arrdem 2021-05-31)
                raise RuntimeError(
                    "Import from .zip/.whl/.egg archives aren't supported yet"
                )

        log.debug("Falling back to native import for %r", name)
        # NOTE(review): for a dotted name with an empty fromlist, __import__ returns the
        # top-level package rather than the submodule -- confirm callers expect that.
        self.modules[name] = __import__(name, globals, locals, fromlist, level)
        return self.modules[name]

    def execute_module(self, module: Module):
        """Execute all the Expressions and Statements in a given Module sequentially, as in a single thread."""
        # NOTE: no implementation yet; this is currently a no-op returning None.

    def execute_load(self, fname, name):
        """Execute the given file as if it were an imported module.

        :param fname: Path of the source file to parse.
        :param name: The value to bind as the module's `__name__` and cache key.
        :returns: The Module built from the file.
        """
        # Read bytes and let the parser choose the encoding: it honours a PEP 263 coding
        # cookie and otherwise assumes UTF-8, instead of depending on the host locale.
        with open(fname, "rb") as f:
            tree = ast.parse(f.read(), filename=str(fname))

        mod = Module(fname, tree)
        mod.ns["__name__"] = name
        # Register before executing so the module is already visible while it runs.
        self.modules[name] = mod.ns
        self.execute_module(mod)
        return mod

    def execute_dunder_main(self, fname):
        """Execute the given file as if it were a script entrypoint."""
        return self.execute_load(fname, "__main__")

View file

@ -0,0 +1,12 @@
#!/usr/bin/env python3.10
import click
@click.group()
def cli():
    # Root command group for the `hydra` CLI. Documented with a comment rather than a
    # docstring because click would surface a docstring as the group's help text.
    pass


if __name__ == "__main__":
    # Run the group under an explicit program name.
    cli.main(prog_name="hydra")

View file

@ -2,7 +2,7 @@
from datetime import timedelta from datetime import timedelta
from time import sleep from time import sleep
from flowmetal import workflow, timeout, CancelledError, TimeoutError, Task from flowmetal import workflow, CancelledError, TimeoutError, Task
def cancellable_activity(): def cancellable_activity():

8
pyproject.toml Normal file
View file

@ -0,0 +1,8 @@
[tool.isort]
py_version=311
line_length=100
skip_glob = [
".git/*",
".bazel/*",
"bazel-*",
]

View file

@ -1,47 +1,17 @@
# flake8: noqa: all # flake8: noqa: all
# Python AST interpreter written in Python # A Python AST interpreter written in Python
# #
# This module is part of the Pycopy https://github.com/pfalcon/pycopy # This module is part of the Pycopy https://github.com/pfalcon/pycopy project.
# project.
# #
# Copyright (c) 2019 Paul Sokolovsky # Copyright (c) 2019 Paul Sokolovsky, published under the MIT License
#
# The MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Modified by Reid D. 'ardem' Mckenzie in 2021 to be a bit more fully-featured
# and usable for running 'real' code as part of an experiment in implementing a
# durable Python interpreter atop the original pycopy substrate.
import ast import ast
import builtins
import logging import logging
import os import os
import sys import sys
from typing import Optional, Type
if sys.version_info < (3, 0, 0):
builtins = __builtins__
else:
import builtins
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -55,7 +25,7 @@ class StrictNodeVisitor(ast.NodeVisitor):
class ANamespace: class ANamespace:
def __init__(self, node): def __init__(self, node):
self.d = {} self.d = {}
self.parent = None self.parent: Optional[Type["ANamespace"]] = None
# Cross-link namespace to AST node. Note that we can't do the # Cross-link namespace to AST node. Note that we can't do the
# opposite, because for one node, there can be different namespaces. # opposite, because for one node, there can be different namespaces.
self.node = node self.node = node
@ -80,6 +50,7 @@ class ANamespace:
class ModuleNS(ANamespace): class ModuleNS(ANamespace):
# parent: Optional["ModuleNS"] = None
pass pass
@ -88,7 +59,7 @@ class FunctionNS(ANamespace):
class ClassNS(ANamespace): class ClassNS(ANamespace):
pass cls: Optional[type] = None
# Pycopy by default doesn't support direct slice construction, use helper # Pycopy by default doesn't support direct slice construction, use helper
@ -102,17 +73,11 @@ slice_getter = SliceGetter()
def arg_name(arg): def arg_name(arg):
if sys.version_info < (3, 0, 0): return arg.arg
return arg.id
else:
return arg.arg
def kwarg_defaults(args): def kwarg_defaults(args):
if sys.version_info < (3, 0, 0): return args.kw_defaults
return args.defaults
else:
return args.kw_defaults
class TargetNonlocalFlow(Exception): class TargetNonlocalFlow(Exception):
@ -154,11 +119,12 @@ class InterpFuncWrap:
return self.interp.call_func(self.node, self, *args, **kwargs) return self.interp.call_func(self.node, self, *args, **kwargs)
# Python don't fully treat objects, even those defining __call__() special method, as a true callable. For example, such # Python don't fully treat objects, even those defining __call__() special method, as a true
# objects aren't automatically converted to bound methods if looked up as another object's attributes. As we want our # callable. For example, such objects aren't automatically converted to bound methods if looked up
# "interpreted functions" to behave as close as possible to real functions, we just wrap function object with a real # as another object's attributes. As we want our "interpreted functions" to behave as closely as
# function. An alternative might have been to perform needed checks and explicitly bind a method using # possible to real functions, we just wrap function object with a real function. An alternative
# types.MethodType() in visit_Attribute (but then maybe there would be still other cases of "callable object" vs # might have been to perform needed checks and explicitly bind a method using types.MethodType() in
# visit_Attribute (but then maybe there would be still other cases of "callable object" vs
# "function" discrepancies). # "function" discrepancies).
def InterpFunc(fun): def InterpFunc(fun):
def func(*args, **kwargs): def func(*args, **kwargs):
@ -195,26 +161,50 @@ class InterpModule:
return list(self.ns.d.keys()) return list(self.ns.d.keys())
# TODO (arrdem 2023-03-08):
# This interpreter works well enough to import `requests` and many other libraries and do some
# work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
# direct execution, there's really no way to jam breakpoints or checkpoints or resume points into
# program execution. Which is kinda the goal of the whole project.
#
# This interpreter, while complete, needs to get refactored into probably a `yield` based
# coroutine structure wherein individual operations explicitly `yield` to an outer state
# management loop which effectively trampolines single statements together with state management
# logic.
#
# The outer interpreter needs to be able to check the "step budget" and decide if it's time for
# the program to suspend.
#
# Individual steps (workflow calls/function calls) may also cause the program to suspend.
#
# Suspending requires signaling the top level loop, and the top level loop needs both the
# namespace tree and the some sort of cursor or address into the AST under interpretation
# representing where to resume. The logical equivalent of a program counter, but a tree path.
class ModuleInterpreter(StrictNodeVisitor): class ModuleInterpreter(StrictNodeVisitor):
"""An interpreter specific to a single module.""" """An interpreter specific to a single module."""
def __init__(self, system, fname, node): def __init__(self, system, fname, node):
self.system = system self.system = system
self.fname = fname self.fname = fname
self.ns = self.module_ns = ModuleNS(node) self.module_ns: ModuleNS = ModuleNS(node)
self.ns: ANamespace = self.module_ns
# Call stack (in terms of function AST nodes). # Call stack (in terms of function AST nodes).
self.call_stack = [] self.call_stack = []
# To implement "store" operation, we need to arguments: location and value to store. The operation itself is # To implement "store" operation, we need to arguments: location and value to store. The
# handled by a node visitor (e.g. visit_Name), and location is represented by AST node, but there's no support # operation itself is handled by a node visitor (e.g. visit_Name), and location is
# to pass additional arguments to a visitor (likely, because it would be a burden to explicit pass such # represented by AST node, but there's no support to pass additional arguments to a visitor
# additional arguments thru the chain of visitors). So instead, we store this value as field. As interpretation # (likely, because it would be a burden to explicit pass such additional arguments thru the
# happens sequentially, there's no risk that it will be overwritten "concurrently". # chain of visitors). So instead, we store this value as field. As interpretation happens
# sequentially, there's no risk that it will be overwritten "concurrently".
self.store_val = None self.store_val = None
# Current active exception, for bare "raise", which doesn't work across function boundaries (and that's how we # Current active exception, for bare "raise", which doesn't work across function boundaries
# have it - exception would be caught in visit_Try, while re-rasing would happen in visit_Raise). # (and that's how we have it - exception would be caught in visit_Try, while re-rasing would
# happen in visit_Raise).
self.cur_exc = [] self.cur_exc = []
def push_ns(self, new_ns): def push_ns(self, new_ns):
@ -222,6 +212,7 @@ class ModuleInterpreter(StrictNodeVisitor):
self.ns = new_ns self.ns = new_ns
def pop_ns(self): def pop_ns(self):
assert self.ns is not None
self.ns = self.ns.parent self.ns = self.ns.parent
def stmt_list_visit(self, lst): def stmt_list_visit(self, lst):
@ -247,13 +238,13 @@ class ModuleInterpreter(StrictNodeVisitor):
return self.visit(node.body) return self.visit(node.body)
def visit_ClassDef(self, node): def visit_ClassDef(self, node):
self.push_ns(ClassNS(node)) ns: ClassNS = ClassNS(node)
self.push_ns(ns)
try: try:
self.stmt_list_visit(node.body) self.stmt_list_visit(node.body)
except Exception: except Exception:
self.pop_ns() self.pop_ns()
raise raise
ns = self.ns
self.pop_ns() self.pop_ns()
cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d) cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d)
cls = self.wrap_decorators(cls, node) cls = self.wrap_decorators(cls, node)
@ -266,8 +257,7 @@ class ModuleInterpreter(StrictNodeVisitor):
return self.prepare_func(node) return self.prepare_func(node)
def visit_FunctionDef(self, node): def visit_FunctionDef(self, node):
# Defaults are evaluated at function definition time, so we # Defaults are evaluated at function definition time, so we need to do that now.
# need to do that now.
func = self.prepare_func(node) func = self.prepare_func(node)
func = self.wrap_decorators(func, node) func = self.wrap_decorators(func, node)
self.ns[node.name] = func self.ns[node.name] = func
@ -290,11 +280,10 @@ class ModuleInterpreter(StrictNodeVisitor):
all_args.add(arg_name(a)) all_args.add(arg_name(a))
if v is not None: if v is not None:
d[arg_name(a)] = self.visit(v) d[arg_name(a)] = self.visit(v)
# We can store cached argument names of a function in its node - # We can store cached argument names of a function in its node - it's static.
# it's static.
node.args.all_args = all_args node.args.all_args = all_args
# We can't store the values of default arguments - they're dynamic, # We can't store the values of default arguments - they're dynamic, may depend on the
# may depend on the lexical scope. # lexical scope.
func.defaults_dict = d func.defaults_dict = d
return InterpFunc(func) return InterpFunc(func)
@ -308,9 +297,8 @@ class ModuleInterpreter(StrictNodeVisitor):
) )
argspec = node.args argspec = node.args
# If there's vararg, either offload surplus of args to it, or init # If there's vararg, either offload surplus of args to it, or init it to empty tuple (all in
# it to empty tuple (all in one statement). If no vararg, error on # one statement). If no vararg, error on too many args.
# too many args.
# #
# Note that we have to do the .posonlyargs dance # Note that we have to do the .posonlyargs dance
if argspec.vararg: if argspec.vararg:
@ -329,9 +317,8 @@ class ModuleInterpreter(StrictNodeVisitor):
for a, value in zip(argspec.posonlyargs, args): for a, value in zip(argspec.posonlyargs, args):
self.ns[arg_name(a)] = value self.ns[arg_name(a)] = value
# Process incoming keyword arguments, putting them in namespace if # Process incoming keyword arguments, putting them in namespace if actual arg exists by that
# actual arg exists by that name, or offload to function's kwarg # name, or offload to function's kwarg if any. All make needed checks and error out.
# if any. All make needed checks and error out.
func_kwarg = {} func_kwarg = {}
for k, v in kwargs.items(): for k, v in kwargs.items():
if k in argspec.all_args: if k in argspec.all_args:
@ -351,9 +338,8 @@ class ModuleInterpreter(StrictNodeVisitor):
if argspec.kwarg: if argspec.kwarg:
self.ns[arg_name(argspec.kwarg)] = func_kwarg self.ns[arg_name(argspec.kwarg)] = func_kwarg
# Finally, overlay default values for arguments not yet initialized. # Finally, overlay default values for arguments not yet initialized. We need to do this last
# We need to do this last for "multiple values for the same arg" # for "multiple values for the same arg" check to work.
# check to work.
for k, v in interp_func.defaults_dict.items(): for k, v in interp_func.defaults_dict.items():
if k not in self.ns: if k not in self.ns:
self.ns[k] = v self.ns[k] = v
@ -376,8 +362,8 @@ class ModuleInterpreter(StrictNodeVisitor):
def call_func(self, node, interp_func, *args, **kwargs): def call_func(self, node, interp_func, *args, **kwargs):
self.call_stack.append(node) self.call_stack.append(node)
# We need to switch from dynamic execution scope to lexical scope # We need to switch from dynamic execution scope to lexical scope in which function was
# in which function was defined (then switch back on return). # defined (then switch back on return).
dyna_scope = self.ns dyna_scope = self.ns
self.ns = interp_func.lexical_scope self.ns = interp_func.lexical_scope
self.push_ns(FunctionNS(node)) self.push_ns(FunctionNS(node))
@ -508,9 +494,9 @@ class ModuleInterpreter(StrictNodeVisitor):
def visit_AugAssign(self, node): def visit_AugAssign(self, node):
assert isinstance(node.target.ctx, ast.Store) assert isinstance(node.target.ctx, ast.Store)
# Not functional style, oops. Node in AST has store context, but we # Not functional style, oops. Node in AST has store context, but we need to read its value
# need to read its value first. To not construct a copy of the entire # first. To not construct a copy of the entire node with load context, we temporarily patch
# node with load context, we temporarily patch it in-place. # it in-place.
save_ctx = node.target.ctx save_ctx = node.target.ctx
node.target.ctx = ast.Load() node.target.ctx = ast.Load()
var_val = self.visit(node.target) var_val = self.visit(node.target)
@ -518,12 +504,11 @@ class ModuleInterpreter(StrictNodeVisitor):
rval = self.visit(node.value) rval = self.visit(node.value)
# As augmented assignment is statement, not operator, we can't put them # As augmented assignment is statement, not operator, we can't put them all into map. We
# all into map. We could instead directly lookup special inplace methods # could instead directly lookup special inplace methods (__iadd__ and friends) and use them,
# (__iadd__ and friends) and use them, with a fallback to normal binary # with a fallback to normal binary operations, but from the point of view of this
# operations, but from the point of view of this interpreter, presence # interpreter, presence of such methods is an implementation detail of the object system,
# of such methods is an implementation detail of the object system, it's # it's not concerned with it.
# not concerned with it.
op = type(node.op) op = type(node.op)
if op is ast.Add: if op is ast.Add:
var_val += rval var_val += rval
@ -682,10 +667,11 @@ class ModuleInterpreter(StrictNodeVisitor):
if func is builtins.super and not args: if func is builtins.super and not args:
if not self.ns.parent or not isinstance(self.ns.parent, ClassNS): if not self.ns.parent or not isinstance(self.ns.parent, ClassNS):
raise RuntimeError("super(): no arguments") raise RuntimeError("super(): no arguments")
# As we're creating methods dynamically outside of class, super() without argument won't work, as that # As we're creating methods dynamically outside of class, super() without argument won't
# requires __class__ cell. Creating that would be cumbersome (Pycopy definitely lacks enough introspection # work, as that requires __class__ cell. Creating that would be cumbersome (Pycopy
# for that), so we substitute 2 implied args (which argumentless super() would take from cell and 1st arg to # definitely lacks enough introspection for that), so we substitute 2 implied args
# func). In our case, we take them from prepared bookkeeping info. # (which argumentless super() would take from cell and 1st arg to func). In our case, we
# take them from prepared bookkeeping info.
args = (self.ns.parent.cls, self.ns["self"]) args = (self.ns.parent.cls, self.ns["self"])
return func(*args, **kwargs) return func(*args, **kwargs)
@ -732,7 +718,7 @@ class ModuleInterpreter(StrictNodeVisitor):
ast.Div: lambda x, y: x / y, ast.Div: lambda x, y: x / y,
ast.FloorDiv: lambda x, y: x // y, ast.FloorDiv: lambda x, y: x // y,
ast.Mod: lambda x, y: x % y, ast.Mod: lambda x, y: x % y,
ast.Pow: lambda x, y: x ** y, ast.Pow: lambda x, y: x**y,
ast.LShift: lambda x, y: x << y, ast.LShift: lambda x, y: x << y,
ast.RShift: lambda x, y: x >> y, ast.RShift: lambda x, y: x >> y,
ast.BitAnd: lambda x, y: x & y, ast.BitAnd: lambda x, y: x & y,
@ -901,7 +887,7 @@ class ModuleInterpreter(StrictNodeVisitor):
def visit_Print(self, node): def visit_Print(self, node):
# In Py2k only # In Py2k only
raise NotImplementedError("Absolutely not. Use __future__.") raise SyntaxError("Absolutely not. Use __future__.")
def visit_Str(self, node): def visit_Str(self, node):
return node.s return node.s

View file

@ -19,7 +19,6 @@ print(a.baz)
import random import random
for _ in range(10): for _ in range(10):
print(random.randint(0, 1024)) print(random.randint(0, 1024))
@ -30,5 +29,4 @@ def bar(a, b, **bs):
import requests import requests
print(len(requests.get("https://pypi.org/pypi/requests/json").text)) print(len(requests.get("https://pypi.org/pypi/requests/json").text))

3
tools/build_rules/BUILD Normal file
View file

@ -0,0 +1,3 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])

View file

@ -0,0 +1,15 @@
# -*- mode: bazel -*-
# A global prelude for all BUILD[.bazel] files
load("//tools/python:defs.bzl",
"py_library",
"py_binary",
"py_unittest",
"py_pytest",
"py_resources",
"py_project",
)
load("@pypi//:requirements.bzl",
py_requirement="requirement"
)

45
tools/python/BUILD Normal file
View file

@ -0,0 +1,45 @@
load("@rules_python//python:defs.bzl",
"py_runtime_pair",
)
load("@pypi//:requirements.bzl", "all_requirements")
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
exports_files([
"defs.bzl",
"bzl_pytest_shim.py",
"bzl_unittest_shim.py",
"pythonshim",
"requirements_lock.txt",
])
py_runtime(
name = "python3_runtime",
files = [],
interpreter = ":pythonshim",
python_version = "PY3",
visibility = ["//visibility:public"],
)
py_runtime_pair(
name = "python_runtime",
py2_runtime = None,
py3_runtime = ":python3_runtime",
)
toolchain(
name = "python3_toolchain",
toolchain = ":python_runtime",
toolchain_type = "@bazel_tools//tools/python:toolchain_type",
)
py_pytest(
name = "test_licenses",
srcs = [
"test_licenses.py",
],
deps = all_requirements,
)

View file

@ -0,0 +1,10 @@
"""A shim for executing pytest."""
import sys
import pytest
if __name__ == "__main__":
    # Always ignore the `external` directory, then pass through whatever the caller gave us.
    cmdline = ["--ignore=external", *sys.argv[1:]]
    # Echo the effective arguments to stderr before handing over to pytest.
    print(cmdline, file=sys.stderr)
    sys.exit(pytest.main(cmdline))

View file

@ -0,0 +1,66 @@
"""Universal launcher for unit tests"""
import argparse
import logging
import os
import sys
import unittest
def main():
"""Parse args, collect tests and run them"""
# Disable *.pyc files
sys.dont_write_bytecode = True
# Add ".." to module search path
cur_dir = os.path.dirname(os.path.realpath(__file__))
top_dir = os.path.abspath(os.path.join(cur_dir, os.pardir))
sys.path.append(top_dir)
# Parse command line arguments
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"-v",
"--verbose",
action="count",
default=0,
help="verbosity level, use: [-v | -vv | -vvv]",
)
parser.add_argument(
"-s", "--start-directory", default=None, help="directory to start discovery"
)
parser.add_argument(
"-p",
"--pattern",
default="test*.py",
help="pattern to match test files ('test*.py' default)",
)
parser.add_argument(
"test", nargs="*", help="test specs (e.g. module.TestCase.test_func)"
)
args = parser.parse_args()
if not args.start_directory:
args.start_directory = cur_dir
if args.verbose > 2:
logging.basicConfig(level=logging.DEBUG, format="DEBUG: %(message)s")
loader = unittest.TestLoader()
if args.test:
# Add particular tests
for test in args.test:
suite = unittest.TestSuite()
suite.addTests(loader.loadTestsFromName(test))
else:
# Find all tests
suite = loader.discover(args.start_directory, args.pattern)
runner = unittest.TextTestRunner(verbosity=args.verbose)
result = runner.run(suite)
return result.wasSuccessful()
if __name__ == "__main__":
    # NOTE: True(success) -> 0, False(fail) -> 1
    # Use sys.exit rather than the bare `exit`, which is injected by the `site` module for
    # interactive use and is not guaranteed to exist in every interpreter configuration.
    sys.exit(not main())

237
tools/python/defs.bzl Normal file
View file

@ -0,0 +1,237 @@
load("@pypi//:requirements.bzl",
_py_requirement = "requirement"
)
load("@rules_python//python:defs.bzl",
"py_runtime",
"py_runtime_pair",
_py_binary = "py_binary",
_py_test = "py_test",
_py_library = "py_library",
)
load("@bazel_skylib//lib:sets.bzl", "sets")
def py_requirement(*args, **kwargs):
    """A re-export of requirement()

    Forwards all arguments unchanged to the `requirement` function loaded from
    `@pypi//:requirements.bzl` and returns its result.
    """
    return _py_requirement(*args, **kwargs)
def py_test(python_version=None, **kwargs):
    """Wrap the rules_python `py_test`, always targeting PY3.

    Fails if the caller asks for any python_version other than "PY3"; every other keyword
    argument is forwarded untouched.
    """
    wants_other_python = python_version and python_version != "PY3"
    if wants_other_python:
        fail("py3k only!")

    return _py_test(python_version = "PY3", **kwargs)
def py_pytest(name, srcs, deps, main=None, python_version=None, args=None, **kwargs):
    """Declare a py_test target that runs its sources under pytest.

    The pytest shim script is prepended to `srcs` and used as the entry point, and the
    pytest requirements are merged (de-duplicated) into `deps`. The `main` parameter is
    accepted but the shim is always the entry point.
    """

    if python_version and python_version != "PY3":
        fail("py3k only!")

    shim = "//tools/python:bzl_pytest_shim.py"

    pytest_reqs = [
        py_requirement("pytest"),
        py_requirement("pytest-pudb"),
        py_requirement("pytest-cov"),
        py_requirement("pytest-timeout"),
    ]
    # Round-trip through a set to drop duplicates between the built-ins and caller deps.
    all_deps = sets.to_list(sets.make(pytest_reqs + deps))
    all_srcs = [shim] + srcs

    py_test(
        name = name,
        srcs = all_srcs,
        main = shim,
        args = args,
        python_version = "PY3",
        deps = all_deps,
        **kwargs,
    )

    # zapp_test(
    #     name = name + ".zapp",
    #     main = f,
    #     args = args,
    #     srcs = srcs,
    #     deps = deps,
    #     test = True,
    #     zip_safe = False,
    #     **kwargs,
    # )

    # FIXME (arrdem 2020-09-27):
    #   Generate a py_image_test.
    #   Not clear how to achieve that.
def py_unittest(srcs=[], **kwargs):
    """Declare a py_test target driven by the unittest shim.

    The shim script is prepended to `srcs` and used as the entry point; all other keyword
    arguments go straight to py_test.
    """
    shim = "//tools/python:bzl_unittest_shim.py"
    all_srcs = [shim] + srcs
    return py_test(main = shim, srcs = all_srcs, **kwargs)
def py_binary(python_version=None, main=None, srcs=None, **kwargs):
    """A re-export of py_binary(), always targeting PY3.

    Args:
      python_version: Must be unset or "PY3"; anything else fails.
      main: Optional entry-point source. When given and not already listed, it is
        prepended to `srcs`.
      srcs: Optional list of sources; treated as empty when unset.
      **kwargs: Forwarded to the underlying py_binary.
    """

    if python_version and python_version != "PY3":
        fail("py3k only!")

    srcs = srcs or []
    # Only splice `main` into srcs when one was actually supplied. With the default
    # main=None, `None not in srcs` is always true and would push None into the list.
    if main and main not in srcs:
        srcs = [main] + srcs

    return _py_binary(
        python_version = "PY3",
        main = main,
        srcs = srcs,
        **kwargs,
    )
def py_library(srcs_version = None, **kwargs):
    """Re-export of py_library() which pins the sources to Python 3.

    Args:
      srcs_version: optional; if given it must be "PY3", anything else fails
        the build.
      **kwargs: forwarded unchanged to the underlying py_library().
    """
    is_py3 = (not srcs_version) or srcs_version == "PY3"
    if not is_py3:
        fail("py3k only!")

    return _py_library(srcs_version = "PY3", **kwargs)
# Provider returned by the py_resources rule; `srcs` holds the rule's `srcs`
# attribute value.
ResourceGroupInfo = provider(
    fields = {"srcs": "files to use from Python"},
)
def _resource_impl(ctx):
    """Implementation of the py_resources rule.

    Flattens the files of every target in `srcs` and returns them as the
    transitive sources of a Python-3-only PyInfo, together with a
    ResourceGroupInfo carrying the original `srcs` attribute.
    """
    files = [f for target in ctx.attr.srcs for f in target.files.to_list()]

    resource_group = ResourceGroupInfo(srcs = ctx.attr.srcs)
    py_info = PyInfo(
        has_py2_only_sources = False,
        has_py3_only_sources = True,
        uses_shared_libraries = False,
        transitive_sources = depset(direct = files),
    )
    return [resource_group, py_info]
# Rule which hands arbitrary files through to Python targets; see
# _resource_impl for the providers it returns.
py_resources = rule(
    implementation = _resource_impl,
    attrs = {
        "srcs": attr.label_list(
            doc = "Files to hand through to Python",
            mandatory = True,
            allow_empty = True,
            allow_files = True,
        ),
    },
)
def py_project(name = None,
               main = None,
               main_deps = None,
               shebang = None,
               lib_srcs = None,
               lib_deps = None,
               lib_data = None,
               test_srcs = None,
               test_deps = None,
               test_data = None):
    """
    A helper for defining conventionally-formatted python project.

    Assumes that there's a {src,test}/{resources,python} where src/ is a library and test/ is local tests only.

    Each test_*.py source generates its own implicit test target. This allows for automatic test parallelism. Non
    test_*.py files are implicitly srcs for the generated test targets. This is the same as making them implicitly a
    testonly lib.

    Args:
      name: name of the binary when `main` is given, otherwise of the library.
      main: optional entry point; when set a py_binary named `name` is generated and the library is named "lib".
      main_deps: extra deps for the generated binary.
      shebang: currently unused.
      lib_srcs: library sources; defaults to src/python/**/*.py.
      lib_deps: library deps.
      lib_data: library data; defaults to the non-Python files under src/resources and src/python.
      test_srcs: test sources; defaults to test/python/**/*.py.
      test_deps: extra deps for every generated test target.
      test_data: test data; defaults to the non-Python files under test/resources and test/python.
    """

    lib_srcs = lib_srcs or native.glob(
        ["src/python/**/*.py"],
        exclude = ["**/*.pyc"],
    )
    lib_data = lib_data or native.glob(
        ["src/resources/**/*", "src/python/**/*"],
        exclude = ["**/*.py", "**/*.pyc"],
    )
    test_srcs = test_srcs or native.glob(
        ["test/python/**/*.py"],
        exclude = ["**/*.pyc"],
    )
    test_data = test_data or native.glob(
        ["test/resources/**/*", "test/python/**/*"],
        exclude = ["**/*.py", "**/*.pyc"],
    )

    # With a `main` the binary takes `name`, so the library needs another one.
    lib_name = name if not main else "lib"

    py_library(
        name = lib_name,
        srcs = lib_srcs,
        deps = lib_deps,
        data = lib_data,
        imports = [
            "src/python",
            "src/resources",
        ],
        visibility = [
            "//visibility:public",
        ],
    )

    if main:
        py_binary(
            name = name,
            main = main,
            deps = (main_deps or []) + [lib_name],
            imports = [
                "src/python",
                "src/resources",
            ],
            visibility = [
                "//visibility:public",
            ],
        )

    # Classify on the file's basename so that only test_*.py files become test
    # targets. A substring match on the whole path would also catch helpers
    # such as my_test_utils.py or anything living under a test_*/ directory.
    test_files = [src for src in test_srcs if src.split("/")[-1].startswith("test_")]

    # Everything else is shared support code; computed once for all targets.
    helper_files = [src for src in test_srcs if not src.split("/")[-1].startswith("test_")]

    for src in test_files:
        py_pytest(
            name = src.split("/")[-1],
            srcs = [src] + helper_files,
            deps = [lib_name] + (test_deps or []),
            data = test_data,
            imports = [
                "test/python",
                "test/resources",
            ],
        )

21
tools/python/pythonshim Executable file
View file

@ -0,0 +1,21 @@
#!/bin/sh

# Bazel STRONGLY disapproves of linking dynamically to a Python interpreter.
# But ... that's exactly what we want to do.
# So this script exists to find a 'compliant' Python install and use that.

PYTHONREV="3.10"
CMD="python${PYTHONREV}"

# First choice: an interpreter of the right version already on PATH.
# `command -v` is the POSIX way to resolve it, so reuse its answer rather than
# asking `which` a second time.
if PYTHON="$(command -v "$CMD")" && [ -x "$PYTHON" ]; then
    exec "$PYTHON" "$@"
fi

case "$(uname)" in
    Darwin)
        # Fall back to the Homebrew location, but only if it really exists:
        # a failed `exec` terminates the shell, which would skip the error
        # message below.
        if [ -x "/opt/homebrew/bin/$CMD" ]; then
            exec "/opt/homebrew/bin/$CMD" "$@"
        fi
        ;;
esac

echo "Error: Unable to find a viable Python executable" >&2
exit 1

View file

@ -0,0 +1,9 @@
attrs
black
cattrs
hypothesis
pudb
pytest
pytest-cov
pytest-pudb
pytest-timeout

View file

@ -0,0 +1,28 @@
attrs==22.2.0
autoflake8==0.4.0
black==23.1.0
cattrs==22.2.0
click==8.1.3
coverage==7.2.1
exceptiongroup==1.1.0
hypothesis==6.68.2
iniconfig==2.0.0
isort==5.12.0
jedi==0.18.2
mypy-extensions==1.0.0
packaging==23.0
parso==0.8.3
pathspec==0.11.0
platformdirs==3.1.0
pluggy==1.0.0
pudb==2022.1.3
pyflakes==3.0.1
Pygments==2.14.0
pytest==7.2.2
pytest-cov==4.0.0
pytest-pudb==0.7.0
pytest-timeout==2.1.0
sortedcontainers==2.4.0
tomli==2.0.1
urwid==2.1.2
urwid-readline==0.13

View file

@ -0,0 +1,136 @@
"""
Validate 3rdparty library licenses as approved.
"""
import re
import pytest
from pkg_resources import DistInfoDistribution, working_set
# Licenses approved as representing non-copyleft and not precluding commercial usage.
# This is all easy, there's a good schema here: every value is a "License :: ..." classifier string.
MIT = "License :: OSI Approved :: MIT License"
APACHE = "License :: OSI Approved :: Apache Software License"
BSD = "License :: OSI Approved :: BSD License"
MPL10 = "License :: OSI Approved :: Mozilla Public License 1.0 (MPL)"
MPL11 = "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)"
MPL20 = "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)"
PSFL = "License :: OSI Approved :: Python Software Foundation License"
LGPL = "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)"
LGPL3 = "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)"
ISCL = "License :: OSI Approved :: ISC License (ISCL)"

APPROVED_LICENSES = [MIT, APACHE, BSD, MPL10, MPL11, MPL20, PSFL, LGPL, LGPL3, ISCL]
# Classifiers which are recognized but deliberately NOT on the approved list.
GPL1 = "License :: OSI Approved :: GNU General Public License"
GPL2 = "License :: OSI Approved :: GNU General Public License v2 (GPLv2)"
GPL3 = "License :: OSI Approved :: GNU General Public License v3 (GPLv3)"

UNAPPROVED_LICENSES = [GPL1, GPL2, GPL3]
# This data is GARBO.
# Hand-maintained aliases from lowercase free-form license names to classifiers.
LICENSES_BY_LOWERNAME = {
    "apache 2.0": APACHE,
    "apache": APACHE,
    "http://www.apache.org/licenses/license-2.0": APACHE,
    "bsd 3": BSD,
    "bsd": BSD,
    "gpl": GPL1,
    "gpl2": GPL2,
    "gpl3": GPL3,
    "lgpl": LGPL,
    "lgpl3": LGPL3,
    "isc": ISCL,
    "mit": MIT,
    "mpl": MPL10,
    "mpl 2.0": MPL20,
    "psf": PSFL,
    # Mash in some cases: each approved classifier is also reachable by the
    # lowercased text of its final " :: " segment.
    **{lic.rsplit(" :: ", 1)[-1].lower(): lic for lic in APPROVED_LICENSES},
}
# As a workaround for packages which don't have correct metadata on PyPI, hand-verified packages
# are listed here by project name.
APPROVED_PACKAGES = []
def bash_license(ln):
    """Normalize a free-form license name, mapping it to a classifier if known.

    The name is lowercased and then repeatedly stripped of noise (punctuation,
    "version", "license", a "v" before a digit, "clause", "or later") until it
    either matches a key of ``LICENSES_BY_LOWERNAME`` or stops changing.

    The lookup is attempted at every step, not just at the end, so that keys
    which themselves contain noise words (e.g. "apache software license") can
    still match before those words are stripped away.

    Returns the mapped classifier string, or the normalized lowercase name
    when nothing matched.
    """
    ln = ln.lower()
    while ln not in LICENSES_BY_LOWERNAME:
        stripped = re.sub(
            r"[(),]|( version)|( license)|( ?v(?=\d))|([ -]clause)|(or later)",
            "",
            ln,
        )
        if stripped == ln:
            # Fixed point reached without a match.
            break
        ln = stripped

    return LICENSES_BY_LOWERNAME.get(ln, ln)
@pytest.mark.parametrize(
    "a,b",
    [
        # Exact short names, in either case.
        ("MIT", MIT),
        ("mit", MIT),
        ("BSD", BSD),
        # "clause" noise is dropped.
        ("BSD 3-clause", BSD),
        ("BSD 3 clause", BSD),
        # Version markers collapse onto the bare number.
        ("GPL3", GPL3),
        ("GPL v3", GPL3),
        ("GPLv3", GPL3),
    ],
)
def test_bash_license(a, b):
    """Free-form spelling `a` must normalize to classifier `b`."""
    normalized = bash_license(a)
    assert normalized == b
def licenses(dist: DistInfoDistribution) -> list:
    """Collect the licenses a distribution declares in its local metadata.

    The metadata file named by ``dist.PKG_INFO`` is read from the installed
    distribution (no network access is performed here). Every
    ``Classifier: License :: ...`` entry is returned verbatim. When there are
    no license classifiers, the free-form ``License:`` field is normalized via
    ``bash_license`` and returned as the only element; a missing field is
    treated as "UNKNOWN".
    """
    # Debug aid; kept so the output of a failing case identifies the package.
    print(dist.project_name, dist.version, type(dist))

    # splitlines() rather than split("\n"): metadata written with CRLF line
    # endings would otherwise leave a trailing "\r" on every value and defeat
    # exact comparison against the approved classifier strings.
    meta = dist.get_metadata(dist.PKG_INFO).splitlines()

    classifier_prefix = "Classifier: "
    lics = [
        line[len(classifier_prefix):]
        for line in meta
        if line.startswith(classifier_prefix + "License ::")
    ]
    if lics:
        return lics

    # Fall back to the free-form field. Slice off the field name and strip
    # surrounding whitespace so "License:MIT" and "License:  MIT " both work.
    license_prefix = "License:"
    declared = next(
        (line[len(license_prefix):].strip() for line in meta if line.startswith(license_prefix)),
        "UNKNOWN",
    )
    return [bash_license(declared)]
@pytest.mark.parametrize(
    "dist",
    (w for w in working_set if w.location.find("arrdem_source_pypi") >= 0),
    ids=lambda dist: dist.project_name,
)
def test_approved_license(dist: DistInfoDistribution):
    """Ensure that a given package is either allowed by name or uses an approved license."""

    _licenses = licenses(dist)
    print(dist.location)

    # Approved by explicit allow-list entry, or by at least one approved license.
    is_approved = dist.project_name in APPROVED_PACKAGES or any(
        lic in APPROVED_LICENSES for lic in _licenses
    )
    assert (
        is_approved
    ), f"{dist.project_name} ({dist.location}) was not approved and its license(s) were unknown {_licenses!r}"