Compare commits

...

10 commits

26 changed files with 946 additions and 103 deletions

1
.bazelignore Normal file
View file

@ -0,0 +1 @@
.git

2
.bazelrc Normal file
View file

@ -0,0 +1,2 @@
test --test_output=errors
build --keep_going

1
.bazelversion Normal file
View file

@ -0,0 +1 @@
6.0.0

1
.envrc Normal file
View file

@ -0,0 +1 @@
export VIRTUAL_ENV=/home/arrdem/.virtualenvs/flowmetal

2
.gitignore vendored
View file

@ -1,2 +1,4 @@
/**/__pycache__
/**/*.egg-info
scratch
bazel-*

View file

@ -1,7 +0,0 @@
Copyright 2019 Reid 'arrdem' McKenzie
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View file

@ -33,4 +33,4 @@ This centering of evented communication makes Flowmetal ideal for **coordination
## License
Published under the MIT license. See [LICENSE.md](LICENSE.md)
Copyright © 2023 Reid D. 'arrdem' McKenzie, all rights reserved.

52
WORKSPACE Normal file
View file

@ -0,0 +1,52 @@
workspace(
    name = "flowmetal",
)

# http_archive only needs to be loaded once for the whole file.
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

# bazel-skylib 1.4.1, fetched from the Bazel mirror first and GitHub second.
http_archive(
    name = "bazel_skylib",
    sha256 = "b8a1527901774180afc798aeb28c4634bdccf19c4d98e7bdd1ce79d1fe9aaad7",
    urls = [
        "https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.4.1/bazel-skylib-1.4.1.tar.gz",
        "https://github.com/bazelbuild/bazel-skylib/releases/download/1.4.1/bazel-skylib-1.4.1.tar.gz",
    ],
)

load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")

bazel_skylib_workspace()

# rules_python is pinned to an exact commit; the archive prefix and URL are both
# derived from this one value so they cannot drift apart.
rules_python_version = "c504355672223144cefb2cbf3f69e2d38e7e2726"

http_archive(
    name = "rules_python",
    sha256 = "3f12b492dbf7d56b0e3deed81f21d56c3241babaa52d7eb525cb7c657bba9125",
    strip_prefix = "rules_python-{}".format(rules_python_version),
    url = "https://github.com/bazelbuild/rules_python/archive/{}.zip".format(rules_python_version),
)

load("@rules_python//python:repositories.bzl", "python_register_toolchains")

python_register_toolchains(
    name = "python3_10",
    python_version = "3.10",
)

load("@python3_10//:defs.bzl", python3_10 = "interpreter")
load("@rules_python//python:pip.bzl", "pip_parse")

# Create a central repo that knows about the dependencies needed from
# requirements_lock.txt.
pip_parse(
    name = "pypi",
    python_interpreter_target = python3_10,
    requirements_lock = "//tools/python:requirements_lock.txt",
)

load("@pypi//:requirements.bzl", "install_deps")

install_deps()

3
components/hydra/BUILD Normal file
View file

@ -0,0 +1,3 @@
# Declares the hydra component with py_project's defaults: with no explicit
# srcs given, py_project globs src/python for the library and test/python for
# tests (see tools/python/defs.bzl).
py_project(
name = "hydra",
)

View file

@ -0,0 +1,13 @@
# Hydra; a multi-headed Python-on-Python interpreter.
Hydra draws extensively on the OCaml and Scheme prior art for coroutine-based
interpreters to produce a Python interpreter along the same lines.
At the top of the interpreter we see an extensible `while` loop. The interpreter
performs one "step" - evaluating a single statement - producing a new state and
a "next PC"; a path into the AST identifying the next statement to be executed.
This model enables single stepping, stack analysis and most importantly
interruptions as for snapshotting or suspending.
Hydra is the basis for the Flowmetal interpreter.

View file

@ -0,0 +1,201 @@
#!/usr/bin/env python3.10
"""Hydra; the multi-headed Python interpreter.
> Chop off one head and two more grow back in its place.
"""
import ast
import builtins
import logging
import os
import sys
from typing import Optional, Type, Union, List, Callable
from pathlib import Path
from importlib import __import__
from attrs import Factory, define, field
log = logging.getLogger(__name__)
@define
class ANamespace:
    """A mutable name-to-value mapping tied to the AST node that owns it.

    The mapping protocol methods (``[]``, ``get``, ``del``, ``in``) all
    delegate directly to the underlying dict ``d``.

    :attribute node: The AST node this namespace belongs to
    :attribute d: The bindings held by this namespace
    :attribute parent: The enclosing namespace, or None if there is none
    """

    node: ast.AST
    # A Factory gives every namespace its own dict. A literal `{}` default is a
    # single object that attrs would hand to every instance, so bindings made
    # in one namespace would leak into all the others.
    d: dict = Factory(dict)
    # The parent is a namespace *instance*, not a class.
    parent: Optional["ANamespace"] = None

    def __getitem__(self, k):
        return self.d[k]

    def get(self, k, default=None):
        return self.d.get(k, default)

    def __setitem__(self, k, v):
        self.d[k] = v

    def __delitem__(self, k):
        del self.d[k]

    def __contains__(self, k):
        return k in self.d

    def __str__(self):
        return "<{} {}>".format(self.__class__.__name__, self.d)
@define
class ModuleNS(ANamespace):
    """The namespace of a module; adds nothing beyond ANamespace."""

    # parent: Optional["ModuleNS"] = None
@define
class FunctionNS(ANamespace):
    """The namespace of a function; adds nothing beyond ANamespace."""
@define
class ClassNS(ANamespace):
    """The namespace of a class body."""

    # NOTE(review): presumably the class object built from this namespace once
    # its body has run - confirm against the code that assigns it.
    cls: Optional[type] = None
# TODO (arrdem 2023-03-08):
# This interpreter works well enough to import `requests` and many other libraries and do some
# work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
# direct execution, there's really no way to jam breakpoints or checkpoints or resume points into
# program execution. Which is kinda the goal of the whole project.
#
# This interpreter, while complete, needs to get refactored into probably a `yield` based
# coroutine structure wherein individual operations explicitly `yield` to an outer state
# management loop which effectively trampolines single statements together with state management
# logic.
#
# The outer interpreter needs to be able to check the "step budget" and decide if it's time for
# the program to suspend.
#
# Individual steps (workflow calls/function calls) may also cause the program to suspend.
#
# Suspending requires signaling the top level loop, and the top level loop needs both the
# namespace tree and some sort of cursor or address into the AST under interpretation
# representing where to resume. The logical equivalent of a program counter, but a tree path.
@define
class Module:
    """A parsed source file together with the namespace it runs in.

    :attribute fname: Where the source came from
    :attribute tree: The parsed AST of the source
    :attribute ns: The module's namespace; defaults to a fresh ModuleNS over `tree`
    """

    fname: Union[Path, str]
    tree: ast.AST
    # The default needs the instance itself because it is derived from `tree`.
    ns: "ANamespace" = field(
        default=Factory(lambda self: ModuleNS(self.tree), takes_self=True)
    )
@define
class Pc:
    """A 'program counter' as a list of AST indices."""

    # Starts out as a fresh empty list for every Pc.
    idxs: List[int] = field(factory=list)
@define
class Frame:
    """An 'execution frame' as a PC, AST and namespace.

    :attribute pc: The program counter within `ast`
    :attribute ast: The AST this frame refers to
    :attribute ns: The namespace this frame refers to
    """

    pc: Pc
    ast: ast.AST
    ns: ANamespace
@define
class Cont:
    """A 'Continuation' (thread/coroutine) of execution.

    :attribute id: An identifier for this continuation
    :attribute entry: The frame the continuation starts from
    :attribute stack: The frame stack; defaults to a list holding only `entry`
    """

    id: int
    entry: Frame
    # The default needs the instance itself because it is seeded from `entry`.
    stack: List[Frame] = field(
        default=Factory(lambda self: [self.entry], takes_self=True)
    )
@define
class Vm:
    """A bag of shared state.

    :attribute path: The equivalent of sys.path
    :attribute modules: The equivalent of sys.modules
    :attribute conts: All interpreter continuations
    :attribute log: A log of all statements executed by any continuation

    It should be possible to reconstruct the VM's state simply by replaying the
    log in statement order, as a fallback for dealing with C-extension state,
    connections and soforth.
    """

    path: list = Factory(lambda: list(sys.path))
    modules: dict = Factory(dict)
    conts: dict = Factory(dict)
    log: list = Factory(list)

    def handle_import(
        self, thread, name, globals=None, locals=None, fromlist=(), level=0
    ):
        """Resolve `name` to a module, loading it if it isn't already cached.

        Resolution order: this VM's `modules` cache, the host's `sys.modules`,
        a `.py` file found under a directory on `self.path`, and finally the
        host's native `__import__`.

        :param thread: Accepted for the caller's benefit; not used here.
        :param name: The dotted module name to import.
        :param globals, locals, fromlist, level: Forwarded unchanged to the
            native `__import__` fallback.
        :returns: Whatever is cached under `name` in `self.modules`.
        :raises RuntimeError: If an entry on `self.path` is a file rather than
            a directory; archive imports aren't supported.
        """
        log.debug(" Attempting to import '{}'".format(name))

        if name in self.modules:
            return self.modules[name]

        if name in sys.modules:
            # FIXME: Need to hack sys, os and several other built-in packages here
            log.debug("Short-circuited loading %r from bootstrap sys.modules", name)
            self.modules[name] = sys.modules[name]
            return self.modules[name]

        # Keep the dotted `name` intact: it is the cache key, the module's
        # __name__, and what the native fallback must receive. Only the
        # filesystem probe uses the path form.
        relpath = name.replace(".", os.path.sep)
        for e in self.path:
            if os.path.isdir(e):
                for ext in [
                    ".py",
                ]:
                    f = os.path.join(e, relpath + ext)
                    log.debug(" Checking {}".format(f))
                    if os.path.exists(f):
                        mod = self.execute_load(f, name)
                        self.modules[name] = mod.ns
                        # Return at once so the search stops at the first hit
                        # and the native fallback can't overwrite it.
                        return self.modules[name]

            elif os.path.isfile(e):
                # FIXME (arrdem 2021-05-31)
                raise RuntimeError(
                    "Import from .zip/.whl/.egg archives aren't supported yet"
                )

        log.debug("Falling back to native import for %r", name)
        self.modules[name] = __import__(name, globals, locals, fromlist, level)
        return self.modules[name]

    def execute_module(self, module: Module):
        """Execute all the Expressions and Statements in a given Module sequentially, as in a single thread."""
        # Not implemented yet: the body is only this docstring, so calling it
        # does nothing and returns None.

    def execute_load(self, fname, name):
        """Execute the given file as if it were an imported module.

        :param fname: Path of the source file to read and parse.
        :param name: The value to bind as the module's `__name__`.
        :returns: The Module that was created and executed.
        """
        # Read bytes and let the parser pick the encoding, so a PEP 263 coding
        # cookie is honoured instead of guessing from the host locale.
        with open(fname, "rb") as f:
            tree = ast.parse(f.read(), filename=str(fname))

        mod = Module(fname, tree)
        mod.ns["__name__"] = name
        # Register before executing, so the module is visible in the cache
        # while its own body runs.
        self.modules[name] = mod.ns
        self.execute_module(mod)
        return mod

    def execute_dunder_main(self, fname):
        """Execute the given file as if it were a script entrypoint."""
        return self.execute_load(fname, "__main__")

View file

@ -0,0 +1,12 @@
#!/usr/bin/env python3.10
import click
# Root command group for the hydra CLI; it has no behaviour of its own.
# This is deliberately a comment and not a docstring: click would surface a
# docstring as the group's help text, changing the program's output.
@click.group()
def cli():
    pass


if __name__ == "__main__":
    cli.main(prog_name="hydra")

View file

@ -2,7 +2,7 @@
from datetime import timedelta
from time import sleep
from flowmetal import workflow, timeout, CancelledError, TimeoutError, Task
from flowmetal import workflow, CancelledError, TimeoutError, Task
def cancellable_activity():

8
pyproject.toml Normal file
View file

@ -0,0 +1,8 @@
[tool.isort]
# NOTE(review): 311 targets Python 3.11, but the Bazel toolchain in WORKSPACE
# and the interpreter shim both pin Python 3.10 - confirm which is intended.
py_version=311
line_length=100
# Skip VCS metadata and Bazel's output/convenience directories.
skip_glob = [
".git/*",
".bazel/*",
"bazel-*",
]

View file

@ -1,47 +1,17 @@
# flake8: noqa: all
# Python AST interpreter written in Python
# A Python AST interpreter written in Python
#
# This module is part of the Pycopy https://github.com/pfalcon/pycopy
# project.
# This module is part of the Pycopy https://github.com/pfalcon/pycopy project.
#
# Copyright (c) 2019 Paul Sokolovsky
#
# The MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Modified by Reid D. 'ardem' Mckenzie in 2021 to be a bit more fully-featured
# and usable for running 'real' code as part of an experiment in implementing a
# durable Python interpreter atop the original pycopy substrate.
# Copyright (c) 2019 Paul Sokolovsky, published under the MIT License
import ast
import builtins
import logging
import os
import sys
if sys.version_info < (3, 0, 0):
builtins = __builtins__
else:
import builtins
from typing import Optional, Type
log = logging.getLogger(__name__)
@ -55,7 +25,7 @@ class StrictNodeVisitor(ast.NodeVisitor):
class ANamespace:
def __init__(self, node):
self.d = {}
self.parent = None
self.parent: Optional[Type["ANamespace"]] = None
# Cross-link namespace to AST node. Note that we can't do the
# opposite, because for one node, there can be different namespaces.
self.node = node
@ -80,6 +50,7 @@ class ANamespace:
class ModuleNS(ANamespace):
# parent: Optional["ModuleNS"] = None
pass
@ -88,7 +59,7 @@ class FunctionNS(ANamespace):
class ClassNS(ANamespace):
pass
cls: Optional[type] = None
# Pycopy by default doesn't support direct slice construction, use helper
@ -102,16 +73,10 @@ slice_getter = SliceGetter()
def arg_name(arg):
if sys.version_info < (3, 0, 0):
return arg.id
else:
return arg.arg
def kwarg_defaults(args):
if sys.version_info < (3, 0, 0):
return args.defaults
else:
return args.kw_defaults
@ -154,11 +119,12 @@ class InterpFuncWrap:
return self.interp.call_func(self.node, self, *args, **kwargs)
# Python don't fully treat objects, even those defining __call__() special method, as a true callable. For example, such
# objects aren't automatically converted to bound methods if looked up as another object's attributes. As we want our
# "interpreted functions" to behave as close as possible to real functions, we just wrap function object with a real
# function. An alternative might have been to perform needed checks and explicitly bind a method using
# types.MethodType() in visit_Attribute (but then maybe there would be still other cases of "callable object" vs
# Python don't fully treat objects, even those defining __call__() special method, as a true
# callable. For example, such objects aren't automatically converted to bound methods if looked up
# as another object's attributes. As we want our "interpreted functions" to behave as closely as
# possible to real functions, we just wrap function object with a real function. An alternative
# might have been to perform needed checks and explicitly bind a method using types.MethodType() in
# visit_Attribute (but then maybe there would be still other cases of "callable object" vs
# "function" discrepancies).
def InterpFunc(fun):
def func(*args, **kwargs):
@ -195,26 +161,50 @@ class InterpModule:
return list(self.ns.d.keys())
# TODO (arrdem 2023-03-08):
# This interpreter works well enough to import `requests` and many other libraries and do some
# work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
# direct execution, there's really no way to jam breakpoints or checkpoints or resume points into
# program execution. Which is kinda the goal of the whole project.
#
# This interpreter, while complete, needs to get refactored into probably a `yield` based
# coroutine structure wherein individual operations explicitly `yield` to an outer state
# management loop which effectively trampolines single statements together with state management
# logic.
#
# The outer interpreter needs to be able to check the "step budget" and decide if it's time for
# the program to suspend.
#
# Individual steps (workflow calls/function calls) may also cause the program to suspend.
#
# Suspending requires signaling the top level loop, and the top level loop needs both the
# namespace tree and the some sort of cursor or address into the AST under interpretation
# representing where to resume. The logical equivalent of a program counter, but a tree path.
class ModuleInterpreter(StrictNodeVisitor):
"""An interpreter specific to a single module."""
def __init__(self, system, fname, node):
self.system = system
self.fname = fname
self.ns = self.module_ns = ModuleNS(node)
self.module_ns: ModuleNS = ModuleNS(node)
self.ns: ANamespace = self.module_ns
# Call stack (in terms of function AST nodes).
self.call_stack = []
# To implement "store" operation, we need to arguments: location and value to store. The operation itself is
# handled by a node visitor (e.g. visit_Name), and location is represented by AST node, but there's no support
# to pass additional arguments to a visitor (likely, because it would be a burden to explicit pass such
# additional arguments thru the chain of visitors). So instead, we store this value as field. As interpretation
# happens sequentially, there's no risk that it will be overwritten "concurrently".
# To implement the "store" operation, we need two arguments: location and value to store. The
# operation itself is handled by a node visitor (e.g. visit_Name), and location is
# represented by AST node, but there's no support to pass additional arguments to a visitor
# (likely, because it would be a burden to explicitly pass such additional arguments thru the
# chain of visitors). So instead, we store this value as a field. As interpretation happens
# sequentially, there's no risk that it will be overwritten "concurrently".
self.store_val = None
# Current active exception, for bare "raise", which doesn't work across function boundaries (and that's how we
# have it - exception would be caught in visit_Try, while re-rasing would happen in visit_Raise).
# Current active exception, for bare "raise", which doesn't work across function boundaries
# (and that's how we have it - exception would be caught in visit_Try, while re-raising would
# happen in visit_Raise).
self.cur_exc = []
def push_ns(self, new_ns):
@ -222,6 +212,7 @@ class ModuleInterpreter(StrictNodeVisitor):
self.ns = new_ns
def pop_ns(self):
assert self.ns is not None
self.ns = self.ns.parent
def stmt_list_visit(self, lst):
@ -247,13 +238,13 @@ class ModuleInterpreter(StrictNodeVisitor):
return self.visit(node.body)
def visit_ClassDef(self, node):
self.push_ns(ClassNS(node))
ns: ClassNS = ClassNS(node)
self.push_ns(ns)
try:
self.stmt_list_visit(node.body)
except Exception:
self.pop_ns()
raise
ns = self.ns
self.pop_ns()
cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d)
cls = self.wrap_decorators(cls, node)
@ -266,8 +257,7 @@ class ModuleInterpreter(StrictNodeVisitor):
return self.prepare_func(node)
def visit_FunctionDef(self, node):
# Defaults are evaluated at function definition time, so we
# need to do that now.
# Defaults are evaluated at function definition time, so we need to do that now.
func = self.prepare_func(node)
func = self.wrap_decorators(func, node)
self.ns[node.name] = func
@ -290,11 +280,10 @@ class ModuleInterpreter(StrictNodeVisitor):
all_args.add(arg_name(a))
if v is not None:
d[arg_name(a)] = self.visit(v)
# We can store cached argument names of a function in its node -
# it's static.
# We can store cached argument names of a function in its node - it's static.
node.args.all_args = all_args
# We can't store the values of default arguments - they're dynamic,
# may depend on the lexical scope.
# We can't store the values of default arguments - they're dynamic, may depend on the
# lexical scope.
func.defaults_dict = d
return InterpFunc(func)
@ -308,9 +297,8 @@ class ModuleInterpreter(StrictNodeVisitor):
)
argspec = node.args
# If there's vararg, either offload surplus of args to it, or init
# it to empty tuple (all in one statement). If no vararg, error on
# too many args.
# If there's vararg, either offload surplus of args to it, or init it to empty tuple (all in
# one statement). If no vararg, error on too many args.
#
# Note that we have to do the .posonlyargs dance
if argspec.vararg:
@ -329,9 +317,8 @@ class ModuleInterpreter(StrictNodeVisitor):
for a, value in zip(argspec.posonlyargs, args):
self.ns[arg_name(a)] = value
# Process incoming keyword arguments, putting them in namespace if
# actual arg exists by that name, or offload to function's kwarg
# if any. All make needed checks and error out.
# Process incoming keyword arguments, putting them in namespace if actual arg exists by that
# name, or offload to function's kwarg if any. All make needed checks and error out.
func_kwarg = {}
for k, v in kwargs.items():
if k in argspec.all_args:
@ -351,9 +338,8 @@ class ModuleInterpreter(StrictNodeVisitor):
if argspec.kwarg:
self.ns[arg_name(argspec.kwarg)] = func_kwarg
# Finally, overlay default values for arguments not yet initialized.
# We need to do this last for "multiple values for the same arg"
# check to work.
# Finally, overlay default values for arguments not yet initialized. We need to do this last
# for "multiple values for the same arg" check to work.
for k, v in interp_func.defaults_dict.items():
if k not in self.ns:
self.ns[k] = v
@ -376,8 +362,8 @@ class ModuleInterpreter(StrictNodeVisitor):
def call_func(self, node, interp_func, *args, **kwargs):
self.call_stack.append(node)
# We need to switch from dynamic execution scope to lexical scope
# in which function was defined (then switch back on return).
# We need to switch from dynamic execution scope to lexical scope in which function was
# defined (then switch back on return).
dyna_scope = self.ns
self.ns = interp_func.lexical_scope
self.push_ns(FunctionNS(node))
@ -508,9 +494,9 @@ class ModuleInterpreter(StrictNodeVisitor):
def visit_AugAssign(self, node):
assert isinstance(node.target.ctx, ast.Store)
# Not functional style, oops. Node in AST has store context, but we
# need to read its value first. To not construct a copy of the entire
# node with load context, we temporarily patch it in-place.
# Not functional style, oops. Node in AST has store context, but we need to read its value
# first. To not construct a copy of the entire node with load context, we temporarily patch
# it in-place.
save_ctx = node.target.ctx
node.target.ctx = ast.Load()
var_val = self.visit(node.target)
@ -518,12 +504,11 @@ class ModuleInterpreter(StrictNodeVisitor):
rval = self.visit(node.value)
# As augmented assignment is statement, not operator, we can't put them
# all into map. We could instead directly lookup special inplace methods
# (__iadd__ and friends) and use them, with a fallback to normal binary
# operations, but from the point of view of this interpreter, presence
# of such methods is an implementation detail of the object system, it's
# not concerned with it.
# As augmented assignment is statement, not operator, we can't put them all into map. We
# could instead directly lookup special inplace methods (__iadd__ and friends) and use them,
# with a fallback to normal binary operations, but from the point of view of this
# interpreter, presence of such methods is an implementation detail of the object system,
# it's not concerned with it.
op = type(node.op)
if op is ast.Add:
var_val += rval
@ -682,10 +667,11 @@ class ModuleInterpreter(StrictNodeVisitor):
if func is builtins.super and not args:
if not self.ns.parent or not isinstance(self.ns.parent, ClassNS):
raise RuntimeError("super(): no arguments")
# As we're creating methods dynamically outside of class, super() without argument won't work, as that
# requires __class__ cell. Creating that would be cumbersome (Pycopy definitely lacks enough introspection
# for that), so we substitute 2 implied args (which argumentless super() would take from cell and 1st arg to
# func). In our case, we take them from prepared bookkeeping info.
# As we're creating methods dynamically outside of class, super() without argument won't
# work, as that requires __class__ cell. Creating that would be cumbersome (Pycopy
# definitely lacks enough introspection for that), so we substitute 2 implied args
# (which argumentless super() would take from cell and 1st arg to func). In our case, we
# take them from prepared bookkeeping info.
args = (self.ns.parent.cls, self.ns["self"])
return func(*args, **kwargs)
@ -732,7 +718,7 @@ class ModuleInterpreter(StrictNodeVisitor):
ast.Div: lambda x, y: x / y,
ast.FloorDiv: lambda x, y: x // y,
ast.Mod: lambda x, y: x % y,
ast.Pow: lambda x, y: x ** y,
ast.Pow: lambda x, y: x**y,
ast.LShift: lambda x, y: x << y,
ast.RShift: lambda x, y: x >> y,
ast.BitAnd: lambda x, y: x & y,
@ -901,7 +887,7 @@ class ModuleInterpreter(StrictNodeVisitor):
def visit_Print(self, node):
# In Py2k only
raise NotImplementedError("Absolutely not. Use __future__.")
raise SyntaxError("Absolutely not. Use __future__.")
def visit_Str(self, node):
return node.s

View file

@ -19,7 +19,6 @@ print(a.baz)
import random
for _ in range(10):
print(random.randint(0, 1024))
@ -30,5 +29,4 @@ def bar(a, b, **bs):
import requests
print(len(requests.get("https://pypi.org/pypi/requests/json").text))

3
tools/build_rules/BUILD Normal file
View file

@ -0,0 +1,3 @@
package(default_visibility = ["//visibility:public"])
licenses(["notice"])

View file

@ -0,0 +1,15 @@
# -*- mode: bazel -*-
# A global prelude for all BUILD[.bazel] files
load("//tools/python:defs.bzl",
"py_library",
"py_binary",
"py_unittest",
"py_pytest",
"py_resources",
"py_project",
)
load("@pypi//:requirements.bzl",
py_requirement="requirement"
)

45
tools/python/BUILD Normal file
View file

@ -0,0 +1,45 @@
load("@rules_python//python:defs.bzl",
"py_runtime_pair",
)
load("@pypi//:requirements.bzl", "all_requirements")
package(default_visibility = ["//visibility:public"])
licenses(["notice"])
# Make the macro file, the test shims, the interpreter shim and the lockfile
# addressable by label from other packages.
exports_files([
"defs.bzl",
"bzl_pytest_shim.py",
"bzl_unittest_shim.py",
"pythonshim",
"requirements_lock.txt",
])
# A Python 3 runtime whose "interpreter" is the pythonshim script.
py_runtime(
name = "python3_runtime",
files = [],
interpreter = ":pythonshim",
python_version = "PY3",
visibility = ["//visibility:public"],
)
# Python 3 only: no Python 2 runtime is provided.
py_runtime_pair(
name = "python_runtime",
py2_runtime = None,
py3_runtime = ":python3_runtime",
)
# Exposes the runtime pair above as a Python toolchain.
toolchain(
name = "python3_toolchain",
toolchain = ":python_runtime",
toolchain_type = "@bazel_tools//tools/python:toolchain_type",
)
# The license check depends on every pinned requirement so that each one is
# present to be inspected.
py_pytest(
name = "test_licenses",
srcs = [
"test_licenses.py",
],
deps = all_requirements,
)

View file

@ -0,0 +1,10 @@
"""A shim for executing pytest."""
import sys
import pytest
if __name__ == "__main__":
    # Always ignore the `external` directory, then pass the caller's own
    # arguments through untouched.
    pytest_args = ["--ignore=external", *sys.argv[1:]]
    # Echo the effective argument list to stderr for debugging.
    print(pytest_args, file=sys.stderr)
    sys.exit(pytest.main(pytest_args))

View file

@ -0,0 +1,66 @@
"""Universal launcher for unit tests"""
import argparse
import logging
import os
import sys
import unittest
def main():
"""Parse args, collect tests and run them"""
# Disable *.pyc files
sys.dont_write_bytecode = True
# Add ".." to module search path
cur_dir = os.path.dirname(os.path.realpath(__file__))
top_dir = os.path.abspath(os.path.join(cur_dir, os.pardir))
sys.path.append(top_dir)
# Parse command line arguments
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"-v",
"--verbose",
action="count",
default=0,
help="verbosity level, use: [-v | -vv | -vvv]",
)
parser.add_argument(
"-s", "--start-directory", default=None, help="directory to start discovery"
)
parser.add_argument(
"-p",
"--pattern",
default="test*.py",
help="pattern to match test files ('test*.py' default)",
)
parser.add_argument(
"test", nargs="*", help="test specs (e.g. module.TestCase.test_func)"
)
args = parser.parse_args()
if not args.start_directory:
args.start_directory = cur_dir
if args.verbose > 2:
logging.basicConfig(level=logging.DEBUG, format="DEBUG: %(message)s")
loader = unittest.TestLoader()
if args.test:
# Add particular tests
for test in args.test:
suite = unittest.TestSuite()
suite.addTests(loader.loadTestsFromName(test))
else:
# Find all tests
suite = loader.discover(args.start_directory, args.pattern)
runner = unittest.TextTestRunner(verbosity=args.verbose)
result = runner.run(suite)
return result.wasSuccessful()
if __name__ == "__main__":
    # NOTE: True(success) -> 0, False(fail) -> 1
    # sys.exit and not the bare `exit`, which is only installed by the `site`
    # module and so isn't guaranteed to exist.
    sys.exit(not main())

237
tools/python/defs.bzl Normal file
View file

@ -0,0 +1,237 @@
load("@pypi//:requirements.bzl",
_py_requirement = "requirement"
)
load("@rules_python//python:defs.bzl",
"py_runtime",
"py_runtime_pair",
_py_binary = "py_binary",
_py_test = "py_test",
_py_library = "py_library",
)
load("@bazel_skylib//lib:sets.bzl", "sets")
def py_requirement(*args, **kwargs):
    """Pass-through to `requirement()` from `@pypi//:requirements.bzl`.

    Re-exported under a `py_` name so BUILD files can use it alongside the
    other macros in this file.
    """
    label = _py_requirement(*args, **kwargs)
    return label
def py_test(python_version = None, **kwargs):
    """Wrap rules_python's py_test, forcing `python_version = "PY3"`.

    Args:
      python_version: Optional; anything other than unset or "PY3" is an error.
      **kwargs: Forwarded unchanged to the underlying py_test.
    """
    is_py3 = (not python_version) or python_version == "PY3"
    if not is_py3:
        fail("py3k only!")

    return _py_test(
        python_version = "PY3",
        **kwargs
    )
def py_pytest(name, srcs, deps, main = None, python_version = None, args = None, **kwargs):
    """A py_test target which runs its sources under pytest.

    Args:
      name: Name of the generated test target.
      srcs: Test sources; the pytest shim is prepended to them.
      deps: Extra dependencies, merged with the pytest plugins listed below.
      main: Accepted but not used; the pytest shim is always the entry point.
      python_version: Optional; anything other than unset or "PY3" is an error.
      args: Arguments handed to the test target.
      **kwargs: Forwarded unchanged to py_test.
    """
    if python_version and python_version != "PY3":
        fail("py3k only!")

    shim = "//tools/python:bzl_pytest_shim.py"

    pytest_deps = [
        py_requirement(pkg)
        for pkg in [
            "pytest",
            "pytest-pudb",
            "pytest-cov",
            "pytest-timeout",
        ]
    ]

    # Round-trip through a skylib set so a dep listed twice appears only once.
    all_deps = sets.to_list(sets.make(pytest_deps + deps))

    py_test(
        name = name,
        srcs = [shim] + srcs,
        main = shim,
        args = args,
        python_version = "PY3",
        deps = all_deps,
        **kwargs
    )

    # zapp_test(
    # name = name + ".zapp",
    # main = f,
    # args = args,
    # srcs = srcs,
    # deps = deps,
    # test = True,
    # zip_safe = False,
    # **kwargs,
    # )
    # FIXME (arrdem 2020-09-27):
    # Generate a py_image_test.
    # Not clear how to achieve that.
def py_unittest(srcs = [], **kwargs):
    """A py_test target driven by the unittest shim.

    Args:
      srcs: Test sources; the unittest shim is prepended to them.
      **kwargs: Forwarded unchanged to py_test.
    """
    shim = "//tools/python:bzl_unittest_shim.py"
    all_srcs = [shim] + srcs

    return py_test(
        main = shim,
        srcs = all_srcs,
        **kwargs
    )
def py_binary(python_version = None, main = None, srcs = None, **kwargs):
    """Wrap rules_python's py_binary, forcing `python_version = "PY3"`.

    Args:
      python_version: Optional; anything other than unset or "PY3" is an error.
      main: The entry-point source; added to `srcs` if not already listed.
      srcs: Sources for the binary; defaults to an empty list.
      **kwargs: Forwarded unchanged to the underlying py_binary.
    """
    if python_version and python_version != "PY3":
        fail("py3k only!")

    srcs = srcs or []

    # Only inject `main` when one was actually given; otherwise a None entry
    # would be prepended to srcs.
    if main and main not in srcs:
        srcs = [main] + srcs

    return _py_binary(
        python_version = "PY3",
        main = main,
        srcs = srcs,
        **kwargs
    )
def py_library(srcs_version = None, **kwargs):
    """Wrap rules_python's py_library, forcing `srcs_version = "PY3"`.

    Args:
      srcs_version: Optional; anything other than unset or "PY3" is an error.
      **kwargs: Forwarded unchanged to the underlying py_library.
    """
    is_py3 = (not srcs_version) or srcs_version == "PY3"
    if not is_py3:
        fail("py3k only!")

    return _py_library(
        srcs_version = "PY3",
        **kwargs
    )
# Provider returned by py_resources; carries the rule's `srcs` attribute.
ResourceGroupInfo = provider(
fields = {
"srcs": "files to use from Python",
},
)
def _resource_impl(ctx):
    """Implementation of py_resources.

    Flattens the files of every `srcs` target and hands them back both as a
    ResourceGroupInfo and as the transitive sources of a PyInfo.
    """
    files = [
        f
        for target in ctx.attr.srcs
        for f in target.files.to_list()
    ]

    py_info = PyInfo(
        has_py2_only_sources = False,
        has_py3_only_sources = True,
        uses_shared_libraries = False,
        transitive_sources = depset(direct = files),
    )

    # Note that the provider carries the original targets, not the flattened
    # file list.
    resource_info = ResourceGroupInfo(
        srcs = ctx.attr.srcs,
    )

    return [resource_info, py_info]
# Rule exposing arbitrary files to Python targets; see _resource_impl for the
# providers it returns. `srcs` must be given but may be an empty list.
py_resources = rule(
implementation = _resource_impl,
attrs = {
"srcs": attr.label_list(
allow_empty = True,
mandatory = True,
allow_files = True,
doc = "Files to hand through to Python",
),
},
)
def py_project(
        name = None,
        main = None,
        main_deps = None,
        shebang = None,
        lib_srcs = None,
        lib_deps = None,
        lib_data = None,
        test_srcs = None,
        test_deps = None,
        test_data = None):
    """
    A helper for defining conventionally-formatted python project.

    Assumes that there's a {src,test}/{resources,python} where src/ is a library and test/ is local tests only.

    Each test_*.py source generates its own implicit test target. This allows for automatic test parallelism. Non
    test_*.py files are implicitly srcs for the generated test targets. This is the same as making them implicitly a
    testonly lib.

    Args:
      name: Name of the library, or of the binary when `main` is given (the library is then called "lib").
      main: Optional entry point; when set, a py_binary is generated as well.
      main_deps: Extra deps for the binary, in addition to the library.
      shebang: Accepted but not used by this macro.
      lib_srcs, lib_deps, lib_data: Library inputs; srcs and data fall back to globs under src/.
      test_srcs, test_deps, test_data: Test inputs; srcs and data fall back to globs under test/.
    """
    compiled = ["**/*.pyc"]
    not_data = ["**/*.py", "**/*.pyc"]

    # Anything not given explicitly (or given empty) falls back to the conventional layout.
    if not lib_srcs:
        lib_srcs = native.glob(["src/python/**/*.py"], exclude = compiled)
    if not lib_data:
        lib_data = native.glob(["src/resources/**/*", "src/python/**/*"], exclude = not_data)
    if not test_srcs:
        test_srcs = native.glob(["test/python/**/*.py"], exclude = compiled)
    if not test_data:
        test_data = native.glob(["test/resources/**/*", "test/python/**/*"], exclude = not_data)

    # With a binary present, the binary takes `name` and the library becomes "lib".
    lib_name = "lib" if main else name

    py_library(
        name = lib_name,
        srcs = lib_srcs,
        deps = lib_deps,
        data = lib_data,
        imports = [
            "src/python",
            "src/resources",
        ],
        visibility = [
            "//visibility:public",
        ],
    )

    if main:
        py_binary(
            name = name,
            main = main,
            deps = (main_deps or []) + [lib_name],
            imports = [
                "src/python",
                "src/resources",
            ],
            visibility = [
                "//visibility:public",
            ],
        )

    # Sources without "test_" in their path are shared helpers for every generated test.
    helper_srcs = [f for f in test_srcs if "test_" not in f]

    for src in test_srcs:
        if "test_" not in src:
            continue

        # One target per test file, named after the file itself.
        py_pytest(
            name = src.split("/")[-1],
            srcs = [src] + helper_srcs,
            deps = [lib_name] + (test_deps or []),
            data = test_data,
            imports = [
                "test/python",
                "test/resources",
            ],
        )

21
tools/python/pythonshim Executable file
View file

@ -0,0 +1,21 @@
#!/bin/sh
# Bazel STRONGLY disapproves of linking dynamically to a Python interpreter.
# But ... that's exactly what we want to do.
# So this script exists to find a 'compliant' Python install and use that.

PYTHONREV="3.10"
CMD="python${PYTHONREV}"

# Resolve once with `command -v` and exec exactly what was resolved, so the
# check and the exec can't disagree (`which` is not guaranteed to exist).
PYTHON="$(command -v "$CMD" 2>/dev/null)"
if [ -n "$PYTHON" ] && [ -x "$PYTHON" ]; then
    exec "$PYTHON" "$@"
fi

case "$(uname)" in
    Darwin)
        # Fall back to the Homebrew location, but only if it really exists;
        # a failed exec would end the script before the error below prints.
        if [ -x "/opt/homebrew/bin/$CMD" ]; then
            exec "/opt/homebrew/bin/$CMD" "$@"
        fi
        ;;
esac

echo "Error: Unable to find a viable Python executable" >&2
exit 1

View file

@ -0,0 +1,9 @@
attrs
black
cattrs
hypothesis
pudb
pytest
pytest-cov
pytest-pudb
pytest-timeout

View file

@ -0,0 +1,28 @@
attrs==22.2.0
autoflake8==0.4.0
black==23.1.0
cattrs==22.2.0
click==8.1.3
coverage==7.2.1
exceptiongroup==1.1.0
hypothesis==6.68.2
iniconfig==2.0.0
isort==5.12.0
jedi==0.18.2
mypy-extensions==1.0.0
packaging==23.0
parso==0.8.3
pathspec==0.11.0
platformdirs==3.1.0
pluggy==1.0.0
pudb==2022.1.3
pyflakes==3.0.1
Pygments==2.14.0
pytest==7.2.2
pytest-cov==4.0.0
pytest-pudb==0.7.0
pytest-timeout==2.1.0
sortedcontainers==2.4.0
tomli==2.0.1
urwid==2.1.2
urwid-readline==0.13

View file

@ -0,0 +1,136 @@
"""
Validate 3rdparty library licenses as approved.
"""
import re
import pytest
from pkg_resources import DistInfoDistribution, working_set
# Licenses approved as representing non-copyleft and not precluding commercial usage.
# This is all easy, there's a good schema here.
APPROVED_LICENSES = [
MIT := "License :: OSI Approved :: MIT License",
APACHE := "License :: OSI Approved :: Apache Software License",
BSD := "License :: OSI Approved :: BSD License",
MPL10 := "License :: OSI Approved :: Mozilla Public License 1.0 (MPL)",
MPL11 := "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)",
MPL20 := "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
PSFL := "License :: OSI Approved :: Python Software Foundation License",
LGPL := "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
LGPL3 := "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
ISCL := "License :: OSI Approved :: ISC License (ISCL)",
]
UNAPPROVED_LICENSES = [
GPL1 := "License :: OSI Approved :: GNU General Public License",
GPL2 := "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
GPL3 := "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
]
# This data is GARBO.
LICENSES_BY_LOWERNAME = {
"apache 2.0": APACHE,
"apache": APACHE,
"http://www.apache.org/licenses/license-2.0": APACHE,
"bsd 3": BSD,
"bsd": BSD,
"gpl": GPL1,
"gpl2": GPL2,
"gpl3": GPL3,
"lgpl": LGPL,
"lgpl3": LGPL3,
"isc": ISCL,
"mit": MIT,
"mpl": MPL10,
"mpl 2.0": MPL20,
"psf": PSFL,
}
# Mash in some cases.
LICENSES_BY_LOWERNAME.update(
{lic.split(" :: ")[-1].lower(): lic for lic in APPROVED_LICENSES}
)
# As a workaround for packages which don't have correct metadata on PyPi, hand-verified packages
APPROVED_PACKAGES = []
def bash_license(ln):
    """Normalise a free-form license name to a known classifier where possible.

    The name is lowercased and stripped of punctuation and filler words
    (e.g. " version", " license", "-clause", "or later") repeatedly until it
    stops changing, then looked up in LICENSES_BY_LOWERNAME. Names with no
    entry there are returned in their stripped, lowercased form.
    """
    noise = r"[(),]|( version)|( license)|( ?v(?=\d))|([ -]clause)|(or later)"

    current = ln
    stripped = re.sub(noise, "", current.lower())
    # Removing one fragment can expose another, so iterate to a fixed point.
    while stripped != current:
        current = stripped
        stripped = re.sub(noise, "", current.lower())

    return LICENSES_BY_LOWERNAME.get(current, current)
# Each pair is (free-form license name, classifier it must normalise to).
_BASH_LICENSE_CASES = [
    ("MIT", MIT),
    ("mit", MIT),
    ("BSD", BSD),
    ("BSD 3-clause", BSD),
    ("BSD 3 clause", BSD),
    ("GPL3", GPL3),
    ("GPL v3", GPL3),
    ("GPLv3", GPL3),
]


@pytest.mark.parametrize("a,b", _BASH_LICENSE_CASES)
def test_bash_license(a, b):
    """bash_license maps each spelling `a` onto the classifier `b`."""
    normalised = bash_license(a)
    assert normalised == b
def licenses(dist: "DistInfoDistribution"):
    """Return the license identifiers declared in a distribution's own metadata.

    The metadata is read locally from the installed distribution (the file
    named by ``dist.PKG_INFO``); nothing is fetched over the network.

    ``License ::`` trove classifiers are preferred. Only when the distribution
    declares none is the free-form ``License:`` header used, normalised
    through bash_license; a missing header is treated as ``UNKNOWN``.

    :param dist: The installed distribution to inspect.
    :returns: A non-empty list of license strings.
    """
    name = dist.project_name
    version = dist.version
    print(name, version, type(dist))

    meta = dist.get_metadata(dist.PKG_INFO).split("\n")

    classifiers = [
        line.replace("Classifier: ", "", 1)
        for line in meta
        if line.startswith("Classifier: ")
    ]
    lics = [c for c in classifiers if c.startswith("License ::")]
    if lics:
        return lics

    # No license classifiers at all: fall back to the free-form header.
    declared = next(
        (line for line in meta if line.startswith("License:")), "License: UNKNOWN"
    ).replace("License: ", "", 1)
    return [bash_license(declared)]
@pytest.mark.parametrize(
    "dist",
    # Only check distributions that come from the pip_parse repositories. The
    # WORKSPACE names that repository "pypi", so match its "pypi_" prefix; the
    # previous "arrdem_source_pypi" marker belongs to a different workspace and
    # would leave this test with nothing to check.
    # NOTE(review): assumes per-package repos are named "pypi_<package>" -
    # confirm against an actual runfiles layout.
    (w for w in working_set if w.location and "pypi_" in w.location),
    ids=lambda dist: dist.project_name,
)
def test_approved_license(dist: DistInfoDistribution):
    """Ensure that a given package is either allowed by name or uses an approved license."""
    _licenses = licenses(dist)
    print(dist.location)
    assert dist.project_name in APPROVED_PACKAGES or any(
        lic in APPROVED_LICENSES for lic in _licenses
    ), f"{dist.project_name} ({dist.location}) was not approved and its license(s) were unknown {_licenses!r}"