From 0a75d08b5af0988cd8c385485d5f5ae3445c4e6e Mon Sep 17 00:00:00 2001 From: Reid 'arrdem' McKenzie Date: Wed, 8 Mar 2023 10:53:54 -0700 Subject: [PATCH] Write up where this is at --- scratch/astinterp.py | 167 ++++++++++++++++++++----------------------- 1 file changed, 76 insertions(+), 91 deletions(-) diff --git a/scratch/astinterp.py b/scratch/astinterp.py index ede73b1..560dc52 100644 --- a/scratch/astinterp.py +++ b/scratch/astinterp.py @@ -1,46 +1,17 @@ # flake8: noqa: all -# Python AST interpreter written in Python +# A Python AST interpreter written in Python # -# This module is part of the Pycopy https://github.com/pfalcon/pycopy -# project. +# This module is part of the Pycopy https://github.com/pfalcon/pycopy project. # -# Copyright (c) 2019 Paul Sokolovsky -# -# The MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. -# -# Modified by Reid D. 
'ardem' Mckenzie in 2021 to be a bit more fully-featured -# and usable for running 'real' code as part of an experiment in implementing a -# durable Python interpreter atop the original pycopy substrate. +# Copyright (c) 2019 Paul Sokolovsky, published under the MIT License import ast import logging import os import sys - - -if sys.version_info < (3, 0, 0): - builtins = __builtins__ -else: - import builtins +import builtins +from typing import Optional, Type log = logging.getLogger(__name__) @@ -55,7 +26,7 @@ class StrictNodeVisitor(ast.NodeVisitor): class ANamespace: def __init__(self, node): self.d = {} - self.parent = None + self.parent: Optional[Type["ANamespace"]] = None # Cross-link namespace to AST node. Note that we can't do the # opposite, because for one node, there can be different namespaces. self.node = node @@ -80,15 +51,15 @@ class ANamespace: class ModuleNS(ANamespace): + # parent: Optional["ModuleNS"] = None pass - class FunctionNS(ANamespace): pass class ClassNS(ANamespace): - pass + cls: Optional[type] = None # Pycopy by default doesn't support direct slice construction, use helper @@ -102,17 +73,11 @@ slice_getter = SliceGetter() def arg_name(arg): - if sys.version_info < (3, 0, 0): - return arg.id - else: - return arg.arg + return arg.arg def kwarg_defaults(args): - if sys.version_info < (3, 0, 0): - return args.defaults - else: - return args.kw_defaults + return args.kw_defaults class TargetNonlocalFlow(Exception): @@ -154,11 +119,12 @@ class InterpFuncWrap: return self.interp.call_func(self.node, self, *args, **kwargs) -# Python don't fully treat objects, even those defining __call__() special method, as a true callable. For example, such -# objects aren't automatically converted to bound methods if looked up as another object's attributes. As we want our -# "interpreted functions" to behave as close as possible to real functions, we just wrap function object with a real -# function. 
An alternative might have been to perform needed checks and explicitly bind a method using -# types.MethodType() in visit_Attribute (but then maybe there would be still other cases of "callable object" vs +# Python doesn't fully treat objects, even those defining __call__() special method, as a true +# callable. For example, such objects aren't automatically converted to bound methods if looked up +# as another object's attributes. As we want our "interpreted functions" to behave as closely as +# possible to real functions, we just wrap function object with a real function. An alternative +# might have been to perform needed checks and explicitly bind a method using types.MethodType() in +# visit_Attribute (but then maybe there would be still other cases of "callable object" vs # "function" discrepancies). def InterpFunc(fun): def func(*args, **kwargs): @@ -195,26 +161,49 @@ class InterpModule: return list(self.ns.d.keys()) +# TODO (arrdem 2023-03-08): +# This interpreter works well enough to import `requests` and many other libraries and do some +# work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses +# direct execution, there's really no way to jam breakpoints or checkpoints or resume points into +# program execution. Which is kinda the goal of the whole project. +# +# This interpreter, while complete, needs to get refactored into probably a `yield` based +# coroutine structure wherein individual operations explicitly `yield` to an outer state +# management loop which effectively trampolines single statements together with state management +# logic. +# +# The outer interpreter needs to be able to check the "step budget" and decide if it's time for +# the program to suspend. +# +# Individual steps (workflow calls/function calls) may also cause the program to suspend. 
+# +# Suspending requires signaling the top level loop, and the top level loop needs both the +# namespace tree and some sort of cursor or address into the AST under interpretation +# representing where to resume. The logical equivalent of a program counter, but a tree path. + class ModuleInterpreter(StrictNodeVisitor): """An interpreter specific to a single module.""" def __init__(self, system, fname, node): self.system = system self.fname = fname - self.ns = self.module_ns = ModuleNS(node) + self.module_ns: ModuleNS = ModuleNS(node) + self.ns: ANamespace = self.module_ns # Call stack (in terms of function AST nodes). self.call_stack = [] - # To implement "store" operation, we need to arguments: location and value to store. The operation itself is - # handled by a node visitor (e.g. visit_Name), and location is represented by AST node, but there's no support - # to pass additional arguments to a visitor (likely, because it would be a burden to explicit pass such - # additional arguments thru the chain of visitors). So instead, we store this value as field. As interpretation - # happens sequentially, there's no risk that it will be overwritten "concurrently". + # To implement "store" operation, we need two arguments: location and value to store. The + # operation itself is handled by a node visitor (e.g. visit_Name), and location is + # represented by AST node, but there's no support to pass additional arguments to a visitor + # (likely, because it would be a burden to explicit pass such additional arguments thru the + # chain of visitors). So instead, we store this value as field. As interpretation happens + # sequentially, there's no risk that it will be overwritten "concurrently". self.store_val = None - # Current active exception, for bare "raise", which doesn't work across function boundaries (and that's how we - # have it - exception would be caught in visit_Try, while re-rasing would happen in visit_Raise). 
+ # Current active exception, for bare "raise", which doesn't work across function boundaries + # (and that's how we have it - exception would be caught in visit_Try, while re-rasing would + # happen in visit_Raise). self.cur_exc = [] def push_ns(self, new_ns): @@ -222,6 +211,7 @@ class ModuleInterpreter(StrictNodeVisitor): self.ns = new_ns def pop_ns(self): + assert self.ns is not None self.ns = self.ns.parent def stmt_list_visit(self, lst): @@ -247,13 +237,13 @@ class ModuleInterpreter(StrictNodeVisitor): return self.visit(node.body) def visit_ClassDef(self, node): - self.push_ns(ClassNS(node)) + ns: ClassNS = ClassNS(node) + self.push_ns(ns) try: self.stmt_list_visit(node.body) except Exception: self.pop_ns() raise - ns = self.ns self.pop_ns() cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d) cls = self.wrap_decorators(cls, node) @@ -266,8 +256,7 @@ class ModuleInterpreter(StrictNodeVisitor): return self.prepare_func(node) def visit_FunctionDef(self, node): - # Defaults are evaluated at function definition time, so we - # need to do that now. + # Defaults are evaluated at function definition time, so we need to do that now. func = self.prepare_func(node) func = self.wrap_decorators(func, node) self.ns[node.name] = func @@ -290,11 +279,10 @@ class ModuleInterpreter(StrictNodeVisitor): all_args.add(arg_name(a)) if v is not None: d[arg_name(a)] = self.visit(v) - # We can store cached argument names of a function in its node - - # it's static. + # We can store cached argument names of a function in its node - it's static. node.args.all_args = all_args - # We can't store the values of default arguments - they're dynamic, - # may depend on the lexical scope. + # We can't store the values of default arguments - they're dynamic, may depend on the + # lexical scope. 
func.defaults_dict = d return InterpFunc(func) @@ -308,9 +296,8 @@ class ModuleInterpreter(StrictNodeVisitor): ) argspec = node.args - # If there's vararg, either offload surplus of args to it, or init - # it to empty tuple (all in one statement). If no vararg, error on - # too many args. + # If there's vararg, either offload surplus of args to it, or init it to empty tuple (all in + # one statement). If no vararg, error on too many args. # # Note that we have to do the .posonlyargs dance if argspec.vararg: @@ -329,9 +316,8 @@ class ModuleInterpreter(StrictNodeVisitor): for a, value in zip(argspec.posonlyargs, args): self.ns[arg_name(a)] = value - # Process incoming keyword arguments, putting them in namespace if - # actual arg exists by that name, or offload to function's kwarg - # if any. All make needed checks and error out. + # Process incoming keyword arguments, putting them in namespace if actual arg exists by that + # name, or offload to function's kwarg if any. All make needed checks and error out. func_kwarg = {} for k, v in kwargs.items(): if k in argspec.all_args: @@ -351,9 +337,8 @@ class ModuleInterpreter(StrictNodeVisitor): if argspec.kwarg: self.ns[arg_name(argspec.kwarg)] = func_kwarg - # Finally, overlay default values for arguments not yet initialized. - # We need to do this last for "multiple values for the same arg" - # check to work. + # Finally, overlay default values for arguments not yet initialized. We need to do this last + # for "multiple values for the same arg" check to work. for k, v in interp_func.defaults_dict.items(): if k not in self.ns: self.ns[k] = v @@ -376,8 +361,8 @@ class ModuleInterpreter(StrictNodeVisitor): def call_func(self, node, interp_func, *args, **kwargs): self.call_stack.append(node) - # We need to switch from dynamic execution scope to lexical scope - # in which function was defined (then switch back on return). 
+ # We need to switch from dynamic execution scope to lexical scope in which function was + # defined (then switch back on return). dyna_scope = self.ns self.ns = interp_func.lexical_scope self.push_ns(FunctionNS(node)) @@ -508,9 +493,9 @@ class ModuleInterpreter(StrictNodeVisitor): def visit_AugAssign(self, node): assert isinstance(node.target.ctx, ast.Store) - # Not functional style, oops. Node in AST has store context, but we - # need to read its value first. To not construct a copy of the entire - # node with load context, we temporarily patch it in-place. + # Not functional style, oops. Node in AST has store context, but we need to read its value + # first. To not construct a copy of the entire node with load context, we temporarily patch + # it in-place. save_ctx = node.target.ctx node.target.ctx = ast.Load() var_val = self.visit(node.target) @@ -518,12 +503,11 @@ class ModuleInterpreter(StrictNodeVisitor): rval = self.visit(node.value) - # As augmented assignment is statement, not operator, we can't put them - # all into map. We could instead directly lookup special inplace methods - # (__iadd__ and friends) and use them, with a fallback to normal binary - # operations, but from the point of view of this interpreter, presence - # of such methods is an implementation detail of the object system, it's - # not concerned with it. + # As augmented assignment is statement, not operator, we can't put them all into map. We + # could instead directly lookup special inplace methods (__iadd__ and friends) and use them, + # with a fallback to normal binary operations, but from the point of view of this + # interpreter, presence of such methods is an implementation detail of the object system, + # it's not concerned with it. 
op = type(node.op) if op is ast.Add: var_val += rval @@ -682,10 +666,11 @@ class ModuleInterpreter(StrictNodeVisitor): if func is builtins.super and not args: if not self.ns.parent or not isinstance(self.ns.parent, ClassNS): raise RuntimeError("super(): no arguments") - # As we're creating methods dynamically outside of class, super() without argument won't work, as that - # requires __class__ cell. Creating that would be cumbersome (Pycopy definitely lacks enough introspection - # for that), so we substitute 2 implied args (which argumentless super() would take from cell and 1st arg to - # func). In our case, we take them from prepared bookkeeping info. + # As we're creating methods dynamically outside of class, super() without argument won't + # work, as that requires __class__ cell. Creating that would be cumbersome (Pycopy + # definitely lacks enough introspection for that), so we substitute 2 implied args + # (which argumentless super() would take from cell and 1st arg to func). In our case, we + # take them from prepared bookkeeping info. args = (self.ns.parent.cls, self.ns["self"]) return func(*args, **kwargs) @@ -901,7 +886,7 @@ class ModuleInterpreter(StrictNodeVisitor): def visit_Print(self, node): # In Py2k only - raise NotImplementedError("Absolutely not. Use __future__.") + raise SyntaxError("Absolutely not. Use __future__.") def visit_Str(self, node): return node.s