Write up where this is at

This commit is contained in:
Reid 'arrdem' McKenzie 2023-03-08 10:53:54 -07:00
parent d62ef16f5b
commit 0a75d08b5a

View file

@ -1,46 +1,17 @@
# flake8: noqa: all # flake8: noqa: all
# Python AST interpreter written in Python # A Python AST interpreter written in Python
# #
# This module is part of the Pycopy https://github.com/pfalcon/pycopy # This module is part of the Pycopy https://github.com/pfalcon/pycopy project.
# project.
# #
# Copyright (c) 2019 Paul Sokolovsky # Copyright (c) 2019 Paul Sokolovsky, published under the MIT License
#
# The MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Modified by Reid D. 'ardem' Mckenzie in 2021 to be a bit more fully-featured
# and usable for running 'real' code as part of an experiment in implementing a
# durable Python interpreter atop the original pycopy substrate.
import ast import ast
import logging import logging
import os import os
import sys import sys
import builtins
from typing import Optional, Type
if sys.version_info < (3, 0, 0):
builtins = __builtins__
else:
import builtins
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@ -55,7 +26,7 @@ class StrictNodeVisitor(ast.NodeVisitor):
class ANamespace: class ANamespace:
def __init__(self, node): def __init__(self, node):
self.d = {} self.d = {}
self.parent = None self.parent: Optional[Type["ANamespace"]] = None
# Cross-link namespace to AST node. Note that we can't do the # Cross-link namespace to AST node. Note that we can't do the
# opposite, because for one node, there can be different namespaces. # opposite, because for one node, there can be different namespaces.
self.node = node self.node = node
@ -80,15 +51,15 @@ class ANamespace:
class ModuleNS(ANamespace): class ModuleNS(ANamespace):
# parent: Optional["ModuleNS"] = None
pass pass
class FunctionNS(ANamespace): class FunctionNS(ANamespace):
pass pass
class ClassNS(ANamespace): class ClassNS(ANamespace):
pass cls: Optional[type] = None
# Pycopy by default doesn't support direct slice construction, use helper # Pycopy by default doesn't support direct slice construction, use helper
@ -102,17 +73,11 @@ slice_getter = SliceGetter()
def arg_name(arg): def arg_name(arg):
if sys.version_info < (3, 0, 0): return arg.arg
return arg.id
else:
return arg.arg
def kwarg_defaults(args): def kwarg_defaults(args):
if sys.version_info < (3, 0, 0): return args.kw_defaults
return args.defaults
else:
return args.kw_defaults
class TargetNonlocalFlow(Exception): class TargetNonlocalFlow(Exception):
@ -154,11 +119,12 @@ class InterpFuncWrap:
return self.interp.call_func(self.node, self, *args, **kwargs) return self.interp.call_func(self.node, self, *args, **kwargs)
# Python doesn't fully treat objects, even those defining __call__() special method, as a true callable. For example, such # Python doesn't fully treat objects, even those defining __call__() special method, as a true
# objects aren't automatically converted to bound methods if looked up as another object's attributes. As we want our # callable. For example, such objects aren't automatically converted to bound methods if looked up
# "interpreted functions" to behave as close as possible to real functions, we just wrap function object with a real # as another object's attributes. As we want our "interpreted functions" to behave as closely as
# function. An alternative might have been to perform needed checks and explicitly bind a method using # possible to real functions, we just wrap function object with a real function. An alternative
# types.MethodType() in visit_Attribute (but then maybe there would be still other cases of "callable object" vs # might have been to perform needed checks and explicitly bind a method using types.MethodType() in
# visit_Attribute (but then maybe there would be still other cases of "callable object" vs
# "function" discrepancies). # "function" discrepancies).
def InterpFunc(fun): def InterpFunc(fun):
def func(*args, **kwargs): def func(*args, **kwargs):
@ -195,26 +161,49 @@ class InterpModule:
return list(self.ns.d.keys()) return list(self.ns.d.keys())
# TODO (arrdem 2023-03-08):
# This interpreter works well enough to import `requests` and many other libraries and do some
# work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
# direct execution, there's really no way to jam breakpoints or checkpoints or resume points into
# program execution. Which is kinda the goal of the whole project.
#
# This interpreter, while complete, needs to get refactored into probably a `yield` based
# coroutine structure wherein individual operations explicitly `yield` to an outer state
# management loop which effectively trampolines single statements together with state management
# logic.
#
# The outer interpreter needs to be able to check the "step budget" and decide if it's time for
# the program to suspend.
#
# Individual steps (workflow calls/function calls) may also cause the program to suspend.
#
# Suspending requires signaling the top level loop, and the top level loop needs both the
# namespace tree and some sort of cursor or address into the AST under interpretation
# representing where to resume. The logical equivalent of a program counter, but a tree path.
class ModuleInterpreter(StrictNodeVisitor): class ModuleInterpreter(StrictNodeVisitor):
"""An interpreter specific to a single module.""" """An interpreter specific to a single module."""
def __init__(self, system, fname, node): def __init__(self, system, fname, node):
self.system = system self.system = system
self.fname = fname self.fname = fname
self.ns = self.module_ns = ModuleNS(node) self.module_ns: ModuleNS = ModuleNS(node)
self.ns: ANamespace = self.module_ns
# Call stack (in terms of function AST nodes). # Call stack (in terms of function AST nodes).
self.call_stack = [] self.call_stack = []
# To implement "store" operation, we need two arguments: location and value to store. The operation itself is # To implement "store" operation, we need two arguments: location and value to store. The
# handled by a node visitor (e.g. visit_Name), and location is represented by AST node, but there's no support # operation itself is handled by a node visitor (e.g. visit_Name), and location is
# to pass additional arguments to a visitor (likely, because it would be a burden to explicitly pass such # represented by AST node, but there's no support to pass additional arguments to a visitor
# additional arguments thru the chain of visitors). So instead, we store this value as field. As interpretation # (likely, because it would be a burden to explicitly pass such additional arguments thru the
# happens sequentially, there's no risk that it will be overwritten "concurrently". # chain of visitors). So instead, we store this value as field. As interpretation happens
# sequentially, there's no risk that it will be overwritten "concurrently".
self.store_val = None self.store_val = None
# Current active exception, for bare "raise", which doesn't work across function boundaries (and that's how we # Current active exception, for bare "raise", which doesn't work across function boundaries
# have it - exception would be caught in visit_Try, while re-raising would happen in visit_Raise). # (and that's how we have it - exception would be caught in visit_Try, while re-raising would
# happen in visit_Raise).
self.cur_exc = [] self.cur_exc = []
def push_ns(self, new_ns): def push_ns(self, new_ns):
@ -222,6 +211,7 @@ class ModuleInterpreter(StrictNodeVisitor):
self.ns = new_ns self.ns = new_ns
def pop_ns(self): def pop_ns(self):
assert self.ns is not None
self.ns = self.ns.parent self.ns = self.ns.parent
def stmt_list_visit(self, lst): def stmt_list_visit(self, lst):
@ -247,13 +237,13 @@ class ModuleInterpreter(StrictNodeVisitor):
return self.visit(node.body) return self.visit(node.body)
def visit_ClassDef(self, node): def visit_ClassDef(self, node):
self.push_ns(ClassNS(node)) ns: ClassNS = ClassNS(node)
self.push_ns(ns)
try: try:
self.stmt_list_visit(node.body) self.stmt_list_visit(node.body)
except Exception: except Exception:
self.pop_ns() self.pop_ns()
raise raise
ns = self.ns
self.pop_ns() self.pop_ns()
cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d) cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d)
cls = self.wrap_decorators(cls, node) cls = self.wrap_decorators(cls, node)
@ -266,8 +256,7 @@ class ModuleInterpreter(StrictNodeVisitor):
return self.prepare_func(node) return self.prepare_func(node)
def visit_FunctionDef(self, node): def visit_FunctionDef(self, node):
# Defaults are evaluated at function definition time, so we # Defaults are evaluated at function definition time, so we need to do that now.
# need to do that now.
func = self.prepare_func(node) func = self.prepare_func(node)
func = self.wrap_decorators(func, node) func = self.wrap_decorators(func, node)
self.ns[node.name] = func self.ns[node.name] = func
@ -290,11 +279,10 @@ class ModuleInterpreter(StrictNodeVisitor):
all_args.add(arg_name(a)) all_args.add(arg_name(a))
if v is not None: if v is not None:
d[arg_name(a)] = self.visit(v) d[arg_name(a)] = self.visit(v)
# We can store cached argument names of a function in its node - # We can store cached argument names of a function in its node - it's static.
# it's static.
node.args.all_args = all_args node.args.all_args = all_args
# We can't store the values of default arguments - they're dynamic, # We can't store the values of default arguments - they're dynamic, may depend on the
# may depend on the lexical scope. # lexical scope.
func.defaults_dict = d func.defaults_dict = d
return InterpFunc(func) return InterpFunc(func)
@ -308,9 +296,8 @@ class ModuleInterpreter(StrictNodeVisitor):
) )
argspec = node.args argspec = node.args
# If there's vararg, either offload surplus of args to it, or init # If there's vararg, either offload surplus of args to it, or init it to empty tuple (all in
# it to empty tuple (all in one statement). If no vararg, error on # one statement). If no vararg, error on too many args.
# too many args.
# #
# Note that we have to do the .posonlyargs dance # Note that we have to do the .posonlyargs dance
if argspec.vararg: if argspec.vararg:
@ -329,9 +316,8 @@ class ModuleInterpreter(StrictNodeVisitor):
for a, value in zip(argspec.posonlyargs, args): for a, value in zip(argspec.posonlyargs, args):
self.ns[arg_name(a)] = value self.ns[arg_name(a)] = value
# Process incoming keyword arguments, putting them in namespace if # Process incoming keyword arguments, putting them in namespace if actual arg exists by that
# actual arg exists by that name, or offload to function's kwarg # name, or offload to function's kwarg if any. All make needed checks and error out.
# if any. All make needed checks and error out.
func_kwarg = {} func_kwarg = {}
for k, v in kwargs.items(): for k, v in kwargs.items():
if k in argspec.all_args: if k in argspec.all_args:
@ -351,9 +337,8 @@ class ModuleInterpreter(StrictNodeVisitor):
if argspec.kwarg: if argspec.kwarg:
self.ns[arg_name(argspec.kwarg)] = func_kwarg self.ns[arg_name(argspec.kwarg)] = func_kwarg
# Finally, overlay default values for arguments not yet initialized. # Finally, overlay default values for arguments not yet initialized. We need to do this last
# We need to do this last for "multiple values for the same arg" # for "multiple values for the same arg" check to work.
# check to work.
for k, v in interp_func.defaults_dict.items(): for k, v in interp_func.defaults_dict.items():
if k not in self.ns: if k not in self.ns:
self.ns[k] = v self.ns[k] = v
@ -376,8 +361,8 @@ class ModuleInterpreter(StrictNodeVisitor):
def call_func(self, node, interp_func, *args, **kwargs): def call_func(self, node, interp_func, *args, **kwargs):
self.call_stack.append(node) self.call_stack.append(node)
# We need to switch from dynamic execution scope to lexical scope # We need to switch from dynamic execution scope to lexical scope in which function was
# in which function was defined (then switch back on return). # defined (then switch back on return).
dyna_scope = self.ns dyna_scope = self.ns
self.ns = interp_func.lexical_scope self.ns = interp_func.lexical_scope
self.push_ns(FunctionNS(node)) self.push_ns(FunctionNS(node))
@ -508,9 +493,9 @@ class ModuleInterpreter(StrictNodeVisitor):
def visit_AugAssign(self, node): def visit_AugAssign(self, node):
assert isinstance(node.target.ctx, ast.Store) assert isinstance(node.target.ctx, ast.Store)
# Not functional style, oops. Node in AST has store context, but we # Not functional style, oops. Node in AST has store context, but we need to read its value
# need to read its value first. To not construct a copy of the entire # first. To not construct a copy of the entire node with load context, we temporarily patch
# node with load context, we temporarily patch it in-place. # it in-place.
save_ctx = node.target.ctx save_ctx = node.target.ctx
node.target.ctx = ast.Load() node.target.ctx = ast.Load()
var_val = self.visit(node.target) var_val = self.visit(node.target)
@ -518,12 +503,11 @@ class ModuleInterpreter(StrictNodeVisitor):
rval = self.visit(node.value) rval = self.visit(node.value)
# As augmented assignment is statement, not operator, we can't put them # As augmented assignment is statement, not operator, we can't put them all into map. We
# all into map. We could instead directly lookup special inplace methods # could instead directly lookup special inplace methods (__iadd__ and friends) and use them,
# (__iadd__ and friends) and use them, with a fallback to normal binary # with a fallback to normal binary operations, but from the point of view of this
# operations, but from the point of view of this interpreter, presence # interpreter, presence of such methods is an implementation detail of the object system,
# of such methods is an implementation detail of the object system, it's # it's not concerned with it.
# not concerned with it.
op = type(node.op) op = type(node.op)
if op is ast.Add: if op is ast.Add:
var_val += rval var_val += rval
@ -682,10 +666,11 @@ class ModuleInterpreter(StrictNodeVisitor):
if func is builtins.super and not args: if func is builtins.super and not args:
if not self.ns.parent or not isinstance(self.ns.parent, ClassNS): if not self.ns.parent or not isinstance(self.ns.parent, ClassNS):
raise RuntimeError("super(): no arguments") raise RuntimeError("super(): no arguments")
# As we're creating methods dynamically outside of class, super() without argument won't work, as that # As we're creating methods dynamically outside of class, super() without argument won't
# requires __class__ cell. Creating that would be cumbersome (Pycopy definitely lacks enough introspection # work, as that requires __class__ cell. Creating that would be cumbersome (Pycopy
# for that), so we substitute 2 implied args (which argumentless super() would take from cell and 1st arg to # definitely lacks enough introspection for that), so we substitute 2 implied args
# func). In our case, we take them from prepared bookkeeping info. # (which argumentless super() would take from cell and 1st arg to func). In our case, we
# take them from prepared bookkeeping info.
args = (self.ns.parent.cls, self.ns["self"]) args = (self.ns.parent.cls, self.ns["self"])
return func(*args, **kwargs) return func(*args, **kwargs)
@ -901,7 +886,7 @@ class ModuleInterpreter(StrictNodeVisitor):
def visit_Print(self, node): def visit_Print(self, node):
# In Py2k only # In Py2k only
raise NotImplementedError("Absolutely not. Use __future__.") raise SyntaxError("Absolutely not. Use __future__.")
def visit_Str(self, node): def visit_Str(self, node):
return node.s return node.s