Write up where this is at

Reid 'arrdem' McKenzie 2023-03-08 10:53:54 -07:00
parent d62ef16f5b
commit 0a75d08b5a


@ -1,46 +1,17 @@
# flake8: noqa: all
# Python AST interpreter written in Python
# A Python AST interpreter written in Python
#
# This module is part of the Pycopy https://github.com/pfalcon/pycopy
# project.
# This module is part of the Pycopy https://github.com/pfalcon/pycopy project.
#
# Copyright (c) 2019 Paul Sokolovsky
#
# The MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# Modified by Reid D. 'ardem' Mckenzie in 2021 to be a bit more fully-featured
# and usable for running 'real' code as part of an experiment in implementing a
# durable Python interpreter atop the original pycopy substrate.
# Copyright (c) 2019 Paul Sokolovsky, published under the MIT License
import ast
import logging
import os
import sys
if sys.version_info < (3, 0, 0):
builtins = __builtins__
else:
import builtins
from typing import Optional, Type
log = logging.getLogger(__name__)
@ -55,7 +26,7 @@ class StrictNodeVisitor(ast.NodeVisitor):
class ANamespace:
def __init__(self, node):
self.d = {}
self.parent = None
self.parent: Optional["ANamespace"] = None
# Cross-link namespace to AST node. Note that we can't do the
# opposite, because for one node, there can be different namespaces.
self.node = node
@ -80,15 +51,15 @@ class ANamespace:
class ModuleNS(ANamespace):
# parent: Optional["ModuleNS"] = None
pass
class FunctionNS(ANamespace):
pass
class ClassNS(ANamespace):
pass
cls: Optional[type] = None
# Pycopy by default doesn't support direct slice construction, use helper
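# A minimal standalone sketch of that helper pattern (the class below is illustrative only and
# is not the module's actual helper): an object whose __getitem__ returns its key unchanged
# turns ordinary indexing syntax into a ready-made slice object.
class _KeyEcho:
    def __getitem__(self, key):
        return key

assert _KeyEcho()[1:10:2] == slice(1, 10, 2)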
@ -102,16 +73,10 @@ slice_getter = SliceGetter()
def arg_name(arg):
if sys.version_info < (3, 0, 0):
return arg.id
else:
return arg.arg
def kwarg_defaults(args):
if sys.version_info < (3, 0, 0):
return args.defaults
else:
return args.kw_defaults
@ -154,11 +119,12 @@ class InterpFuncWrap:
return self.interp.call_func(self.node, self, *args, **kwargs)
# Python don't fully treat objects, even those defining __call__() special method, as a true callable. For example, such
# objects aren't automatically converted to bound methods if looked up as another object's attributes. As we want our
# "interpreted functions" to behave as close as possible to real functions, we just wrap function object with a real
# function. An alternative might have been to perform needed checks and explicitly bind a method using
# types.MethodType() in visit_Attribute (but then maybe there would be still other cases of "callable object" vs
# Python doesn't fully treat objects, even those defining the __call__() special method, as true
# callables. For example, such objects aren't automatically converted to bound methods if looked
# up as another object's attributes. As we want our "interpreted functions" to behave as closely
# as possible to real functions, we just wrap the function object with a real function. An
# alternative might have been to perform the needed checks and explicitly bind a method using
# types.MethodType() in visit_Attribute (but then there might still be other cases of "callable object" vs
# "function" discrepancies).
def InterpFunc(fun):
def func(*args, **kwargs):
@ -195,26 +161,49 @@ class InterpModule:
return list(self.ns.d.keys())
# TODO (arrdem 2023-03-08):
# This interpreter works well enough to import `requests` and many other libraries and do some
# work, but it is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
# direct execution, there's really no way to jam breakpoints, checkpoints, or resume points into
# program execution - which is kinda the goal of the whole project.
#
# This interpreter, while complete, needs to be refactored - probably into a `yield`-based
# coroutine structure wherein individual operations explicitly `yield` to an outer state
# management loop which effectively trampolines single statements together with state
# management logic (see the sketch below).
#
# The outer interpreter needs to be able to check the "step budget" and decide if it's time for
# the program to suspend.
#
# Individual steps (workflow calls/function calls) may also cause the program to suspend.
#
# Suspending requires signaling the top-level loop, and the top-level loop needs both the
# namespace tree and some sort of cursor or address into the AST under interpretation
# representing where to resume - the logical equivalent of a program counter, but as a tree path.
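# A rough sketch of that shape (hypothetical names throughout - this is not part of the module,
# just an illustration of the yield-based trampoline the TODO describes). Visitors become
# generators that yield a tree-path "program counter" before each statement; the outer loop
# counts steps against a budget and suspends by capturing the namespace plus that path.
class _Suspended(Exception):
    """Signals the top-level loop; carries the namespace tree and the resume path."""
    def __init__(self, ns, path):
        self.ns = ns
        self.path = path

def _step_stmts(stmts, ns, path=()):
    # One yield per statement; a real version would recurse into compound statements,
    # extending the path so it uniquely addresses a node in the AST.
    for i, stmt in enumerate(stmts):
        yield path + (i,)
        # ... interpret `stmt` here, delegating to sub-generators for nested bodies ...

def _run_with_budget(steps, ns, budget):
    taken = 0
    for path in steps:
        taken += 1
        if taken > budget:
            # Time to checkpoint: hand the namespace tree and the AST cursor back to the
            # caller, which can persist them and later resume from `path`.
            raise _Suspended(ns, path)

# Usage sketch:
#   tree = ast.parse(source)
#   _run_with_budget(_step_stmts(tree.body, module_ns), module_ns, budget=1000)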
class ModuleInterpreter(StrictNodeVisitor):
"""An interpreter specific to a single module."""
def __init__(self, system, fname, node):
self.system = system
self.fname = fname
self.ns = self.module_ns = ModuleNS(node)
self.module_ns: ModuleNS = ModuleNS(node)
self.ns: ANamespace = self.module_ns
# Call stack (in terms of function AST nodes).
self.call_stack = []
# To implement "store" operation, we need to arguments: location and value to store. The operation itself is
# handled by a node visitor (e.g. visit_Name), and location is represented by AST node, but there's no support
# to pass additional arguments to a visitor (likely, because it would be a burden to explicit pass such
# additional arguments thru the chain of visitors). So instead, we store this value as field. As interpretation
# happens sequentially, there's no risk that it will be overwritten "concurrently".
# To implement "store" operation, we need to arguments: location and value to store. The
# operation itself is handled by a node visitor (e.g. visit_Name), and location is
# represented by AST node, but there's no support to pass additional arguments to a visitor
# (likely, because it would be a burden to explicit pass such additional arguments thru the
# chain of visitors). So instead, we store this value as field. As interpretation happens
# sequentially, there's no risk that it will be overwritten "concurrently".
self.store_val = None
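# A simplified illustration of that hand-off (hypothetical, condensed - the real visitors
# appear further down in this module):
#
#   def visit_Assign(self, node):
#       val = self.visit(node.value)
#       for target in node.targets:
#           self.store_val = val   # the value rides along in the field ...
#           self.visit(target)     # ... while the visitor only receives the location node
#
#   def visit_Name(self, node):
#       if isinstance(node.ctx, ast.Store):
#           self.ns[node.id] = self.store_val
#       else:
#           return self.ns[node.id]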
# Current active exception, for bare "raise", which doesn't work across function boundaries (and that's how we
# have it - exception would be caught in visit_Try, while re-rasing would happen in visit_Raise).
# Current active exception, for bare "raise", which doesn't work across function boundaries
# (and that's how we have it - the exception would be caught in visit_Try, while re-raising
# would happen in visit_Raise).
self.cur_exc = []
def push_ns(self, new_ns):
@ -222,6 +211,7 @@ class ModuleInterpreter(StrictNodeVisitor):
self.ns = new_ns
def pop_ns(self):
assert self.ns is not None
self.ns = self.ns.parent
def stmt_list_visit(self, lst):
@ -247,13 +237,13 @@ class ModuleInterpreter(StrictNodeVisitor):
return self.visit(node.body)
def visit_ClassDef(self, node):
self.push_ns(ClassNS(node))
ns: ClassNS = ClassNS(node)
self.push_ns(ns)
try:
self.stmt_list_visit(node.body)
except Exception:
self.pop_ns()
raise
ns = self.ns
self.pop_ns()
cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d)
cls = self.wrap_decorators(cls, node)
@ -266,8 +256,7 @@ class ModuleInterpreter(StrictNodeVisitor):
return self.prepare_func(node)
def visit_FunctionDef(self, node):
# Defaults are evaluated at function definition time, so we
# need to do that now.
# Defaults are evaluated at function definition time, so we need to do that now.
func = self.prepare_func(node)
func = self.wrap_decorators(func, node)
self.ns[node.name] = func
@ -290,11 +279,10 @@ class ModuleInterpreter(StrictNodeVisitor):
all_args.add(arg_name(a))
if v is not None:
d[arg_name(a)] = self.visit(v)
# We can store cached argument names of a function in its node -
# it's static.
# We can store cached argument names of a function in its node - it's static.
node.args.all_args = all_args
# We can't store the values of default arguments - they're dynamic,
# may depend on the lexical scope.
# We can't store the values of default arguments - they're dynamic and may depend on the
# lexical scope.
func.defaults_dict = d
return InterpFunc(func)
@ -308,9 +296,8 @@ class ModuleInterpreter(StrictNodeVisitor):
)
argspec = node.args
# If there's vararg, either offload surplus of args to it, or init
# it to empty tuple (all in one statement). If no vararg, error on
# too many args.
# If there's a vararg, either offload the surplus args to it, or init it to an empty tuple
# (all in one statement). If there's no vararg, error on too many args.
#
# Note that we have to do the .posonlyargs dance
if argspec.vararg:
@ -329,9 +316,8 @@ class ModuleInterpreter(StrictNodeVisitor):
for a, value in zip(argspec.posonlyargs, args):
self.ns[arg_name(a)] = value
# Process incoming keyword arguments, putting them in namespace if
# actual arg exists by that name, or offload to function's kwarg
# if any. All make needed checks and error out.
# Process incoming keyword arguments, putting them in the namespace if an actual arg exists by
# that name, or offloading them to the function's kwarg if any, making the needed checks and
# erroring out along the way.
func_kwarg = {}
for k, v in kwargs.items():
if k in argspec.all_args:
@ -351,9 +337,8 @@ class ModuleInterpreter(StrictNodeVisitor):
if argspec.kwarg:
self.ns[arg_name(argspec.kwarg)] = func_kwarg
# Finally, overlay default values for arguments not yet initialized.
# We need to do this last for "multiple values for the same arg"
# check to work.
# Finally, overlay default values for arguments not yet initialized. We need to do this last
# for "multiple values for the same arg" check to work.
for k, v in interp_func.defaults_dict.items():
if k not in self.ns:
self.ns[k] = v
@ -376,8 +361,8 @@ class ModuleInterpreter(StrictNodeVisitor):
def call_func(self, node, interp_func, *args, **kwargs):
self.call_stack.append(node)
# We need to switch from dynamic execution scope to lexical scope
# in which function was defined (then switch back on return).
# We need to switch from the dynamic execution scope to the lexical scope in which the
# function was defined (then switch back on return).
dyna_scope = self.ns
self.ns = interp_func.lexical_scope
self.push_ns(FunctionNS(node))
@ -508,9 +493,9 @@ class ModuleInterpreter(StrictNodeVisitor):
def visit_AugAssign(self, node):
assert isinstance(node.target.ctx, ast.Store)
# Not functional style, oops. Node in AST has store context, but we
# need to read its value first. To not construct a copy of the entire
# node with load context, we temporarily patch it in-place.
# Not functional style, oops. The node in the AST has store context, but we need to read its
# value first. To avoid constructing a copy of the entire node with load context, we
# temporarily patch it in place.
save_ctx = node.target.ctx
node.target.ctx = ast.Load()
var_val = self.visit(node.target)
@ -518,12 +503,11 @@ class ModuleInterpreter(StrictNodeVisitor):
rval = self.visit(node.value)
# As augmented assignment is statement, not operator, we can't put them
# all into map. We could instead directly lookup special inplace methods
# (__iadd__ and friends) and use them, with a fallback to normal binary
# operations, but from the point of view of this interpreter, presence
# of such methods is an implementation detail of the object system, it's
# not concerned with it.
# As augmented assignment is a statement, not an operator, we can't put these all into a map.
# We could instead directly look up the special in-place methods (__iadd__ and friends) and
# use them, with a fallback to the normal binary operations, but from the point of view of
# this interpreter the presence of such methods is an implementation detail of the object
# system that it's not concerned with.
op = type(node.op)
if op is ast.Add:
var_val += rval
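# For illustration, the alternative dismissed above could lean on the operator module, whose
# in-place functions already implement "__iadd__ and friends with a fallback to the normal
# binary operation" (hypothetical sketch, not what this interpreter does):
#
#   import operator
#   _INPLACE = {ast.Add: operator.iadd, ast.Sub: operator.isub, ast.Mult: operator.imul}
#   var_val = _INPLACE[type(node.op)](var_val, rval)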
@ -682,10 +666,11 @@ class ModuleInterpreter(StrictNodeVisitor):
if func is builtins.super and not args:
if not self.ns.parent or not isinstance(self.ns.parent, ClassNS):
raise RuntimeError("super(): no arguments")
# As we're creating methods dynamically outside of class, super() without argument won't work, as that
# requires __class__ cell. Creating that would be cumbersome (Pycopy definitely lacks enough introspection
# for that), so we substitute 2 implied args (which argumentless super() would take from cell and 1st arg to
# func). In our case, we take them from prepared bookkeeping info.
# As we're creating methods dynamically outside of a class, super() without arguments won't
# work, as that requires the __class__ cell. Creating that would be cumbersome (Pycopy
# definitely lacks enough introspection for it), so we substitute the 2 implied args (which
# argumentless super() would take from the cell and the 1st arg to the function). In our
# case, we take them from prepared bookkeeping info.
args = (self.ns.parent.cls, self.ns["self"])
return func(*args, **kwargs)
@ -901,7 +886,7 @@ class ModuleInterpreter(StrictNodeVisitor):
def visit_Print(self, node):
# In Py2k only
raise NotImplementedError("Absolutely not. Use __future__.")
raise SyntaxError("Absolutely not. Use __future__.")
def visit_Str(self, node):
return node.s