Write up where this is at
This commit is contained in:
parent
d62ef16f5b
commit
0a75d08b5a
1 changed files with 76 additions and 91 deletions
|
@ -1,46 +1,17 @@
|
||||||
# flake8: noqa: all
|
# flake8: noqa: all
|
||||||
|
|
||||||
# Python AST interpreter written in Python
|
# A Python AST interpreter written in Python
|
||||||
#
|
#
|
||||||
# This module is part of the Pycopy https://github.com/pfalcon/pycopy
|
# This module is part of the Pycopy https://github.com/pfalcon/pycopy project.
|
||||||
# project.
|
|
||||||
#
|
#
|
||||||
# Copyright (c) 2019 Paul Sokolovsky
|
# Copyright (c) 2019 Paul Sokolovsky, published under the MIT License
|
||||||
#
|
|
||||||
# The MIT License
|
|
||||||
#
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
|
||||||
# in the Software without restriction, including without limitation the rights
|
|
||||||
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
# copies of the Software, and to permit persons to whom the Software is
|
|
||||||
# furnished to do so, subject to the following conditions:
|
|
||||||
#
|
|
||||||
# The above copyright notice and this permission notice shall be included in
|
|
||||||
# all copies or substantial portions of the Software.
|
|
||||||
#
|
|
||||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
||||||
# THE SOFTWARE.
|
|
||||||
#
|
|
||||||
# Modified by Reid D. 'ardem' Mckenzie in 2021 to be a bit more fully-featured
|
|
||||||
# and usable for running 'real' code as part of an experiment in implementing a
|
|
||||||
# durable Python interpreter atop the original pycopy substrate.
|
|
||||||
|
|
||||||
import ast
|
import ast
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
import builtins
|
||||||
|
from typing import Optional, Type
|
||||||
if sys.version_info < (3, 0, 0):
|
|
||||||
builtins = __builtins__
|
|
||||||
else:
|
|
||||||
import builtins
|
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
@ -55,7 +26,7 @@ class StrictNodeVisitor(ast.NodeVisitor):
|
||||||
class ANamespace:
|
class ANamespace:
|
||||||
def __init__(self, node):
|
def __init__(self, node):
|
||||||
self.d = {}
|
self.d = {}
|
||||||
self.parent = None
|
self.parent: Optional[Type["ANamespace"]] = None
|
||||||
# Cross-link namespace to AST node. Note that we can't do the
|
# Cross-link namespace to AST node. Note that we can't do the
|
||||||
# opposite, because for one node, there can be different namespaces.
|
# opposite, because for one node, there can be different namespaces.
|
||||||
self.node = node
|
self.node = node
|
||||||
|
@ -80,15 +51,15 @@ class ANamespace:
|
||||||
|
|
||||||
|
|
||||||
class ModuleNS(ANamespace):
|
class ModuleNS(ANamespace):
|
||||||
|
# parent: Optional["ModuleNS"] = None
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class FunctionNS(ANamespace):
|
class FunctionNS(ANamespace):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class ClassNS(ANamespace):
|
class ClassNS(ANamespace):
|
||||||
pass
|
cls: Optional[type] = None
|
||||||
|
|
||||||
|
|
||||||
# Pycopy by default doesn't support direct slice construction, use helper
|
# Pycopy by default doesn't support direct slice construction, use helper
|
||||||
|
@ -102,17 +73,11 @@ slice_getter = SliceGetter()
|
||||||
|
|
||||||
|
|
||||||
def arg_name(arg):
|
def arg_name(arg):
|
||||||
if sys.version_info < (3, 0, 0):
|
return arg.arg
|
||||||
return arg.id
|
|
||||||
else:
|
|
||||||
return arg.arg
|
|
||||||
|
|
||||||
|
|
||||||
def kwarg_defaults(args):
|
def kwarg_defaults(args):
|
||||||
if sys.version_info < (3, 0, 0):
|
return args.kw_defaults
|
||||||
return args.defaults
|
|
||||||
else:
|
|
||||||
return args.kw_defaults
|
|
||||||
|
|
||||||
|
|
||||||
class TargetNonlocalFlow(Exception):
|
class TargetNonlocalFlow(Exception):
|
||||||
|
@ -154,11 +119,12 @@ class InterpFuncWrap:
|
||||||
return self.interp.call_func(self.node, self, *args, **kwargs)
|
return self.interp.call_func(self.node, self, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
# Python don't fully treat objects, even those defining __call__() special method, as a true callable. For example, such
|
# Python doesn't fully treat objects, even those defining __call__() special method, as a true
|
||||||
# objects aren't automatically converted to bound methods if looked up as another object's attributes. As we want our
|
# callable. For example, such objects aren't automatically converted to bound methods if looked up
|
||||||
# "interpreted functions" to behave as close as possible to real functions, we just wrap function object with a real
|
# as another object's attributes. As we want our "interpreted functions" to behave as closely as
|
||||||
# function. An alternative might have been to perform needed checks and explicitly bind a method using
|
# possible to real functions, we just wrap function object with a real function. An alternative
|
||||||
# types.MethodType() in visit_Attribute (but then maybe there would be still other cases of "callable object" vs
|
# might have been to perform needed checks and explicitly bind a method using types.MethodType() in
|
||||||
|
# visit_Attribute (but then maybe there would be still other cases of "callable object" vs
|
||||||
# "function" discrepancies).
|
# "function" discrepancies).
|
||||||
def InterpFunc(fun):
|
def InterpFunc(fun):
|
||||||
def func(*args, **kwargs):
|
def func(*args, **kwargs):
|
||||||
|
@ -195,26 +161,49 @@ class InterpModule:
|
||||||
return list(self.ns.d.keys())
|
return list(self.ns.d.keys())
|
||||||
|
|
||||||
|
|
||||||
|
# TODO (arrdem 2023-03-08):
|
||||||
|
# This interpreter works well enough to import `requests` and many other libraries and do some
|
||||||
|
# work, but is unsuited to Flowmetal's needs for checkpointing. Because this interpreter uses
|
||||||
|
# direct execution, there's really no way to jam breakpoints or checkpoints or resume points into
|
||||||
|
# program execution. Which is kinda the goal of the whole project.
|
||||||
|
#
|
||||||
|
# This interpreter, while complete, needs to get refactored into probably a `yield` based
|
||||||
|
# coroutine structure wherein individual operations explicitly `yield` to an outer state
|
||||||
|
# management loop which effectively trampolines single statements together with state management
|
||||||
|
# logic.
|
||||||
|
#
|
||||||
|
# The outer interpreter needs to be able to check the "step budget" and decide if it's time for
|
||||||
|
# the program to suspend.
|
||||||
|
#
|
||||||
|
# Individual steps (workflow calls/function calls) may also cause the program to suspend.
|
||||||
|
#
|
||||||
|
# Suspending requires signaling the top level loop, and the top level loop needs both the
|
||||||
|
# namespace tree and some sort of cursor or address into the AST under interpretation
|
||||||
|
# representing where to resume. The logical equivalent of a program counter, but a tree path.
|
||||||
|
|
||||||
class ModuleInterpreter(StrictNodeVisitor):
|
class ModuleInterpreter(StrictNodeVisitor):
|
||||||
"""An interpreter specific to a single module."""
|
"""An interpreter specific to a single module."""
|
||||||
|
|
||||||
def __init__(self, system, fname, node):
|
def __init__(self, system, fname, node):
|
||||||
self.system = system
|
self.system = system
|
||||||
self.fname = fname
|
self.fname = fname
|
||||||
self.ns = self.module_ns = ModuleNS(node)
|
self.module_ns: ModuleNS = ModuleNS(node)
|
||||||
|
self.ns: ANamespace = self.module_ns
|
||||||
|
|
||||||
# Call stack (in terms of function AST nodes).
|
# Call stack (in terms of function AST nodes).
|
||||||
self.call_stack = []
|
self.call_stack = []
|
||||||
|
|
||||||
# To implement "store" operation, we need to arguments: location and value to store. The operation itself is
|
# To implement "store" operation, we need two arguments: location and value to store. The
|
||||||
# handled by a node visitor (e.g. visit_Name), and location is represented by AST node, but there's no support
|
# operation itself is handled by a node visitor (e.g. visit_Name), and location is
|
||||||
# to pass additional arguments to a visitor (likely, because it would be a burden to explicit pass such
|
# represented by AST node, but there's no support to pass additional arguments to a visitor
|
||||||
# additional arguments thru the chain of visitors). So instead, we store this value as field. As interpretation
|
# (likely, because it would be a burden to explicitly pass such additional arguments through the
|
||||||
# happens sequentially, there's no risk that it will be overwritten "concurrently".
|
# chain of visitors). So instead, we store this value as field. As interpretation happens
|
||||||
|
# sequentially, there's no risk that it will be overwritten "concurrently".
|
||||||
self.store_val = None
|
self.store_val = None
|
||||||
|
|
||||||
# Current active exception, for bare "raise", which doesn't work across function boundaries (and that's how we
|
# Current active exception, for bare "raise", which doesn't work across function boundaries
|
||||||
# have it - exception would be caught in visit_Try, while re-rasing would happen in visit_Raise).
|
# (and that's how we have it - exception would be caught in visit_Try, while re-raising would
|
||||||
|
# happen in visit_Raise).
|
||||||
self.cur_exc = []
|
self.cur_exc = []
|
||||||
|
|
||||||
def push_ns(self, new_ns):
|
def push_ns(self, new_ns):
|
||||||
|
@ -222,6 +211,7 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
self.ns = new_ns
|
self.ns = new_ns
|
||||||
|
|
||||||
def pop_ns(self):
|
def pop_ns(self):
|
||||||
|
assert self.ns is not None
|
||||||
self.ns = self.ns.parent
|
self.ns = self.ns.parent
|
||||||
|
|
||||||
def stmt_list_visit(self, lst):
|
def stmt_list_visit(self, lst):
|
||||||
|
@ -247,13 +237,13 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
return self.visit(node.body)
|
return self.visit(node.body)
|
||||||
|
|
||||||
def visit_ClassDef(self, node):
|
def visit_ClassDef(self, node):
|
||||||
self.push_ns(ClassNS(node))
|
ns: ClassNS = ClassNS(node)
|
||||||
|
self.push_ns(ns)
|
||||||
try:
|
try:
|
||||||
self.stmt_list_visit(node.body)
|
self.stmt_list_visit(node.body)
|
||||||
except Exception:
|
except Exception:
|
||||||
self.pop_ns()
|
self.pop_ns()
|
||||||
raise
|
raise
|
||||||
ns = self.ns
|
|
||||||
self.pop_ns()
|
self.pop_ns()
|
||||||
cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d)
|
cls = type(node.name, tuple([self.visit(b) for b in node.bases]), ns.d)
|
||||||
cls = self.wrap_decorators(cls, node)
|
cls = self.wrap_decorators(cls, node)
|
||||||
|
@ -266,8 +256,7 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
return self.prepare_func(node)
|
return self.prepare_func(node)
|
||||||
|
|
||||||
def visit_FunctionDef(self, node):
|
def visit_FunctionDef(self, node):
|
||||||
# Defaults are evaluated at function definition time, so we
|
# Defaults are evaluated at function definition time, so we need to do that now.
|
||||||
# need to do that now.
|
|
||||||
func = self.prepare_func(node)
|
func = self.prepare_func(node)
|
||||||
func = self.wrap_decorators(func, node)
|
func = self.wrap_decorators(func, node)
|
||||||
self.ns[node.name] = func
|
self.ns[node.name] = func
|
||||||
|
@ -290,11 +279,10 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
all_args.add(arg_name(a))
|
all_args.add(arg_name(a))
|
||||||
if v is not None:
|
if v is not None:
|
||||||
d[arg_name(a)] = self.visit(v)
|
d[arg_name(a)] = self.visit(v)
|
||||||
# We can store cached argument names of a function in its node -
|
# We can store cached argument names of a function in its node - it's static.
|
||||||
# it's static.
|
|
||||||
node.args.all_args = all_args
|
node.args.all_args = all_args
|
||||||
# We can't store the values of default arguments - they're dynamic,
|
# We can't store the values of default arguments - they're dynamic, may depend on the
|
||||||
# may depend on the lexical scope.
|
# lexical scope.
|
||||||
func.defaults_dict = d
|
func.defaults_dict = d
|
||||||
|
|
||||||
return InterpFunc(func)
|
return InterpFunc(func)
|
||||||
|
@ -308,9 +296,8 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
)
|
)
|
||||||
|
|
||||||
argspec = node.args
|
argspec = node.args
|
||||||
# If there's vararg, either offload surplus of args to it, or init
|
# If there's vararg, either offload surplus of args to it, or init it to empty tuple (all in
|
||||||
# it to empty tuple (all in one statement). If no vararg, error on
|
# one statement). If no vararg, error on too many args.
|
||||||
# too many args.
|
|
||||||
#
|
#
|
||||||
# Note that we have to do the .posonlyargs dance
|
# Note that we have to do the .posonlyargs dance
|
||||||
if argspec.vararg:
|
if argspec.vararg:
|
||||||
|
@ -329,9 +316,8 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
for a, value in zip(argspec.posonlyargs, args):
|
for a, value in zip(argspec.posonlyargs, args):
|
||||||
self.ns[arg_name(a)] = value
|
self.ns[arg_name(a)] = value
|
||||||
|
|
||||||
# Process incoming keyword arguments, putting them in namespace if
|
# Process incoming keyword arguments, putting them in namespace if actual arg exists by that
|
||||||
# actual arg exists by that name, or offload to function's kwarg
|
# name, or offload to function's kwarg if any. Also make the needed checks and error out.
|
||||||
# if any. All make needed checks and error out.
|
|
||||||
func_kwarg = {}
|
func_kwarg = {}
|
||||||
for k, v in kwargs.items():
|
for k, v in kwargs.items():
|
||||||
if k in argspec.all_args:
|
if k in argspec.all_args:
|
||||||
|
@ -351,9 +337,8 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
if argspec.kwarg:
|
if argspec.kwarg:
|
||||||
self.ns[arg_name(argspec.kwarg)] = func_kwarg
|
self.ns[arg_name(argspec.kwarg)] = func_kwarg
|
||||||
|
|
||||||
# Finally, overlay default values for arguments not yet initialized.
|
# Finally, overlay default values for arguments not yet initialized. We need to do this last
|
||||||
# We need to do this last for "multiple values for the same arg"
|
# for "multiple values for the same arg" check to work.
|
||||||
# check to work.
|
|
||||||
for k, v in interp_func.defaults_dict.items():
|
for k, v in interp_func.defaults_dict.items():
|
||||||
if k not in self.ns:
|
if k not in self.ns:
|
||||||
self.ns[k] = v
|
self.ns[k] = v
|
||||||
|
@ -376,8 +361,8 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
|
|
||||||
def call_func(self, node, interp_func, *args, **kwargs):
|
def call_func(self, node, interp_func, *args, **kwargs):
|
||||||
self.call_stack.append(node)
|
self.call_stack.append(node)
|
||||||
# We need to switch from dynamic execution scope to lexical scope
|
# We need to switch from dynamic execution scope to lexical scope in which function was
|
||||||
# in which function was defined (then switch back on return).
|
# defined (then switch back on return).
|
||||||
dyna_scope = self.ns
|
dyna_scope = self.ns
|
||||||
self.ns = interp_func.lexical_scope
|
self.ns = interp_func.lexical_scope
|
||||||
self.push_ns(FunctionNS(node))
|
self.push_ns(FunctionNS(node))
|
||||||
|
@ -508,9 +493,9 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
|
|
||||||
def visit_AugAssign(self, node):
|
def visit_AugAssign(self, node):
|
||||||
assert isinstance(node.target.ctx, ast.Store)
|
assert isinstance(node.target.ctx, ast.Store)
|
||||||
# Not functional style, oops. Node in AST has store context, but we
|
# Not functional style, oops. Node in AST has store context, but we need to read its value
|
||||||
# need to read its value first. To not construct a copy of the entire
|
# first. To not construct a copy of the entire node with load context, we temporarily patch
|
||||||
# node with load context, we temporarily patch it in-place.
|
# it in-place.
|
||||||
save_ctx = node.target.ctx
|
save_ctx = node.target.ctx
|
||||||
node.target.ctx = ast.Load()
|
node.target.ctx = ast.Load()
|
||||||
var_val = self.visit(node.target)
|
var_val = self.visit(node.target)
|
||||||
|
@ -518,12 +503,11 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
|
|
||||||
rval = self.visit(node.value)
|
rval = self.visit(node.value)
|
||||||
|
|
||||||
# As augmented assignment is statement, not operator, we can't put them
|
# As augmented assignment is a statement, not an operator, we can't put them all into a map. We
|
||||||
# all into map. We could instead directly lookup special inplace methods
|
# could instead directly lookup special inplace methods (__iadd__ and friends) and use them,
|
||||||
# (__iadd__ and friends) and use them, with a fallback to normal binary
|
# with a fallback to normal binary operations, but from the point of view of this
|
||||||
# operations, but from the point of view of this interpreter, presence
|
# interpreter, presence of such methods is an implementation detail of the object system,
|
||||||
# of such methods is an implementation detail of the object system, it's
|
# it's not concerned with it.
|
||||||
# not concerned with it.
|
|
||||||
op = type(node.op)
|
op = type(node.op)
|
||||||
if op is ast.Add:
|
if op is ast.Add:
|
||||||
var_val += rval
|
var_val += rval
|
||||||
|
@ -682,10 +666,11 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
if func is builtins.super and not args:
|
if func is builtins.super and not args:
|
||||||
if not self.ns.parent or not isinstance(self.ns.parent, ClassNS):
|
if not self.ns.parent or not isinstance(self.ns.parent, ClassNS):
|
||||||
raise RuntimeError("super(): no arguments")
|
raise RuntimeError("super(): no arguments")
|
||||||
# As we're creating methods dynamically outside of class, super() without argument won't work, as that
|
# As we're creating methods dynamically outside of class, super() without argument won't
|
||||||
# requires __class__ cell. Creating that would be cumbersome (Pycopy definitely lacks enough introspection
|
# work, as that requires __class__ cell. Creating that would be cumbersome (Pycopy
|
||||||
# for that), so we substitute 2 implied args (which argumentless super() would take from cell and 1st arg to
|
# definitely lacks enough introspection for that), so we substitute 2 implied args
|
||||||
# func). In our case, we take them from prepared bookkeeping info.
|
# (which argumentless super() would take from cell and 1st arg to func). In our case, we
|
||||||
|
# take them from prepared bookkeeping info.
|
||||||
args = (self.ns.parent.cls, self.ns["self"])
|
args = (self.ns.parent.cls, self.ns["self"])
|
||||||
|
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
|
@ -901,7 +886,7 @@ class ModuleInterpreter(StrictNodeVisitor):
|
||||||
|
|
||||||
def visit_Print(self, node):
|
def visit_Print(self, node):
|
||||||
# In Py2k only
|
# In Py2k only
|
||||||
raise NotImplementedError("Absolutely not. Use __future__.")
|
raise SyntaxError("Absolutely not. Use __future__.")
|
||||||
|
|
||||||
def visit_Str(self, node):
|
def visit_Str(self, node):
|
||||||
return node.s
|
return node.s
|
||||||
|
|
Loading…
Reference in a new issue