From 642219d84c029056f2070655606b328c08d38af7 Mon Sep 17 00:00:00 2001 From: Reid 'arrdem' McKenzie Date: Mon, 31 May 2021 23:46:43 -0600 Subject: [PATCH] README and working-ish astinterp --- projects/flowmetal/README.md | 45 +++++++++++++++++++++++++ projects/flowmetal/scratch/astinterp.py | 4 +-- projects/flowmetal/scratch/test.py | 3 ++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/projects/flowmetal/README.md b/projects/flowmetal/README.md index defafae..d75deb4 100644 --- a/projects/flowmetal/README.md +++ b/projects/flowmetal/README.md @@ -267,6 +267,51 @@ def orchestrator_function(context: df.DurableOrchestrationContext): main = df.Orchestrator.create(orchestrator_function) ``` +Now it would seem that you could "just" automate rewriting that into something like this - + +``` python +@df.Durable +def main(ctx): + x = context.call_activity("F1", None) + y = context.call_activity("F2", x) + z = context.call_activity("F3", y) + return context.call_activity("F4", z) +``` + +There's some prior art for doing this (https://eigenfoo.xyz/manipulating-python-asts/, https://greentreesnakes.readthedocs.io/en/latest/manipulating.html#modifying-the-tree) but it's a lot of legwork for not much benefit. +There are also some pretty gaping correctness holes in taking the decorator-based rewriting approach; +how do you deal with rewriting imported code, or code that's in classes/behind `@property` and other such tricks? + +Just not worth it. + +Now, what we _can_ do is try to hijack the entire Python interpreter to implement the properties/tracing/history recording we want there. +The default CPython lacks hooks for doing this, but we can write a Python-in-Python interpreter and "lift" the user's program into an interpreter we control, which ultimately gets most of its behavior "for free" from the underlying CPython interpreter. 
+There's [an example](https://github.com/pfalcon/pyastinterp) of doing this as part of the pycopy project; although there it's more of a Scheme-style proof of metacircular self-hosting. + +There's a modified copy of the astinterp in `scratch/` which is capable of running a considerable subset of py2/3.9 to the point of being able to source-import many libraries including `requests` and run PyPI sourced library code along with user code under hoisted interpretation. + +It doesn't support coroutines/generators yet, and there's some machinery required to make it "safe" (meaningfully single-steppable; "fix"/support eval, enable user-defined import/`__import__` through the lifted python VM) but as a proof of concept of a lifted VM I'm genuinely shocked how well this works. + +Next questions here revolve around how to "snapshot" the state of the interpreter meaningfully, and how to build a replayable interpreter log. +There are some specific challenges around how Python code interacts with native C code that could limit the viability of this approach, but at the absolute least this fully sandboxed Python interpreter could be used to implement whatever underlying magic is needed, and restricted to some language subset as desired. + +The goal is to make something like this work - + +``` python +from df import Activity + +f1 = Activity("F1") +f2 = Activity("F2") +f3 = Activity("F3") +f4 = Activity("F4") + +def main(): + return f4(f3(f2(f1(None)))) +``` + +This may offer a possible solution to the interpreter checkpointing problem - only checkpoint "supported" operations. +Here the `Activity().__call__` operation would have special support, as with `datetime.datetime.now()` and controlling `time.sleep()`, threading, and possibly `random.Random` seeding, which cannot trivially be made repeatable. 
+ ### Durability challenges FIXME - manually implementing snapshotting and recovery is hard diff --git a/projects/flowmetal/scratch/astinterp.py b/projects/flowmetal/scratch/astinterp.py index 99570ee..72c4aa8 100644 --- a/projects/flowmetal/scratch/astinterp.py +++ b/projects/flowmetal/scratch/astinterp.py @@ -938,7 +938,7 @@ class InterpreterSystem(object): self.modules[name] = mod.ns break elif os.path.isfile(e): - # FIXME (arrdem 2021-05-) + # FIXME (arrdem 2021-05-31) raise RuntimeError("Import from .zip/.whl/.egg archives aren't supported yet") else: self.modules[name] = __import__(name, globals, locals, fromlist, level) @@ -963,5 +963,5 @@ class InterpreterSystem(object): if __name__ == "__main__": - logging.basicConfig(level=logging.WARNING) + logging.basicConfig(level=logging.DEBUG) InterpreterSystem().execute(sys.argv[1]) diff --git a/projects/flowmetal/scratch/test.py b/projects/flowmetal/scratch/test.py index 3db3160..4669588 100644 --- a/projects/flowmetal/scratch/test.py +++ b/projects/flowmetal/scratch/test.py @@ -23,3 +23,6 @@ for _ in range(10): def bar(a, b, **bs): pass + +import requests +print(len(requests.get("https://pypi.org/pypi/requests/json").text))