From bf22e072e3511d1aa31dd7103af027edab68f04c Mon Sep 17 00:00:00 2001 From: Reid 'arrdem' McKenzie Date: Fri, 9 Apr 2021 01:38:49 -0600 Subject: [PATCH] Import the Flowmetal project --- projects/flowmetal/BUILD | 10 + projects/flowmetal/LICENSE.md | 7 + projects/flowmetal/README.md | 85 +++ projects/flowmetal/TODO.org | 46 ++ projects/flowmetal/TODO.org_archive | 39 ++ projects/flowmetal/setup.py | 37 ++ .../src/python/flowmetal/__init__.py | 1 + .../src/python/flowmetal/module_analyzer.py | 80 +++ .../flowmetal/src/python/flowmetal/parser.py | 511 ++++++++++++++++++ .../flowmetal/src/python/flowmetal/repl.py | 78 +++ .../src/python/flowmetal/syntax_analyzer.py | 356 ++++++++++++ projects/flowmetal/test/python/BUILD | 20 + projects/flowmetal/test/python/conftest.py | 0 projects/flowmetal/test/python/test_parser.py | 161 ++++++ .../test/python/test_syntax_analyzer.py | 50 ++ tools/python/requirements.txt | 2 + 16 files changed, 1483 insertions(+) create mode 100644 projects/flowmetal/BUILD create mode 100644 projects/flowmetal/LICENSE.md create mode 100644 projects/flowmetal/README.md create mode 100644 projects/flowmetal/TODO.org create mode 100644 projects/flowmetal/TODO.org_archive create mode 100644 projects/flowmetal/setup.py create mode 100644 projects/flowmetal/src/python/flowmetal/__init__.py create mode 100644 projects/flowmetal/src/python/flowmetal/module_analyzer.py create mode 100644 projects/flowmetal/src/python/flowmetal/parser.py create mode 100644 projects/flowmetal/src/python/flowmetal/repl.py create mode 100644 projects/flowmetal/src/python/flowmetal/syntax_analyzer.py create mode 100644 projects/flowmetal/test/python/BUILD create mode 100644 projects/flowmetal/test/python/conftest.py create mode 100644 projects/flowmetal/test/python/test_parser.py create mode 100644 projects/flowmetal/test/python/test_syntax_analyzer.py diff --git a/projects/flowmetal/BUILD b/projects/flowmetal/BUILD new file mode 100644 index 0000000..6a3098e --- /dev/null +++ 
b/projects/flowmetal/BUILD @@ -0,0 +1,10 @@ +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "lib", + srcs = glob(["src/python/**/*.py"]), + imports = ["src/python"], + deps = [ + py_requirement("prompt-toolkit"), + ] +) diff --git a/projects/flowmetal/LICENSE.md b/projects/flowmetal/LICENSE.md new file mode 100644 index 0000000..7bcf084 --- /dev/null +++ b/projects/flowmetal/LICENSE.md @@ -0,0 +1,7 @@ +Copyright 2019 Reid 'arrdem' McKenzie + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/projects/flowmetal/README.md b/projects/flowmetal/README.md new file mode 100644 index 0000000..5419a47 --- /dev/null +++ b/projects/flowmetal/README.md @@ -0,0 +1,85 @@ +# Flowmetal + +> A shining mercurial metal laden with sensors and almost infinitely reconfigurable. +> +> The stuff of which robots and servitors are made. + +Flowmetal is a substrate for automation. 
+It attempts to provide a programming environment wherein programs are durable, evented and asynchronous, aimed at what would traditionally be described as scripting or coordination. + +Let's unpack these terms. + +**Durable** - programs and their state are not dynamic and RAM located as with traditional models of processes. +Instead programs and their state are always persisted to storage. +This allows programs to sleep for a long time or even move seamlessly between machines. + +**Evented** - durability is implemented in an event sourced style. +Each program retains - or at least has the opportunity to retain - both a log of any external events and of its own execution. +This allows for programs to easily rebuild their state, simplifies the durable programming model, and greatly simplifies debugging as intermediary states are retained and inspectable. + +This also allows for external systems such as REST callback APIs, databases and such to easily integrate with Flowmetal programs as event sources. +It also allows bidirectional communication between Flowmetal programs and other more traditional programming environments. +Anything that can communicate with Flowmetal can provide function implementations, or call Flowmetal programs! + +**Asynchronous** - thanks to Flowmetal's evented execution model, waiting for slow external events either synchronously or asynchronously is second nature! +Flowmetal is especially good at waiting for very, very slow external operations. +Stuff like webhooks and batch processes. + +**Scripting** - the tradeoff Flowmetal makes for the evented model is that it's slow. +While Flowmetal foreign functions could be fast, Flowmetal's interpreter isn't designed for speed. +It's designed for eventing and ensuring durability. +This makes Flowmetal suitable for interacting with and coordinating other systems, but it's not gonna win any benchmark games. + +## Wait what? + +Okay.
+In simpler words, Flowmetal is an interpreted lisp which can use a datastore of your choice for durability. +Other systems can attach to Flowmetal's datastore and send events to and receive them from Flowmetal. +For instance Flowmetal contains a reference implementation of a HTTP callback connector and of a HTTP request connector. + +A possible Flowmetal setup looks something like this - + +``` + +----------------------------+ + +---------------------------+ | + +--------------------------+ |--+ + | External HTTP service(s) |--+ + +--------------------------+ + ^ ^ + | | + v v + +-----------------------+ +------------------------+ + | HTTP server connector | | HTTP request connector | + +-----------------------+ +------------------------+ + ^ ^ + | | + v v + +--------------------+ + | Shared event store | + +--------------------+ + ^ + | + v + +--------------------------+ + | Flowmetal interpreter(s) | + +--------------------------+ +``` + +In this setup, the Flowmetal interpreters are able to interact with an external HTTP service; sending and receiving webhooks with Flowmetal programs waiting for those external events to arrive. + +For instance this program would use the external connector stubs to build up interaction(s) with an external system. + +```lisp + + + +``` + + +Comparisons to Apache Airflow are at least in this setup pretty apt, although Flowmetal's durable execution model makes it much more suitable for providing reliable workflows and its DSL is more approachable. + +## License + +Mirrored from https://git.arrdem.com/arrdem/flowmetal + +Published under the MIT license. 
See [LICENSE.md](LICENSE.md) diff --git a/projects/flowmetal/TODO.org b/projects/flowmetal/TODO.org new file mode 100644 index 0000000..981a0f1 --- /dev/null +++ b/projects/flowmetal/TODO.org @@ -0,0 +1,46 @@ +#+TITLE: Flowmetal TODOs + +* parser +** TODO Rework the tokens in terms of spans instead of just start points :tokens:parser: +Having start and end information allows for textual display of ranges and other +potentially interesting error formatting. Requires some refactoring. + +** TODO Implement load() in the parser :parser: +At present the parser can parse well enough, but it returns a token tree +intended for use in refactoring and autoformatting tools not a direct 'ast' list +tree which is how load() is supposed to behave. + +Figure out how to "mixin" implicit unwrapping of token boxes to values when +loading instead of reading. + +** DONE Implement parser support for :- type ascriptions :parser: +Maybe this is a special case of keywords, maybe it isn't. Support ⊢ as an alternative. Maybe |- ? + +** TODO Think about the difference between reading "data" and reading expression/code syntax :parser: +EDN suggests these two things are largely the same ... but they clearly aren't. + +** TODO Do I want to implement #_ reader discard support? :parser: +Reader discard is a convenient alternative to commenting a bunch of stuff out, +but does require a fair bit of complexity in the parser to support properly. + +** TODO Do I want to implement #?() reader conditional support? :parser: +Reader conditionals are cool for feature expressions and multiple platforms, but +are of unclear value given that I only have one target for the foreseeable future and +Flowmetal is already supposed to be a platform agnostic sort of thing.
+ +** DONE Finish out float support +** DONE Implement strings +** TODO Think about what multiple grammars / dialects look like +* TODO Look at Python SQL ORMs :server:storage: +- Must support PostgreSQL +- Must support SQLite + +The goal is to be able to run the "leader" servers off of postgres and have local +state stores for workers stored in sqlite using large amounts of the same schema. +Being able to get marshalling and unmarshalling to JSON 'for free' would be +lovely. + +* TODO Look at Flask OpenAPI spec stuff :server: +- Ideally want to go spec first +- Must be able to provide validation +- Would be nice to be able to use the spec to drive implementing the app (mounting functions to routes) diff --git a/projects/flowmetal/TODO.org_archive b/projects/flowmetal/TODO.org_archive new file mode 100644 index 0000000..fd2acc8 --- /dev/null +++ b/projects/flowmetal/TODO.org_archive @@ -0,0 +1,39 @@ +# -*- mode: org -*- + + +Archived entries from file /home/arrdem/doc/hobby/programming/lang/python/flowmetal/TODO.org + + +* DONE Implement parse() separately in the parser + :PROPERTIES: + :ARCHIVE_TIME: 2020-06-14 Sun 11:34 + :ARCHIVE_FILE: ~/doc/hobby/programming/lang/python/flowmetal/TODO.org + :ARCHIVE_CATEGORY: TODO + :ARCHIVE_TODO: DONE + :END: +Relates to implementing load() + +When we have a workable load which generates data, we'll want a read() which +generates a syntax tree so that we don't discard that API entirely.
+ + +* DONE Parser test suite + :PROPERTIES: + :ARCHIVE_TIME: 2020-06-14 Sun 11:34 + :ARCHIVE_FILE: ~/doc/hobby/programming/lang/python/flowmetal/TODO.org + :ARCHIVE_CATEGORY: TODO + :ARCHIVE_TODO: DONE + :END: +- Cover the various scanners +- Cover the position tracking machinery + + +* DONE Get pytest set up + :PROPERTIES: + :ARCHIVE_TIME: 2020-06-14 Sun 11:34 + :ARCHIVE_FILE: ~/doc/hobby/programming/lang/python/flowmetal/TODO.org + :ARCHIVE_CATEGORY: TODO + :ARCHIVE_TODO: DONE + :END: +As it says on the tim + diff --git a/projects/flowmetal/setup.py b/projects/flowmetal/setup.py new file mode 100644 index 0000000..fa67c3c --- /dev/null +++ b/projects/flowmetal/setup.py @@ -0,0 +1,37 @@ +from setuptools import setup + +setup( + name="arrdem.flowmetal", + # Package metadata + version='0.0.0', + license="MIT", + description="A weird execution engine", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + author="Reid 'arrdem' McKenzie", + author_email="me@arrdem.com", + url="https://git.arrdem.com/arrdem/flowmetal", + classifiers=[ + "License :: OSI Approved :: MIT License", + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + ], + + # Package setup + package_dir={"": "src/python"}, + packages=[ + "flowmetal", + ], + entry_points={ + 'console_scripts': [ + 'iflow=flowmetal.repl:main' + ], + }, + install_requires=[ + 'prompt-toolkit~=3.0.0', + ], + extras_require={ + } +) diff --git a/projects/flowmetal/src/python/flowmetal/__init__.py b/projects/flowmetal/src/python/flowmetal/__init__.py new file mode 100644 index 0000000..e5a0d9b --- /dev/null +++ b/projects/flowmetal/src/python/flowmetal/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/projects/flowmetal/src/python/flowmetal/module_analyzer.py b/projects/flowmetal/src/python/flowmetal/module_analyzer.py new file mode 100644 index 0000000..2b98ec0 --- 
/dev/null +++ b/projects/flowmetal/src/python/flowmetal/module_analyzer.py @@ -0,0 +1,80 @@ +"""The module analyzer chews modules using bindings. + +Using the parser and syntax analyzer, this module chews on analyzed syntax trees doing the heavy lifting of working with +modules, namespaces and bindings. Gotta sort out all those symbols somewhere. +""" + +from io import StringIO +from typing import IO, NamedTuple, Mapping +from abc import ABC, abstractmethod, abstractproperty + +import flowmetal.parser as p +import flowmetal.syntax_analyzer as sa + + +class Namespace(NamedTuple): + + +## Syntax analysis implementation +class AnalyzerBase(ABC): + """Analyzer interface.""" + + @classmethod + @abstractmethod + def analyze(cls, token: sa.ValueLevelExpr): + """Analyze an expr tree, returning a binding tree.""" + + +class Analyzer(AnalyzerBase): + @classmethod + def analyze(cls, + token: sa.ValueLevelExpr, + environment = None): + pass + + +## Analysis interface +def analyzes(buff: str, + module_analyzer: AnalyzerBase = Analyzer, + module_environment = None, + syntax_analyzer: sa.AnalyzerBase = sa.Analyzer, + parser: p.SexpParser = p.Parser, + source_name = None): + """Parse a single s-expression from a string, returning its token tree.""" + + return analyze(StringIO(buff), + module_analyzer, + module_environment, + syntax_analyzer, + parser, + source_name or f"") + + +def analyzef(path: str, + module_analyzer: AnalyzerBase = Analyzer, + module_environment = None, + syntax_analyzer: sa.AnalyzerBase = sa.Analyzer, + parser: p.SexpParser = p.Parser): + """Parse a single s-expression from the file named by a string, returning its token tree.""" + + with open(path, "r") as f: + return analyze(f, + module_analyzer, + module_environment, + syntax_analyzer, + parser, + path) + + +def analyze(file: IO, + module_analyzer: AnalyzerBase = Analyzer, + module_environment = None, + syntax_analyzer: sa.AnalyzerBase = sa.Analyzer, + parser: p.SexpParser = p.Parser, + source_name = None): 
+ """Parse a single sexpression from a file-like object, returning its token tree.""" + + return module_analyzer.analyze( + syntax_analyzer.analyze( + p.parse(file, parser, source_name)), + module_environment) diff --git a/projects/flowmetal/src/python/flowmetal/parser.py b/projects/flowmetal/src/python/flowmetal/parser.py new file mode 100644 index 0000000..bd0ff16 --- /dev/null +++ b/projects/flowmetal/src/python/flowmetal/parser.py @@ -0,0 +1,511 @@ +""" +A parser for s-expressions. +""" + +from abc import ABC, abstractmethod +from enum import Enum +from io import StringIO, BufferedReader +from typing import IO, NamedTuple, Any +from fractions import Fraction +import re + + +## Types +class Position(NamedTuple): + """An encoding for the location of a read token within a source.""" + source: str + line: int + col: int + offset: int + + @staticmethod + def next_pos(pos: "Position"): + return Position(pos.source, pos.line, pos.col + 1, pos.offset + 1) + + @staticmethod + def next_line(pos: "Position"): + return Position(pos.source, pos.line + 1, 1, pos.offset + 1) + + +class TokenBase(object): + """The shared interface to tokens.""" + + @property + @abstractmethod + def pos(self): + """The position of the token within its source.""" + + @property + @abstractmethod + def raw(self): + """The raw token as scanned.""" + + +class ConstTokenBase(TokenBase, NamedTuple): + """The shared interface for constant tokens""" + data: Any + raw: str + pos: Position + + # Hash according to data + def __hash__(self): + return hash(self.data) + + # And make sure it's orderable + def __eq__(self, other): + return self.data == other + + def __lt__(self, other): + return self.data < other + + def __gt__(self, other): + return self.data > other + + +class BooleanToken(ConstTokenBase): + """A read boolean.""" + + +class IntegerToken(ConstTokenBase): + """A read integer, including position.""" + + +class FractionToken(ConstTokenBase): + """A read fraction, including position.""" + + +class 
FloatToken(ConstTokenBase): + """A read floating point number, including position.""" + + +class SymbolToken(ConstTokenBase): + """A read symbol, including position.""" + + +class KeywordToken(ConstTokenBase): + """A read keyword.""" + + +class StringToken(ConstTokenBase): + """A read string, including position.""" + + +class ListType(Enum): + """The supported types of lists.""" + ROUND = ("(", ")") + SQUARE = ("[", "]") + + +class ListToken(NamedTuple, TokenBase): + """A read list, including its start position and the paren type.""" + data: list + raw: str + pos: Position + paren: ListType = ListType.ROUND + + +class SetToken(NamedTuple, TokenBase): + """A read set, including its start position.""" + data: list + raw: str + pos: Position + + +class MappingToken(NamedTuple, TokenBase): + """A read mapping, including its start position.""" + data: list + raw: str + pos: Position + + +class WhitespaceToken(NamedTuple, TokenBase): + """A bunch of whitespace with no semantic value.""" + data: str + raw: str + pos: Position + + +class CommentToken(WhitespaceToken): + """A read comment with no semantic value.""" + + +## Parser implementation +class PosTrackingBufferedReader(object): + """A slight riff on BufferedReader which only allows for reads and peeks of a + char, and tracks positions. + + Perfect for implementing LL(1) parsers. 
+ """ + + def __init__(self, f: IO, source_name=None): + self._next_pos = self._pos = Position(source_name, 1, 1, 0) + self._char = None + self._f = f + + def pos(self): + return self._pos + + def peek(self): + if self._char is None: + self._char = self._f.read(1) + return self._char + + def read(self): + # Accounting for lookahead(1) + ch = self._char or self._f.read(1) + self._char = self._f.read(1) + + # Accounting for the positions + self._pos = self._next_pos + if ch == "\r" and self.peek() == "\n": + super.read(1) # Throw out a character + self._next_pos = Position.next_line(self._next_pos) + elif ch == "\n": + self._next_pos = Position.next_line(self._next_pos) + else: + self._next_pos = Position.next_pos(self._next_pos) + + return ch + + +class ReadThroughBuffer(PosTrackingBufferedReader): + """A duck that quacks like a PosTrackingBufferedReader.""" + + def __init__(self, ptcr: PosTrackingBufferedReader): + self._reader = ptcr + self._buffer = StringIO() + + def pos(self): + return self._reader.pos() + + def peek(self): + return self._reader.peek() + + def read(self): + ch = self._reader.read() + self._buffer.write(ch) + return ch + + def __str__(self): + return self._buffer.getvalue() + + def __enter__(self, *args): + return self + + def __exit__(self, *args): + pass + + +class SexpParser(ABC): + @classmethod + @abstractmethod + def parse(cls, f: PosTrackingBufferedReader) -> TokenBase: + """Parse an s-expression, returning a parsed token tree.""" + + def read(cls, f: PosTrackingBufferedReader): + """Parse to a token tree and read to values returning the resulting values.""" + + return cls.parse(f).read() + + +class Parser(SexpParser): + """A basic parser which knows about lists, symbols and numbers. + + Intended as a base class / extension point for other parsers. 
+ """ + + @classmethod + def parse(cls, f: PosTrackingBufferedReader): + if not f.peek(): + raise SyntaxError(f"Got end of file ({f.pos()}) while parsing") + elif cls.ispunct(f.peek()): + if f.peek() == "(": + return cls.parse_list(f) + elif f.peek() == "[": + return cls.parse_sqlist(f) + elif f.peek() == '"': + return cls.parse_str(f) + elif f.peek() == ";": + return cls.parse_comment(f) + else: + raise SyntaxError(f"Got unexpected punctuation {f.read()!r} at {f.pos()} while parsing") + elif cls.isspace(f.peek()): + return cls.parse_whitespace(f) + else: + return cls.parse_symbol(f) + + @classmethod + def isspace(cls, ch: str): + """An extension point allowing for a more expansive concept of whitespace.""" + return ch.isspace() or ch == ',' + + @classmethod + def ispunct(cls, ch: str): + return ch in ( + '"' + ';' # Semicolon + '()' # Parens + '⟮⟯' # 'flat' parens + '[]' # Square brackets + '⟦⟧' # 'white' square brackets + '{}' # Curly brackets + '⟨⟩' # Angle brackets + '《》' # Double angle brackets + '⟪⟫' # Another kind of double angle brackets + ) + + @classmethod + def parse_delimeted(cls, f: PosTrackingBufferedReader, openc, closec, ctor): + with ReadThroughBuffer(f) as rtb: + pos = None + for c in openc: + pos = pos or rtb.pos() + assert rtb.read() == c # Discard the leading delimeter + pos = rtb.pos() + acc = [] + while f.peek() != closec: + if not f.peek(): + raise SyntaxError(f"Got end of file while parsing {openc!r}...{closec!r} starting at {pos}") + try: + acc.append(cls.parse(rtb)) + except SyntaxError as e: + raise SyntaxError(f"While parsing {openc!r}...{closec!r} starting at {pos},\n{e}") + + assert rtb.read() == closec # Discard the trailing delimeter + return ctor(acc, str(rtb), pos) + + # FIXME (arrdem 2020-07-18): + # Break this apart and make the supported lists composable features somehow? 
+ @classmethod + def parse_list(cls, f: PosTrackingBufferedReader): + return cls.parse_delimeted(f, "(", ")", lambda *args: ListToken(*args, ListType.ROUND)) + + @classmethod + def parse_sqlist(cls, f: PosTrackingBufferedReader): + return cls.parse_delimeted(f, "[", "]", lambda *args: ListToken(*args, ListType.SQUARE)) + + # FIXME (arrdem 2020-07-18): + # Break this apart into middleware or composable features somehow? + @classmethod + def handle_symbol(cls, buff, pos): + def _sign(m, idx): + if m.group(idx) == '-': + return -1 + else: + return 1 + + # Parsing integers with bases + if m := re.fullmatch(r"([+-]?)(\d+)r([a-z0-9_]+)", buff): + return IntegerToken( + _sign(m, 1) * int(m.group(3).replace("_", ""), + int(m.group(2))), + buff, + pos, + ) + + # Parsing hex numbers + if m := re.fullmatch(r"([+-]?)0[xX]([A-Fa-f0-9_]*)", buff): + val = m.group(2).replace("_", "") + return IntegerToken(_sign(m, 1) * int(val, 16), buff, pos) + + # Parsing octal numbers + if m := re.fullmatch(r"([+-]?)0([\d_]*)", buff): + val = m.group(2).replace("_", "") + return IntegerToken(_sign(m, 1) * int(val, 8), buff, pos) + + # Parsing integers + if m := re.fullmatch(r"([+-]?)\d[\d_]*", buff): + return IntegerToken(int(buff.replace("_", "")), buff, pos) + + # Parsing fractions + if m := re.fullmatch(r"([+-]?)(\d[\d_]*)/(\d[\d_]*)", buff): + return FractionToken( + Fraction( + int(m.group(2).replace("_", "")), + int(m.group(3).replace("_", ""))), + buff, + pos, + ) + + # Parsing floats + if re.fullmatch(r"([+-]?)\d[\d_]*(\.\d[\d_]*)?(e[+-]?\d[\d_]*)?", buff): + return FloatToken(float(buff), buff, pos) + + # Booleans + if buff == "true": + return BooleanToken(True, buff, pos) + + if buff == "false": + return BooleanToken(False, buff, pos) + + # Keywords + if buff.startswith(":"): + return KeywordToken(buff, buff, pos) + + # Default behavior + return SymbolToken(buff, buff, pos) + + @classmethod + def parse_symbol(cls, f: PosTrackingBufferedReader): + with ReadThroughBuffer(f) as rtb: + 
pos = None + while rtb.peek() and not cls.isspace(rtb.peek()) and not cls.ispunct(rtb.peek()): + pos = pos or rtb.pos() + rtb.read() + buff = str(rtb) + return cls.handle_symbol(buff, pos) + + @classmethod + def parse_whitespace(cls, f: PosTrackingBufferedReader): + with ReadThroughBuffer(f) as rtb: + pos = None + while rtb.peek() and cls.isspace(rtb.peek()): + pos = pos or rtb.pos() + ch = rtb.read() + if ch == "\n": + break + buff = str(rtb) + return WhitespaceToken(buff, buff, pos) + + @classmethod + def parse_comment(cls, f: PosTrackingBufferedReader): + with ReadThroughBuffer(f) as rtb: + pos = None + while rtb.read() not in ["\n", ""]: + pos = pos or rtb.pos() + continue + buff = str(rtb) + return CommentToken(buff, buff, pos) + + + @classmethod + def handle_escape(cls, ch: str): + if ch == 'n': + return "\n" + elif ch == 'r': + return "\r" + elif ch == 'l': + return "\014" # form feed + elif ch == 't': + return "\t" + elif ch == '"': + return '"' + + @classmethod + def parse_str(cls, f: PosTrackingBufferedReader): + with ReadThroughBuffer(f) as rtb: + assert rtb.read() == '"' + pos = rtb.pos() + content = [] + + while True: + if not rtb.peek(): + raise + + # Handle end of string + elif rtb.peek() == '"': + rtb.read() + break + + # Handle escape sequences + elif rtb.peek() == '\\': + rtb.read() # Discard the escape leader + # Octal escape + if rtb.peek() == '0': + rtb.read() + buff = [] + while rtb.peek() in '01234567': + buff.append(rtb.read()) + content.append(chr(int(''.join(buff), 8))) + + # Hex escape + elif rtb.peek() == 'x': + rtb.read() # Discard the escape leader + buff = [] + while rtb.peek() in '0123456789abcdefABCDEF': + buff.append(rtb.read()) + content.append(chr(int(''.join(buff), 16))) + + else: + content.append(cls.handle_escape(rtb.read())) + + else: + content.append(rtb.read()) + + buff = str(rtb) + return StringToken(content, buff, pos) + + +## Parsing interface +def parses(buff: str, + parser: SexpParser = Parser, + source_name=None): + 
"""Parse a single s-expression from a string, returning its token tree.""" + + return parse(StringIO(buff), parser, source_name or f"") + + +def parsef(path: str, + parser: SexpParser = Parser): + """Parse a single s-expression from the file named by a string, returning its token tree.""" + + with open(path, "r") as f: + return parse(f, parser, path) + + +def parse(file: IO, + parser: SexpParser = Parser, + source_name=None): + """Parse a single sexpression from a file-like object, returning its token tree.""" + + return parser.parse( + PosTrackingBufferedReader( + file, + source_name=source_name + ) + ) + + +## Loading interface +def loads(buff: str, + parser: SexpParser = Parser, + source_name=None): + """Load a single s-expression from a string, returning its object representation.""" + + return load(StringIO(buff), parser, source_name or f"") + + +def loadf(path: str, + parser: SexpParser = Parser): + """Load a single s-expression from the file named by a string, returning its object representation.""" + + with open(path, "r") as f: + return load(f, parser, path) + + +def load(file: IO, + parser: SexpParser = Parser, + source_name=None): + """Load a single sexpression from a file-like object, returning its object representation.""" + + return parser.load( + PosTrackingBufferedReader( + file, + source_name=source_name + ) + ) + + +## Dumping interface +def dump(file: IO, obj): + """Given an object, dump its s-expression coding to the given file-like object.""" + + raise NotImplementedError() + + +def dumps(obj): + """Given an object, dump its s-expression coding to a string and return that string.""" + + with StringIO("") as f: + dump(f, obj) + return str(f) diff --git a/projects/flowmetal/src/python/flowmetal/repl.py b/projects/flowmetal/src/python/flowmetal/repl.py new file mode 100644 index 0000000..149b621 --- /dev/null +++ b/projects/flowmetal/src/python/flowmetal/repl.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import 
sys + +from flowmetal.syntax_analyzer import analyzes + +from prompt_toolkit import print_formatted_text, prompt, PromptSession +from prompt_toolkit.formatted_text import FormattedText +from prompt_toolkit.history import FileHistory +from prompt_toolkit.styles import Style + + +STYLE = Style.from_dict({ + # User input (default text). + "": "", + "prompt": "ansigreen", + "time": "ansiyellow" +}) + + +class InterpreterInterrupt(Exception): + """An exception used to break the prompt or evaluation.""" + + +def pp(t, indent=""): + if isinstance(t, list): # lists + buff = ["["] + for e in t: + buff.append(f"{indent} " + pp(e, indent+" ")+",") + return "\n".join(buff + [f"{indent}]"]) + + elif hasattr(t, '_fields'): # namedtuples + buff = [f"{type(t).__name__}("] + for field, value in zip(t._fields, t): + buff.append(f"{indent} {field}=" + pp(value, indent+" ")+",") + return "\n".join(buff + [f"{indent})"]) + + elif isinstance(t, tuple): # tuples + buff = ["("] + for e in t: + buff.append(f"{indent} " + pp(e, indent+" ")+",") + return "\n".join(buff + [f"{indent})"]) + + else: + return repr(t) + +parser = argparse.ArgumentParser() + +def main(): + """REPL entry point.""" + + args = parser.parse_args(sys.argv[1:]) + logger = logging.getLogger("flowmetal") + ch = logging.StreamHandler() + ch.setLevel(logging.INFO) + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + ch.setFormatter(formatter) + logger.addHandler(ch) + + session = PromptSession(history=FileHistory(".iflow.history")) + line_no = 0 + + while True: + try: + line = session.prompt([("class:prompt", ">>> ")], style=STYLE) + except (InterpreterInterrupt, KeyboardInterrupt): + continue + except EOFError: + break + + try: + print(pp(analyzes(line, source_name=f"repl@{line_no}"))) + except Exception as e: + print(e) + finally: + line_no += 1 diff --git a/projects/flowmetal/src/python/flowmetal/syntax_analyzer.py b/projects/flowmetal/src/python/flowmetal/syntax_analyzer.py new file 
mode 100644
index 0000000..19c3d27
--- /dev/null
+++ b/projects/flowmetal/src/python/flowmetal/syntax_analyzer.py
@@ -0,0 +1,437 @@
+"""
+The parser just parses and tokenizes.
+
+The [syntax] syntax_analyzer interprets a parse sequence into a syntax tree which can be checked, type inferred and compiled.
+"""
+
+from abc import ABC, abstractmethod
+from io import StringIO
+from typing import NamedTuple, List, Union, Any, IO, Optional, Tuple, Type
+from enum import Enum
+
+import flowmetal.parser as p
+
+
+### Types
+## We are not, in fact, sponsored by Typelevel LLC.
+# NOTE(review): several classes below combine NamedTuple with another base class.
+# typing.NamedTuple does not support extra bases (recent Pythons appear to reject
+# this at class creation time) - confirm which interpreter versions are targeted.
+class TypeLevelExpr(object):
+    """A base class for type-level expressions."""
+    pass
+
+
+class GenericExpr(TypeLevelExpr, NamedTuple):
+    """'invocation' (application) of a generic type to Type[Level]Exprs."""
+    pass
+
+
+class TypeExpr(TypeLevelExpr, NamedTuple):
+    """A bound (or yet to be bound) type level symbol."""
+    pass
+
+
+class BuiltinType(TypeLevelExpr, Enum):
+    """Built in types for atoms."""
+    BOOLEAN = 'Boolean'
+    SYMBOL = 'Symbol'
+    KEYWORD = 'Keyword'
+    STRING = 'String'
+    INTEGER = 'Integer'
+    FRACTION = 'Fraction'
+    FLOAT = 'Float'
+
+
+class ConstraintExpr(TypeLevelExpr, NamedTuple):
+    """A value-level constraint (predicate) as a type."""
+
+
+## Terms
+# Now down to reality
+class ValueLevelExpr(object):
+    """A base class for value-level expressions."""
+
+
+class TriviallyTypedExpr(ValueLevelExpr):
+    """And some of those expressions have trivial types."""
+    @property
+    def type(self) -> TypeExpr:
+        """The type of an expression."""
+
+
+class AscribeExpr(TriviallyTypedExpr, NamedTuple):
+    """An expression paired with the type-level expression ascribed to it."""
+
+    value: ValueLevelExpr
+    type: TypeLevelExpr
+
+
+class ConstExpr(TriviallyTypedExpr, NamedTuple):
+    """Constant expressions. Keywords, strings, numbers, that sort of thing."""
+
+    token: p.ConstTokenBase
+
+    @property
+    def data(self) -> Any:
+        """The value of the constant."""
+        # The parser gives us this data
+        return self.token.data
+
+    # A property, like each concrete override below, so `expr.type` reads the same way on
+    # the base class as on its subclasses.
+    @property
+    @abstractmethod
+    def type(self):
+        """The builtin type of this constant; each concrete subclass overrides it."""
+        raise NotImplementedError()
+
+
+class BooleanExpr(ConstExpr):
+    """A boolean constant."""
+
+    @property
+    def type(self):
+        return BuiltinType.BOOLEAN
+
+
+class IntegerExpr(ConstExpr):
+    """An integer constant."""
+
+    @property
+    def type(self):
+        return BuiltinType.INTEGER
+
+
+class FractionExpr(ConstExpr):
+    """A fraction (ratio) constant."""
+
+    @property
+    def type(self):
+        return BuiltinType.FRACTION
+
+
+class FloatExpr(ConstExpr):
+    """A floating point constant."""
+
+    @property
+    def type(self):
+        return BuiltinType.FLOAT
+
+
+class KeywordExpr(ConstExpr):
+    """A keyword constant."""
+
+    @property
+    def type(self):
+        return BuiltinType.KEYWORD
+
+
+class StringExpr(ConstExpr):
+    """A string constant."""
+
+    @property
+    def type(self):
+        return BuiltinType.STRING
+
+
+class ListExpr(ValueLevelExpr, NamedTuple):
+    """A list of element expressions."""
+
+    elements: List[ValueLevelExpr]
+
+
+## 'real' AST nodes
+class DoExpr(ValueLevelExpr, NamedTuple):
+    """The leading expressions of a `do` body plus its final expression."""
+
+    effect_exprs: List[ValueLevelExpr]
+    ret_expr: ValueLevelExpr
+
+
+class LetExpr(ValueLevelExpr, NamedTuple):
+    """A list of (binding, value) pairs plus the analyzed body expression."""
+
+    binding_exprs: List[Tuple]
+    ret_expr: DoExpr
+
+
+class FnExpr(ValueLevelExpr, NamedTuple):
+    """A function: its arguments, optional return type ascription and analyzed body."""
+
+    arguments: List
+    ret_type: TypeExpr
+    ret_expr: DoExpr
+
+
+## Syntax analysis implementation
+class AnalyzerBase(ABC):
+    """Analyzer interface."""
+
+    @classmethod
+    @abstractmethod
+    def analyze(cls, token: p.TokenBase) -> ValueLevelExpr:
+        """Analyze a token tree, returning an expr tree."""
+
+
+def _t(txt):
+    """Build the SymbolToken for `txt`, as used for the special form names below."""
+    return p.SymbolToken(txt, txt, None)
+
+
+class Analyzer(AnalyzerBase):
+    """A reference Analyzer implementation.
+
+    Walks a parsed token tree, building up a syntax tree.
+    """
+    TACK0 = _t('⊢')
+    TACK1 = _t('|-')
+    TACK2 = p.KeywordToken(":-", None, None)
+    LET = _t('let')
+    DO = _t('do')
+    FN = _t('fn')
+    LIST = _t('list')
+    QUOTE = _t('quote')
+
+    @classmethod
+    def _tackp(cls, t):
+        """Return whether `t` is one of the ascription ('tack') tokens."""
+        return t in [cls.TACK0, cls.TACK1, cls.TACK2]
+
+    @classmethod
+    def _nows(cls, tokens):
+        """Return `tokens` with every WhitespaceToken removed."""
+        return [t for t in tokens if not isinstance(t, p.WhitespaceToken)]
+
+    @classmethod
+    def _chomp(cls, tokens):
+        """'chomp' an expression and optional ascription off the tokens, returning an expression and the remaining tokens."""
+
+        if len(tokens) == 1:
+            return cls.analyze(tokens[0]), []
+        elif cls._tackp(tokens[1]):
+            if len(tokens) >= 3:
+                return (
+                    AscribeExpr(
+                        cls.analyze(tokens[0]),
+                        cls.analyze(tokens[2])),
+                    tokens[3:],
+                )
+            else:
+                raise SyntaxError(f"Analyzing tack at {tokens[1].pos}, did not find following type ascription!")
+        else:
+            return cls.analyze(tokens[0]), tokens[1:]
+
+    @classmethod
+    def _terms(cls, tokens):
+        """Analyze all of `tokens`, ignoring whitespace, into a list of expressions."""
+        terms = []
+        tokens = cls._nows(tokens)
+        while tokens:
+            term, tokens = cls._chomp(tokens)
+            terms.append(term)
+        return terms
+
+    @classmethod
+    def analyze(cls, token: p.TokenBase):
+        """Analyze a single token, returning the corresponding expression.
+
+        Constant tokens map to their ConstExpr subclass, symbols are returned unchanged and lists go
+        through analyze_list. Any other token is rejected with a SyntaxError.
+        """
+        if isinstance(token, p.BooleanToken):
+            return BooleanExpr(token)
+
+        if isinstance(token, p.KeywordToken):
+            return KeywordExpr(token)
+
+        if isinstance(token, p.IntegerToken):
+            return IntegerExpr(token)
+
+        if isinstance(token, p.FractionToken):
+            return FractionExpr(token)
+
+        if isinstance(token, p.FloatToken):
+            return FloatExpr(token)
+
+        if isinstance(token, p.StringToken):
+            return StringExpr(token)
+
+        if isinstance(token, p.SymbolToken):
+            return token
+
+        if isinstance(token, p.ListToken):
+            return cls.analyze_list(token)
+
+        # Fail loudly instead of implicitly returning None for a token we cannot analyze.
+        raise SyntaxError(f"Unable to analyze token {token!r}")
+
+    @classmethod
+    def _do(cls, t, body: list):
+        """Wrap `body` in a `do` list token, reusing the raw text and position of `t`."""
+        return p.ListToken([cls.DO] + body, t.raw, t.pos)
+
+    @classmethod
+    def analyze_list(cls, token: p.ListToken):
+        """Analyze a list, for which there are several 'ground' forms."""
+
+        # Expunge any whitespace tokens
+        tokens = cls._nows(token.data)
+
+        if not tokens:
+            return ListExpr([])
+
+        if tokens[0] == cls.QUOTE:
+            raise NotImplementedError("Quote isn't quite there!")
+
+        if tokens[0] == cls.LIST:
+            return ListExpr(cls._terms(tokens[1:]))
+
+        if tokens[0] == cls.DO:
+            return cls.analyze_do(token)
+
+        if tokens[0] == cls.LET:
+            return cls.analyze_let(token)
+
+        if tokens[0] == cls.FN:
+            return cls.analyze_fn(token)
+
+        # `analyze_invoke` used to be called without being defined and its result was
+        # dropped; return it so that an Analyzer subclass can supply invocations.
+        return cls.analyze_invoke(tokens)
+
+    @classmethod
+    def analyze_invoke(cls, tokens):
+        """Analyze an invocation. Not supported yet; a hook for subclasses to override.
+
+        `tokens` is the whitespace-free content of the list being analyzed.
+        """
+        raise NotImplementedError("Invoke isn't quite there!")
+
+    @classmethod
+    def analyze_let(cls, let_token):
+        """Analyze a `let` form: a list of bindings followed by one or more body expressions.
+
+        Each binding is a symbol, an optional tack plus type, and a value expression.
+        Raises SyntaxError when the form is malformed.
+        """
+        # Drop whitespace before dropping the head symbol, as analyze_list does when dispatching.
+        tokens = cls._nows(let_token.data)[1:]
+        # Raise rather than assert so malformed input is still rejected under `python -O`.
+        if len(tokens) < 2:
+            raise SyntaxError(f"Analyzing `let` at {let_token.pos}, expected bindings and at least one body expression!")
+        if not isinstance(tokens[0], p.ListToken):
+            raise SyntaxError(f"Analyzing `let` at {let_token.pos}, got illegal bindings {tokens[0]}")
+
+        bindings = []
+        binding_tokens = cls._nows(tokens[0].data)
+        tokens = tokens[1:]
+        while binding_tokens:
+            if isinstance(binding_tokens[0], p.SymbolToken):
+                bindexpr = binding_tokens[0]
+                binding_tokens = binding_tokens[1:]
+            else:
+                raise SyntaxError(f"Analyzing `let` at {let_token.pos}, got illegal binding expression {binding_tokens[0]}")
+
+            if not binding_tokens:
+                raise SyntaxError(f"Analyzing `let` at {let_token.pos}, got binding expression without subsequent value expression!")
+
+            if cls._tackp(binding_tokens[0]):
+                if len(binding_tokens) < 2:
+                    raise SyntaxError(f"Analyzing `let` at {let_token.pos}, got `⊢` at {binding_tokens[0].pos} without type!")
+                bind_ascription = cls.analyze(binding_tokens[1])
+                binding_tokens = binding_tokens[2:]
+                bindexpr = AscribeExpr(bindexpr, bind_ascription)
+
+            if not binding_tokens:
+                raise SyntaxError(f"Analyzing `let` at {let_token.pos}, got binding expression without subsequent value expression!")
+
+            # Analyze the value and step past it. The remaining tokens used to be analyzed
+            # here instead, which ended the loop after the first binding.
+            valexpr = cls.analyze(binding_tokens[0])
+            binding_tokens = binding_tokens[1:]
+
+            bindings.append((bindexpr, valexpr))
+
+        # FIXME (arrdem 2020-07-18):
+        #   This needs to happen with bindings
+        tail = tokens[0] if len(tokens) == 1 else cls._do(let_token, tokens)
+        return LetExpr(bindings, cls.analyze(tail))
+
+    @classmethod
+    def analyze_do(cls, do_token):
+        """Analyze a `do` form.
+
+        A single body expression is returned as-is; otherwise a DoExpr of the leading
+        expressions and the final expression. Raises SyntaxError for an empty body.
+        """
+        exprs = cls._terms(cls._nows(do_token.data)[1:])
+        if not exprs:
+            raise SyntaxError(f"Analyzing `do` at {do_token.pos}, got no body expressions!")
+
+        if exprs[:-1]:
+            return DoExpr(exprs[:-1], exprs[-1])
+        else:
+            return exprs[-1]
+
+    @classmethod
+    def analyze_fn(cls, fn_token):
+        """Analyze a `fn` form: an argument list, an optional tack plus return type, then a body.
+
+        Raises SyntaxError when the form is malformed.
+        """
+        tokens = cls._nows(fn_token.data)[1:]
+        if len(tokens) < 2:
+            raise SyntaxError(f"Analyzing `fn` at {fn_token.pos}, expected an argument list and at least one body expression!")
+        if not isinstance(tokens[0], p.ListToken):
+            raise SyntaxError(f"Analyzing `fn` at {fn_token.pos}, got illegal argument list {tokens[0]}")
+
+        args = cls._terms(tokens[0].data)
+
+        ascription = None
+        if cls._tackp(tokens[1]):
+            if len(tokens) < 3:
+                raise SyntaxError(f"Analyzing `fn` at {fn_token.pos}, got `⊢` at {tokens[1].pos} without type!")
+            ascription = cls.analyze(tokens[2])
+            # Step past the argument list, the tack and the type. Only two tokens used to be
+            # skipped, which left the return type at the head of the body.
+            tokens = tokens[3:]
+        else:
+            tokens = tokens[1:]
+
+        if not tokens:
+            raise SyntaxError(f"Analyzing `fn` at {fn_token.pos}, got no body expressions!")
+
+        # FIXME (arrdem 2020-07-18):
+        #   This needs to happen with bindings
+        body = cls.analyze(cls._do(fn_token, tokens))
+        return FnExpr(args, ascription, body)
+
+
+## Analysis interface
+def analyzes(buff: str,
+             syntax_analyzer: Type[AnalyzerBase] = Analyzer,
+             parser: p.SexpParser = p.Parser,
+             source_name: Optional[str] = None):
+    """Parse a single s-expression from a string, returning its analyzed expr tree."""
+
+    return analyze(StringIO(buff), syntax_analyzer, parser, source_name or "")
+
+
+def analyzef(path: str,
+             syntax_analyzer: Type[AnalyzerBase] = Analyzer,
+             parser: p.SexpParser = p.Parser):
+    """Parse a single s-expression from the file named by a string, returning its analyzed expr tree."""
+
+    # Sources may contain non-ASCII symbols such as `⊢`; don't depend on the locale's encoding.
+    with open(path, "r", encoding="utf-8") as f:
+        return analyze(f, syntax_analyzer, parser, path)
+
+
+def analyze(file: IO,
+            syntax_analyzer: Type[AnalyzerBase] = Analyzer,
+            parser: p.SexpParser = p.Parser,
+            source_name: Optional[str] = None):
+    """Parse a single s-expression from a file-like object, returning its analyzed expr tree."""
+
+    return syntax_analyzer.analyze(p.parse(file, parser, source_name))
diff --git a/projects/flowmetal/test/python/BUILD b/projects/flowmetal/test/python/BUILD
new file mode 100644
index 0000000..c8f0792
--- /dev/null
+++ b/projects/flowmetal/test/python/BUILD
@@ -0,0 +1,20 @@
+py_library(
+    name = "conftest",
+    srcs = [
+        "conftest.py"
+    ],
+    imports = [
+        "."
+    ],
+)
+
+py_pytest(
+    name = "test",
+    srcs = glob(["*.py"]),
+    deps = [
+        "//projects/flowmetal:lib",
+        ":conftest",
+        py_requirement("pytest-cov"),
+    ],
+    args = ["--cov-report", "term", "--cov=flowmetal"],
+)
diff --git a/projects/flowmetal/test/python/conftest.py b/projects/flowmetal/test/python/conftest.py
new file mode 100644
index 0000000..e69de29
diff --git a/projects/flowmetal/test/python/test_parser.py b/projects/flowmetal/test/python/test_parser.py
new file mode 100644
index 0000000..b78caf4
--- /dev/null
+++ b/projects/flowmetal/test/python/test_parser.py
@@ -0,0 +1,161 @@
+"""
+Tests covering the Flowmetal parser.
+"""
+
+from math import nan
+
+import flowmetal.parser as p
+
+import pytest
+
+
+def test_parse_list():
+    """Trivial parsing a list."""
+    assert isinstance(p.parses("()"), p.ListToken)
+    assert p.parses("()").paren == p.ListType.ROUND
+
+
+@pytest.mark.parametrize('txt, val', [
+    ('1', 1),
+    ('2', 2),
+    ('103', 103),
+    ('504', 504),
+    # Sign prefixes
+    ('-1', -1),
+    ('+1', +1),
+    # Underscores as whitespace
+    ('1_000_000', 1e6),
+    ('+1_000', 1000),
+    ('-1_000', -1000),
+    # Variable base
+    ('2r1', 1),
+    ('2r10', 2),
+    ('2r100', 4),
+    ('2r101', 5),
+    ('+2r10', 2),
+    ('-2r10', -2),
+    # Octal
+    ('00', 0),
+    ('01', 1),
+    ('010', 8),
+    ('+010', 8),
+    ('-010', -8),
+    # Hex
+    ('0x0', 0),
+    ('0xF', 15),
+    ('0x10', 16),
+    ('+0x10', 16),
+    ('-0x10', -16),
+])
+def test_parse_num(txt, val):
+    """Integer literals (signed, underscored, radix, octal, hex) parse to IntegerTokens with the expected value."""
+    assert isinstance(p.parses(txt), p.IntegerToken)
+    assert p.parses(txt).data == val
+
+
+@pytest.mark.parametrize('frac', [
+    '1/2', '1/4', '1/512',
+])
+def test_parse_ratio(frac):
+    """Test covering the ratio notation."""
+    assert isinstance(p.parses(frac), p.FractionToken)
+    assert p.parses(frac).data == p.Fraction(frac)
+
+
+
+@pytest.mark.parametrize('sym,', [
+    'a',
+    'b',
+    '*earmuff-style*',
+    '+kebab-style+',
+    'JAVA_CONSTANT_STYLE',
+])
+def test_parse_sym(sym):
+    """Some trivial cases of parsing symbols."""
+    assert isinstance(p.parses(sym), p.SymbolToken)
+    assert p.parses(sym).data == sym
+
+
+@pytest.mark.parametrize('txt, tokenization', [
+    ('(1 2 3)',
+     [(p.IntegerToken, '1'),
+      (p.WhitespaceToken, ' '),
+      (p.IntegerToken, '2'),
+      (p.WhitespaceToken, ' '),
+      (p.IntegerToken, '3')]),
+    ('(a 1 b 2)',
+     [(p.SymbolToken, 'a'),
+      (p.WhitespaceToken, ' '),
+      (p.IntegerToken, '1'),
+      (p.WhitespaceToken, ' '),
+      (p.SymbolToken, 'b'),
+      (p.WhitespaceToken, ' '),
+      (p.IntegerToken, '2')])
+])
+def test_list_contents(txt, tokenization):
+    """Check the type and raw text of each token inside a parsed list, pairwise against the expectation."""
+    assert isinstance(p.parses(txt), p.ListToken)
+
+    lelems = p.parses(txt).data
+    for (type, text), token in zip(tokenization, lelems):
+        assert isinstance(token, type)
+        assert token.raw == text
+
+
+@pytest.mark.parametrize('txt, value', [
+    ('1.0', 1.0),
+    ('-1.0', -1.0),
+    ('1.01', 1.01),
+    ('1e0', 1e0),
+    ('1e3', 1e3),
+    ('1e-3', 1e-3),
+    ('1.01e3', 1.01e3),
+    ('1_000e0', 1e3),
+])
+def test_float_values(txt, value):
+    """Some examples of floats."""
+    assert isinstance(p.parses(txt), p.FloatToken)
+    assert p.parses(txt).data == value
+
+
+@pytest.mark.parametrize('txt, tokenization', [
+    ('+1', p.IntegerToken),
+    ('+1+', p.SymbolToken),
+    ('+1e', p.SymbolToken),
+    ('+1e3', p.FloatToken),
+    ('+1.0', p.FloatToken),
+    ('+1.0e3', p.FloatToken),
+    ('a.b', p.SymbolToken),
+    ('1.b', p.SymbolToken),
+])
+def test_ambiguous_floats(txt, tokenization):
+    """Parse examples of 'difficult' floats and symbols."""
+    assert isinstance(p.parses(txt), tokenization), "Token type didn't match!"
+    assert p.parses(txt).raw == txt, "Parse wasn't total!"
+
+
+@pytest.mark.parametrize('txt,', [
+    r'""',
+    r'"foo"',
+    r'"foo bar baz qux"',
+    r'"foo\nbar\tbaz\lqux"',
+    r'''"foo
+    bar
+    baz
+    qux"''',
+    r'"\000 \x00"',
+    r'"\"\""',
+])
+def test_string(txt):
+    """Some examples of strings, and of escape sequences."""
+    assert isinstance(p.parses(txt), p.StringToken)
+
+
+@pytest.mark.parametrize('txt,', [
+    ':foo',
+    ':foo/bar',
+    ':foo.bar/baz?',
+])
+def test_keyword(txt):
+    """Some examples of keywords."""
+    assert isinstance(p.parses(txt), p.KeywordToken)
diff --git a/projects/flowmetal/test/python/test_syntax_analyzer.py b/projects/flowmetal/test/python/test_syntax_analyzer.py
new file mode 100644
index 0000000..7afde5d
--- /dev/null
+++ b/projects/flowmetal/test/python/test_syntax_analyzer.py
@@ -0,0 +1,50 @@
+"""
+Tests covering the Flowmetal analyzer.
+"""
+
+import flowmetal.parser as p
+import flowmetal.syntax_analyzer as a
+
+import pytest
+
+
+@pytest.mark.parametrize('txt, exprtype', [
+    # Booleans
+    ('true', a.ConstExpr),
+    ('false', a.BooleanExpr),
+    # Integers
+    ('1', a.ConstExpr),
+    ('1', a.IntegerExpr),
+    # Fractions
+    ('1/2', a.ConstExpr),
+    ('1/2', a.FractionExpr),
+    # Floats
+    ('1.0', a.ConstExpr),
+    ('1.0', a.FloatExpr),
+    # Keywords
+    (':foo', a.ConstExpr),
+    (':foo', a.KeywordExpr),
+    # Strings
+    ('"foo"', a.ConstExpr),
+    ('"foo"', a.StringExpr),
+])
+def test_analyze_constants(txt, exprtype):
+    """Make sure the analyzer can chew on constants."""
+    assert isinstance(a.analyzes(txt), exprtype)
+
+
+@pytest.mark.parametrize('txt', [
+    '()',
+    '(list)',
+    '(list 1)',
+    '(do 1)',
+    '(do foo bar 1)',
+    '(let [a 1, b 2] 1)',
+    '(fn [] 1)',
+    '(fn [] ⊢ integer? x)',
+    '(fn [] x |- integer?)',
+    '(fn [] x :- integer?)',
+])
+def test_analyze(txt):
+    """Make sure that empty list, list, do, let and fn forms all analyze to a truthy result."""
+    assert a.analyzes(txt)
diff --git a/tools/python/requirements.txt b/tools/python/requirements.txt
index 7f51109..a492d2e 100644
--- a/tools/python/requirements.txt
+++ b/tools/python/requirements.txt
@@ -31,6 +31,7 @@ packaging==20.9
 parso==0.8.2
 pathspec==0.8.1
 pluggy==0.13.1
+prompt-toolkit==3.0.18
 pudb==2020.1
 py==1.10.0
 pyflakes==2.3.1
@@ -66,4 +67,5 @@ unify==0.5
 untokenize==0.1.1
 urllib3==1.26.4
 urwid==2.1.2
+wcwidth==0.2.5
 yamllint==1.26.1