Starting to put a parser around all this

2020-06-14 11:32:21 -06:00 · 2020-06-14 11:32:21 -06:00 · 50e7213d7b
commit 50e7213d7b
parent 5b1ec2e24c
7 changed files with 546 additions and 117 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/**/__pycache__
+/**/*.egg-info
--- a/README.md
+++ b/README.md
@@ -70,125 +70,9 @@ In this setup, the Flowmetal interpreters are able to interact with an external
 For instance this program would use the external connector stubs to build up interaction(s) with an external system.

 ```lisp
-(defpackage flowmetal.time
-  (defenum time-unit
-    "Calendar-independent durations."
-    +milliseconds+
-    +seconds+
-    +hours+
-    +days+)
 
-  (defrecord duration [num : int?, scale : time-unit?]
-     "A type for representing scalar durations.")

-  (defn as-milliseconds [d : duration?]
-                        : duration?
-     "Normalize a duration to a number of milliseconds."
-     (match d
-        [(duration x +days+)         (duration (* x 24) +hours+)]
-        [(duration x +hours+)        (duration (* x 60) +minutes+)]
-        [(duration x +minutes+)      (duration (* x 60) +seconds+)]
-        [(duration x +seconds+)      (duration (* x 1000) +milliseconds+)]
-        [(duration x +milliseconds+) d]))

-  ;; A type of one value used to represent an error
-  (defenum timeout
-    +timeout+)
-    
-  (defendpoint with-timeout!
-     [d : duration?
-      f : (fn? [] : a)]
-     : a
-
-    )
-
-)
-
-(defpackage com.twitter.wilson
-  (require
-    ;; The log lets you record status information into a program's trace
-    [flowmetal.log
-     :refer [log!]]
-    ;; The time system lets you put bounds on the implicit awaiting Flowmetal does
-    [flowmetal.time
-     :refer [with-timeout!, timeout?, make-duration, duration?, +seconds+, +hours+, sleep!]]
-    ;; JSON. Simple enough
-    [flowmetal.json
-     :refer [loads, dumps, json?]]
-    ;; Extensions! Provided by other systems.
-    ;;
-    ;; This one allows for an external service to receive HTTP callbacks on Flowmetal's behalf.
-    [http.callback
-     :refer [make-callback!, get-callback!, callback?]]
-     ;; This one allows for an external service to make HTTP requests on Flowmetal's behalf.
-    [http.request
-     :refer [post!, error?, dns-error?, connection-error?, response-error?]])
-
-  (defenum stage
-    +reboot+
-    +bios-update+
-    +reinstall+)
-
-  ;; FIXME: how to do table optimization?
-  (defn fib [x]
-    (match x
-       [0 1]
-       [1 1]
-       [_ (+ (fib (- x 1) (- x 2)))]))
-
-  (defn retry-http [f
-                      : (fn? [] a?)
-                    backoff-fn
-                      : (fn? [int?] duration?)
-                      :default (fn [x : int?]
-                                   : duration?
-                                  (make-duration (fib x) +seconds+))
-                    backoff-count
-                      : int?
-                      :default 0]
-                    : a
-    """The implementation of HTTP with retrying."""
-    (let [response (f)]
-      (if (not (error? response))
-         response
-         ;; FIXME: how does auth denied get represented?
-         (if (or (dns-error? response)
-                 (connection-error? response)
-                 (response-error? response))
-             (do (sleep (backoff-fn backoff-count))
-                 (retry-http* f backoff-fn (+ backoff-count 1)))))))
-
-  (defn job [hostname
-               : str?
-             stages
-               : (list? stage?)
-             job-timeout
-               : duration?
-               :default (duration 3 :hours)]
-             : (union? [timeout? json?])
-    """Run a wilson job, wait for the callback and process the result.
-
-    By default the job is only waited for three hours.
-
-    """
-    (let [callback : callback? (make-callback!)
-          job (retry-http
-                (fn []
-                  (post "http://wilson.local.twitter.com"
-                   :data
-                   (dumps
-                     {:host hostname
-                      :stages [stages]
-                      :callbacks [{:type :http, :url callback}]}))))]
-
-      (let [result (with-timeout! (duration 3 :hours)
-                     (fn []
-                        (get-callback callback)))]
-         (if-not (timeout? result)
-           (loads result)
-           result))))
-           
-    )
 ```


--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,33 @@
+from setuptools import setup
+
+# Read the long description up front so the README handle is closed promptly
+# and decoded as UTF-8 regardless of the platform's default encoding.
+with open("README.md", encoding="utf-8") as readme:
+    long_description = readme.read()
+
+setup(
+    name="arrdem.flowmetal",
+    # Package metadata
+    version='0.0.0',
+    license="MIT",
+    description="A weird execution engine",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="Reid 'arrdem' McKenzie",
+    author_email="me@arrdem.com",
+    url="https://git.arrdem.com/arrdem/flowmetal",
+    classifiers=[
+        "License :: OSI Approved :: MIT License",
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Developers",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.8",
+    ],
+
+    # Package setup: sources live under src/python rather than the repo root.
+    package_dir={"": "src/python"},
+    packages=[
+        "flowmetal",
+    ],
+    scripts=[
+    ],
+    install_requires=[
+    ],
+    extras_require={
+    }
+)
--- a/src/flowmetal/flowmetal.time.flow
+++ b/src/flowmetal/flowmetal.time.flow
@@ -0,0 +1,33 @@
+(defpackage flowmetal.time
+  (defenum time-unit
+    "Calendar-independent durations."
+    +milliseconds+
+    +seconds+
+    ;; +minutes+ is matched by as-milliseconds below, so it has to be a member.
+    +minutes+
+    +hours+
+    +days+)
+
+  (defrecord duration [num :- int?, scale :- time-unit?]
+     "A type for representing scalar durations.")
+
+  (defn as-milliseconds [d :- duration?]
+                        :- duration?
+     "Normalize a duration to a number of milliseconds."
+     ;; Each arm converts down by one unit and recurs, so every input ends up
+     ;; expressed in +milliseconds+ rather than merely one unit smaller.
+     (match d
+        [(duration x +days+)         (as-milliseconds (duration (* x 24) +hours+))]
+        [(duration x +hours+)        (as-milliseconds (duration (* x 60) +minutes+))]
+        [(duration x +minutes+)      (as-milliseconds (duration (* x 60) +seconds+))]
+        [(duration x +seconds+)      (duration (* x 1000) +milliseconds+)]
+        [(duration x +milliseconds+) d]))
+
+  ;; A type of one value used to represent an error
+  (defenum timeout
+    +timeout+)
+
+  ;; Endpoint declaration only: the signature is given but the body is empty.
+  (defendpoint with-timeout!
+     [d :- duration?
+      f :- (fn? [] :- a)]
+     :- a
+
+    )
+
+)
--- a/src/python/flowmetal/__init__.py
+++ b/src/python/flowmetal/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
--- a/src/python/flowmetal/parser.py
+++ b/src/python/flowmetal/parser.py
@@ -0,0 +1,410 @@
+"""
+A parser for s-expressions.
+"""
+
+from abc import ABC, abstractmethod
+from enum import Enum
+from io import StringIO, BufferedReader
+from typing import IO, NamedTuple
+from fractions import Fraction
+
+
+## Types
+class Position(NamedTuple):
+    """Where a token was read from: source name, line, column and offset."""
+    source: str
+    line: int
+    col: int
+    offset: int
+
+    @staticmethod
+    def next_pos(pos: "Position"):
+        """Return the position one character further along the same line."""
+        source, line, col, offset = pos
+        return Position(source, line, col + 1, offset + 1)
+
+    @staticmethod
+    def next_line(pos: "Position"):
+        """Return the position at column 1 of the following line."""
+        source, line, _, offset = pos
+        return Position(source, line + 1, 1, offset + 1)
+
+
+class TokenBase:
+    """Interface common to every token the parser produces.
+
+    NOTE(review): the members are marked abstract, but this class does not
+    use ABCMeta, so the markers are not enforced at instantiation time.
+    """
+
+    @property
+    @abstractmethod
+    def pos(self):
+        """Where in its source this token is located."""
+
+    @property
+    @abstractmethod
+    def raw(self):
+        """The exact text that was scanned to produce this token."""
+
+    @abstractmethod
+    def read(self):
+        """Produce the runtime value for this token, dropping whitespace and the like."""
+
+
+class _IntegerTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so the fields are declared here and TokenBase is
+    # mixed in by the subclass below.
+    data: int      # the parsed integer value
+    raw: str       # the source text the value was scanned from
+    pos: Position  # where the token is located in its source
+
+
+class IntegerToken(_IntegerTokenFields, TokenBase):
+    """A read integer, including position."""
+
+    def read(self):
+        """Return the integer value carried by this token."""
+        return self.data
+
+
+class _RationalTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so the fields are declared here and TokenBase is
+    # mixed in by the subclass below.
+    data: Fraction  # the parsed ratio
+    raw: str        # the source text the value was scanned from
+    pos: Position   # where the token is located in its source
+
+
+class RationalToken(_RationalTokenFields, TokenBase):
+    """A read rational number, including position."""
+
+    def read(self):
+        """Return the Fraction carried by this token."""
+        return self.data
+
+
+class _FloatTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so the fields are declared here and TokenBase is
+    # mixed in by the subclass below.
+    data: float    # the parsed floating point value
+    raw: str       # the source text the value was scanned from
+    pos: Position  # where the token is located in its source
+
+
+class FloatToken(_FloatTokenFields, TokenBase):
+    """A read floating point number, including position."""
+
+    def read(self):
+        """Return the float value carried by this token."""
+        return self.data
+
+
+class _SymbolTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so TokenBase is mixed in by the subclass below.
+    data: str      # the symbol's name
+    raw: str       # the source text the symbol was scanned from
+    pos: Position  # where the token is located in its source
+
+
+class SymbolToken(_SymbolTokenFields, TokenBase):
+    """A read symbol, including position."""
+
+
+class _KeywordTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so TokenBase is mixed in by the subclass below.
+    # Unlike the other tokens, no `raw` field is declared here.
+    data: str      # the keyword's name
+    pos: Position  # where the token is located in its source
+
+
+class KeywordToken(_KeywordTokenFields, TokenBase):
+    """A read keyword."""
+
+
+class _StringTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so TokenBase is mixed in by the subclass below.
+    data: str      # the string's value
+    raw: str       # the source text the string was scanned from
+    pos: Position  # where the token is located in its source
+
+
+class StringToken(_StringTokenFields, TokenBase):
+    """A read string, including position."""
+
+
+class ListType(Enum):
+    """Bracket styles a list may be written with.
+
+    Each member's value is the (opening, closing) delimiter pair.
+    """
+    ROUND = ("(", ")")
+    SQUARE = ("[", "]")
+
+
+class _ListTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so TokenBase is mixed in by the subclass below.
+    data: list                        # the tokens read between the delimiters
+    raw: str                          # the source text of the whole list
+    pos: Position                     # where the list starts
+    paren: ListType = ListType.ROUND  # which bracket style delimited the list
+
+
+class ListToken(_ListTokenFields, TokenBase):
+    """A read list, including its start position and the paren type."""
+
+
+class _SetTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so TokenBase is mixed in by the subclass below.
+    data: list     # the tokens read as the set's members
+    raw: str       # the source text of the whole set
+    pos: Position  # where the set starts
+
+
+class SetToken(_SetTokenFields, TokenBase):
+    """A read set, including its start position."""
+
+
+class _MappingTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so TokenBase is mixed in by the subclass below.
+    data: list     # the tokens read as the mapping's contents
+    raw: str       # the source text of the whole mapping
+    pos: Position  # where the mapping starts
+
+
+class MappingToken(_MappingTokenFields, TokenBase):
+    """A read mapping, including its start position."""
+
+
+class _WhitespaceTokenFields(NamedTuple):
+    # Field container only. typing.NamedTuple cannot be mixed with another base
+    # in one class statement (the extra base is dropped on 3.8 and rejected with
+    # a TypeError on 3.9+), so TokenBase is mixed in by the subclass below.
+    data: str      # the whitespace characters
+    raw: str       # the source text that was scanned
+    pos: Position  # where the run of whitespace starts
+
+
+class WhitespaceToken(_WhitespaceTokenFields, TokenBase):
+    """A bunch of whitespace with no semantic value."""
+
+
+class CommentToken(WhitespaceToken):
+    """A comment; shares WhitespaceToken's fields and carries no semantic value."""
+
+
+## Parser implementation
+class PosTrackingBufferedReader(object):
+    """A slight riff on BufferedReader which only allows for reads and peeks of a
+    char, and tracks positions.
+
+    Perfect for implementing LL(1) parsers.
+
+    A "\\r\\n" pair is delivered as a single "\\n" and counted as one line
+    break, so callers only ever need to test for "\\n".
+    """
+
+    def __init__(self, f: IO, source_name=None):
+        """Wrap the file-like `f`; `source_name` is recorded in every Position."""
+        # _pos is the position of the character most recently read; _next_pos
+        # is where the next character to be read sits.
+        self._next_pos = self._pos = Position(source_name, 1, 1, 0)
+        # One character of lookahead; None means nothing has been buffered.
+        self._char = None
+        self._f = f
+
+    def pos(self):
+        """Return the position of the most recently read character."""
+        return self._pos
+
+    def peek(self):
+        """Return the next character without consuming it ("" at end of input)."""
+        if self._char is None:
+            self._char = self._f.read(1)
+        return self._char
+
+    def read(self):
+        """Consume and return the next character ("" at end of input)."""
+        ch = self.peek()
+        if not ch:
+            # End of input: nothing was consumed, so positions stay put.
+            return ch
+
+        # Drop the lookahead; the following peek() refills it on demand.
+        self._char = None
+        self._pos = start = self._next_pos
+
+        if ch == "\r" and self.peek() == "\n":
+            # Swallow the "\n" half of the pair. The previous code attempted
+            # this with `super.read(1)`, which raises AttributeError.
+            self._char = None
+            self._next_pos = Position(start.source, start.line + 1, 1, start.offset + 2)
+            return "\n"
+        elif ch == "\n":
+            self._next_pos = Position.next_line(start)
+        else:
+            self._next_pos = Position.next_pos(start)
+
+        return ch
+
+
+class ReadThroughBuffer(PosTrackingBufferedReader):
+    """Stands in for a PosTrackingBufferedReader while recording what is read.
+
+    Every call is forwarded to the wrapped reader; characters returned by
+    read() are also copied into an internal buffer, whose contents str()
+    returns. Usable as a context manager purely for scoping.
+    """
+
+    def __init__(self, ptcr: PosTrackingBufferedReader):
+        # The parent initializer is deliberately not called: all reader state
+        # lives in the wrapped instance.
+        self._buffer = StringIO()
+        self._reader = ptcr
+
+    def __enter__(self, *args):
+        return self
+
+    def __exit__(self, *args):
+        # Nothing to release; exceptions propagate.
+        return None
+
+    def __str__(self):
+        return self._buffer.getvalue()
+
+    def pos(self):
+        return self._reader.pos()
+
+    def peek(self):
+        return self._reader.peek()
+
+    def read(self):
+        char = self._reader.read()
+        self._buffer.write(char)
+        return char
+
+
+class SexpParser(ABC):
+    """Abstract interface for s-expression parsers.
+
+    Parsers are used through the class itself rather than through instances,
+    so both entry points are classmethods.
+    """
+
+    @classmethod
+    @abstractmethod
+    def parse(cls, f: PosTrackingBufferedReader) -> TokenBase:
+        """Parse an s-expression, returning a parsed token tree."""
+
+    # This was previously a plain function taking `cls`, so calling it on the
+    # class bound the reader to `cls` and failed for want of `f`.
+    @classmethod
+    def read(cls, f: PosTrackingBufferedReader):
+        """Parse to a token tree and read to values returning the resulting values."""
+
+        return cls.parse(f).read()
+
+
+class Parser(SexpParser):
+    """A basic parser which knows about lists, symbols and numbers.
+
+    Intended as a base class / extension point for other parsers.
+
+    Malformed input raises SyntaxError. Token positions are sampled right
+    after a token's first character has been read, because the reader's
+    pos() reports the most recently read character.
+    """
+
+    # Characters that end a symbol even when no whitespace follows it, so that
+    # "(a)" reads as a list holding `a` rather than as a symbol "a)".
+    SYMBOL_TERMINATORS = "()[];"
+
+    @classmethod
+    def parse(cls, f: PosTrackingBufferedReader):
+        """Parse one token, choosing the sub-parser from the next character."""
+        ch = f.peek()
+        if ch == "(":
+            return cls.parse_list(f)
+        elif ch == "[":
+            return cls.parse_sqlist(f)
+        elif cls.isspace(ch):
+            return cls.parse_whitespace(f)
+        elif ch.isdigit():
+            return cls.parse_num(f)
+        elif ch == ";":
+            return cls.parse_comment(f)
+        else:
+            return cls.parse_symbol(f)
+
+    @classmethod
+    def isspace(cls, ch: str):
+        """An extension point allowing for a more expansive concept of whitespace."""
+        return ch.isspace()
+
+    @classmethod
+    def parse_delimeted(cls, f: PosTrackingBufferedReader, openc, closec, ctor):
+        """Parse tokens enclosed by `openc` ... `closec`.
+
+        `ctor` is called with (tokens, raw text, start position) and its result
+        is returned. Raises SyntaxError if the opening delimiter is not next
+        or if the input ends before `closec` is seen.
+        """
+        with ReadThroughBuffer(f) as rtb:
+            pos = None
+            for c in openc:
+                ch = rtb.read()
+                if ch != c:
+                    raise SyntaxError(f"Expected {c!r} but found {ch!r} at {rtb.pos()}")
+                if pos is None:
+                    pos = rtb.pos()
+
+            acc = []
+            while rtb.peek() != closec:
+                if not rtb.peek():
+                    # Without this check the loop would never terminate.
+                    raise SyntaxError(
+                        f"Reached end of input looking for {closec!r} "
+                        f"to close {openc!r} opened at {pos}")
+                acc.append(cls.parse(rtb))
+
+            rtb.read()  # Discard the trailing delimeter
+            return ctor(acc, str(rtb), pos)
+
+    @classmethod
+    def parse_list(cls, f: PosTrackingBufferedReader):
+        """Parse a round-bracketed list."""
+        return cls.parse_delimeted(f, "(", ")", lambda *args: ListToken(*args, ListType.ROUND))
+
+    @classmethod
+    def parse_sqlist(cls, f: PosTrackingBufferedReader):
+        """Parse a square-bracketed list."""
+        return cls.parse_delimeted(f, "[", "]", lambda *args: ListToken(*args, ListType.SQUARE))
+
+    @classmethod
+    def parse_unum(cls, f: PosTrackingBufferedReader):
+        """Parse a run of digits as an unsigned IntegerToken."""
+        with ReadThroughBuffer(f) as rtb:
+            if not rtb.peek().isdigit():
+                raise SyntaxError(
+                    f"Expected a digit but found {rtb.peek()!r} after {rtb.pos()}")
+            rtb.read()
+            pos = rtb.pos()
+            while rtb.peek().isdigit():
+                rtb.read()
+            buff = str(rtb)
+            return IntegerToken(int(buff), buff, pos)
+
+    @classmethod
+    def parse_num(cls, f: PosTrackingBufferedReader):
+        """Parse a number: an integer, a ratio `n/d`, or a radix form `<base>r<digits>`.
+
+        Decimal and exponent forms are recognized but raise NotImplementedError.
+        """
+        with ReadThroughBuffer(f) as rtb:
+            num: IntegerToken = cls.parse_unum(rtb)
+
+            # Various cases of more interesting numbers
+            if rtb.peek() == "/":
+                ## Case of a rational
+                # Discard the delimeter
+                rtb.read()
+                denom = cls.parse_num(rtb)
+
+                return RationalToken(Fraction(num.data, denom.data), str(rtb), num.pos)
+
+            elif rtb.peek() == "r":
+                ## Case of a number with a base
+                # Discard the delimeter
+                rtb.read()
+                body = cls.parse_symbol(rtb)
+                return IntegerToken(int(body.raw, num.data), str(rtb), num.pos)
+
+            elif rtb.peek() == ".":
+                ## Case of a number with a decimal component
+                ## Note there may be a trailing exponent
+                raise NotImplementedError()
+
+            elif rtb.peek() == "e":
+                ## Case of a number with a floating point exponent
+                raise NotImplementedError()
+
+            else:
+                return num
+
+    @classmethod
+    def parse_symbol(cls, f: PosTrackingBufferedReader):
+        """Parse a symbol: everything up to whitespace, a terminator or end of input.
+
+        Raises SyntaxError if no symbol character is available. Returning an
+        empty symbol would consume nothing and let callers loop forever.
+        """
+        with ReadThroughBuffer(f) as rtb:
+            pos = None
+            while (rtb.peek()
+                   and not cls.isspace(rtb.peek())
+                   and rtb.peek() not in cls.SYMBOL_TERMINATORS):
+                rtb.read()
+                if pos is None:
+                    pos = rtb.pos()
+            buff = str(rtb)
+            if not buff:
+                found = rtb.peek() or "end of input"
+                raise SyntaxError(f"Expected a symbol but found {found!r} after {rtb.pos()}")
+            return SymbolToken(buff, buff, pos)
+
+    @classmethod
+    def parse_whitespace(cls, f: PosTrackingBufferedReader):
+        """Parse a run of whitespace, stopping after the first newline."""
+        with ReadThroughBuffer(f) as rtb:
+            pos = None
+            while rtb.peek() and cls.isspace(rtb.peek()):
+                ch = rtb.read()
+                if pos is None:
+                    pos = rtb.pos()
+                if ch == "\n":
+                    break
+            buff = str(rtb)
+            return WhitespaceToken(buff, buff, pos)
+
+    @classmethod
+    def parse_comment(cls, f: PosTrackingBufferedReader):
+        """Parse a comment running through the end of the line or of the input."""
+        with ReadThroughBuffer(f) as rtb:
+            pos = None
+            while rtb.read() not in ("\n", ""):
+                if pos is None:
+                    pos = rtb.pos()
+            buff = str(rtb)
+            return CommentToken(buff, buff, pos)
+
+
+## Parsing
+def parses(buff: str,
+           parser: SexpParser = Parser,
+           source_name=None):
+    """Parse one s-expression out of the string `buff` and return its token tree.
+
+    When no `source_name` is given, one is synthesized from the string's id.
+    """
+
+    if not source_name:
+        source_name = f"<string {id(buff):x}>"
+    stream = StringIO(buff)
+    return parse(stream, parser, source_name)
+
+
+def parsef(path: str,
+           parser: SexpParser = Parser):
+    """Open the file at `path`, parse one s-expression from it and return its token tree."""
+
+    with open(path, "r") as handle:
+        tree = parse(handle, parser, path)
+    return tree
+
+
+def parse(file: IO,
+          parser: SexpParser = Parser,
+          source_name=None):
+    """Parse one s-expression from the file-like `file` and return its token tree."""
+
+    # Wrap the raw stream so the parser gets lookahead and position tracking.
+    reader = PosTrackingBufferedReader(file, source_name=source_name)
+    return parser.parse(reader)
+
+
+## Loading
+def loads(buff: str,
+          parser: SexpParser = Parser,
+          source_name=None):
+    """Load one s-expression out of the string `buff` and return its object representation.
+
+    When no `source_name` is given, one is synthesized from the string's id.
+    """
+
+    if not source_name:
+        source_name = f"<string {id(buff):x}>"
+    stream = StringIO(buff)
+    return load(stream, parser, source_name)
+
+
+def loadf(path: str,
+          parser: SexpParser = Parser):
+    """Open the file at `path`, load one s-expression from it and return its object representation."""
+
+    with open(path, "r") as handle:
+        value = load(handle, parser, path)
+    return value
+
+
+def load(file: IO,
+         parser: SexpParser = Parser,
+         source_name=None):
+    """Load a single sexpression from a file-like object, returning its object representation.
+
+    `parser` supplies the `parse` entry point and `source_name` is recorded in
+    token positions.
+    """
+
+    reader = PosTrackingBufferedReader(
+        file,
+        source_name=source_name
+    )
+    # Parsers expose no `load` method, which is what was previously called
+    # here. Parse to a token tree and read it to a value, which is the same
+    # two steps SexpParser.read performs.
+    return parser.parse(reader).read()
+
+
+## Dumping
+def dump(file: IO, obj):
+    """Write the s-expression coding of `obj` to the file-like `file`.
+
+    Not implemented yet: always raises NotImplementedError.
+    """
+
+    raise NotImplementedError()
+
+
+def dumps(obj):
+    """Given an object, dump its s-expression coding to a string and return that string."""
+
+    with StringIO("") as f:
+        dump(f, obj)
+        # str(f) would give the StringIO object's repr; getvalue() returns
+        # the text that was written to it.
+        return f.getvalue()
--- a/test/python/flowmetal/test_parser.py
+++ b/test/python/flowmetal/test_parser.py
@@ -0,0 +1,66 @@
+"""
+Tests covering the Flowmetal parser.
+"""
+
+import flowmetal.parser as p
+
+import pytest
+
+
+def test_parse_list():
+    """An empty pair of parens parses to a round-bracketed ListToken."""
+    for _ in range(1):
+        token = p.parses("()")
+        assert isinstance(token, p.ListToken)
+    assert p.parses("()").paren == p.ListType.ROUND
+
+
+@pytest.mark.parametrize('num,', [
+    1, 2, 103, 504,
+])
+def test_parse_num(num):
+    """Small positive integers parse to IntegerTokens carrying the same value."""
+    text = str(num)
+    assert isinstance(p.parses(text), p.IntegerToken)
+    assert p.parses(text).data == num
+
+
+@pytest.mark.parametrize('frac', [
+    '1/2', '1/4', '1/512',
+])
+def test_parse_ratio(frac):
+    """`n/d` notation parses to a RationalToken holding the matching Fraction."""
+    expected = p.Fraction(frac)
+    assert isinstance(p.parses(frac), p.RationalToken)
+    assert p.parses(frac).data == expected
+
+
+@pytest.mark.parametrize('sym,', [
+    'a', 'b', '*earmuff-style*', '+kebab-style+', 'JAVA_CONSTANT_STYLE'
+])
+def test_parse_sym(sym):
+    """Symbols in several naming styles parse to SymbolTokens with the same text."""
+    first = p.parses(sym)
+    assert isinstance(first, p.SymbolToken)
+    second = p.parses(sym)
+    assert second.data == sym
+
+
+@pytest.mark.parametrize('txt, tokenization', [
+    ('(1 2 3)',
+     [(p.IntegerToken, '1'),
+      (p.WhitespaceToken, ' '),
+      (p.IntegerToken, '2'),
+      (p.WhitespaceToken, ' '),
+      (p.IntegerToken, '3')]),
+    ('(a 1 b 2)',
+     [(p.SymbolToken, 'a'),
+      (p.WhitespaceToken, ' '),
+      (p.IntegerToken, '1'),
+      (p.WhitespaceToken, ' '),
+      (p.SymbolToken, 'b'),
+      (p.WhitespaceToken, ' '),
+      (p.IntegerToken, '2')])
+])
+def test_list_contents(txt, tokenization):
+    """Parse examples of list contents.
+
+    Each expectation is a (token class, raw text) pair, in source order.
+    """
+    parsed = p.parses(txt)
+    assert isinstance(parsed, p.ListToken)
+
+    lelems = parsed.data
+    # zip() stops at the shorter sequence, so compare lengths first; otherwise
+    # missing or extra tokens would go unnoticed.
+    assert len(lelems) == len(tokenization)
+    for (token_type, text), token in zip(tokenization, lelems):
+        assert isinstance(token, token_type)
+        assert token.raw == text