Starting to put a parser around all this

This commit is contained in:
Reid 'arrdem' McKenzie 2020-06-14 11:32:21 -06:00
parent 5b1ec2e24c
commit 50e7213d7b
7 changed files with 546 additions and 117 deletions

2
.gitignore vendored Normal file
View file

@@ -0,0 +1,2 @@
/**/__pycache__
/**/*.egg-info

116
README.md
View file

@@ -70,125 +70,9 @@ In this setup, the Flowmetal interpreters are able to interact with an external
For instance this program would use the external connector stubs to build up interaction(s) with an external system.
```lisp
(defpackage flowmetal.time
  (defenum time-unit
    "Calendar-independent durations."
    +milliseconds+
    +seconds+
    +minutes+
    +hours+
    +days+)

  (defrecord duration [num : int?, scale : time-unit?]
    "A type for representing scalar durations.")

  (defn as-milliseconds [d : duration?]
    : duration?
    "Normalize a duration to a number of milliseconds."
    (match d
      [(duration x +days+)         (as-milliseconds (duration (* x 24) +hours+))]
      [(duration x +hours+)        (as-milliseconds (duration (* x 60) +minutes+))]
      [(duration x +minutes+)      (as-milliseconds (duration (* x 60) +seconds+))]
      [(duration x +seconds+)      (as-milliseconds (duration (* x 1000) +milliseconds+))]
      [(duration x +milliseconds+) d]))

  ;; A type of one value used to represent an error
  (defenum timeout
    +timeout+)

  (defendpoint with-timeout!
    [d : duration?
     f : (fn? [] : a)]
    : a
    )
  )
(defpackage com.twitter.wilson
  (require
    ;; The log lets you record status information into a program's trace
    [flowmetal.log
     :refer [log!]]

    ;; The time system lets you put bounds on the implicit awaiting Flowmetal does
    [flowmetal.time
     :refer [with-timeout!, timeout?, make-duration, duration?, +seconds+, +hours+, sleep!]]

    ;; JSON. Simple enough
    [flowmetal.json
     :refer [loads, dumps, json?]]

    ;; Extensions! Provided by other systems.
    ;;
    ;; This one allows for an external service to receive HTTP callbacks on Flowmetal's behalf.
    [http.callback
     :refer [make-callback!, get-callback!, callback?]]

    ;; This one allows for an external service to make HTTP requests on Flowmetal's behalf.
    [http.request
     :refer [post!, error?, dns-error?, connection-error?, response-error?]])

  (defenum stage
    +reboot+
    +bios-update+
    +reinstall+)

  ;; FIXME: how to do table optimization?
  (defn fib [x]
    (match x
      [0 1]
      [1 1]
      [_ (+ (fib (- x 1)) (fib (- x 2)))]))

  (defn retry-http [f
                    : (fn? [] a?)
                    backoff-fn
                    : (fn? [int?] duration?)
                    :default (fn [x : int?]
                               : duration?
                               (make-duration (fib x) +seconds+))
                    backoff-count
                    : int?
                    :default 0]
    : a
    """The implementation of HTTP with retrying."""
    (let [response (f)]
      (if (not (error? response))
        response
        ;; FIXME: how does auth denied get represented?
        (if (or (dns-error? response)
                (connection-error? response)
                (response-error? response))
          (do (sleep! (backoff-fn backoff-count))
              (retry-http f backoff-fn (+ backoff-count 1)))))))

  (defn job [hostname
             : str?
             stages
             : (list? stage?)
             job-timeout
             : duration?
             :default (make-duration 3 +hours+)]
    : (union? [timeout? json?])
    """Run a wilson job, wait for the callback and process the result.

    By default the job is only waited for three hours.
    """
    (let [callback : callback? (make-callback!)
          job (retry-http
                (fn []
                  (post! "http://wilson.local.twitter.com"
                         :data
                         (dumps
                           {:host hostname
                            :stages [stages]
                            :callbacks [{:type :http, :url callback}]}))))]
      (let [result (with-timeout! job-timeout
                     (fn []
                       (get-callback! callback)))]
        (if-not (timeout? result)
          (loads result)
          result))))
  )
```
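
For a sense of the retry cadence, the default `backoff-fn` above sleeps `(fib n)` seconds before the n-th retry. A quick Python sketch of that schedule (illustrative only, not part of this change):

```python
def fib(n: int) -> int:
    # Mirrors the fib defined in the example above: fib(0) == fib(1) == 1, then 2, 3, 5, ...
    return 1 if n < 2 else fib(n - 1) + fib(n - 2)


# Backoff delays, in seconds, ahead of the first six retries.
print([fib(n) for n in range(6)])  # [1, 1, 2, 3, 5, 8]
```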

33
setup.py Normal file
View file

@@ -0,0 +1,33 @@
from setuptools import setup

setup(
    name="arrdem.flowmetal",

    # Package metadata
    version='0.0.0',
    license="MIT",
    description="A weird execution engine",
    long_description=open("README.md").read(),
    long_description_content_type="text/markdown",
    author="Reid 'arrdem' McKenzie",
    author_email="me@arrdem.com",
    url="https://git.arrdem.com/arrdem/flowmetal",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
    ],

    # Package setup
    package_dir={"": "src/python"},
    packages=[
        "flowmetal",
    ],
    scripts=[
    ],
    install_requires=[
    ],
    extras_require={
    }
)

View file

@@ -0,0 +1,33 @@
(defpackage flowmetal.time
  (defenum time-unit
    "Calendar-independent durations."
    +milliseconds+
    +seconds+
    +minutes+
    +hours+
    +days+)

  (defrecord duration [num :- int?, scale :- time-unit?]
    "A type for representing scalar durations.")

  (defn as-milliseconds [d :- duration?]
    :- duration?
    "Normalize a duration to a number of milliseconds."
    (match d
      [(duration x +days+)         (as-milliseconds (duration (* x 24) +hours+))]
      [(duration x +hours+)        (as-milliseconds (duration (* x 60) +minutes+))]
      [(duration x +minutes+)      (as-milliseconds (duration (* x 60) +seconds+))]
      [(duration x +seconds+)      (as-milliseconds (duration (* x 1000) +milliseconds+))]
      [(duration x +milliseconds+) d]))

  ;; A type of one value used to represent an error
  (defenum timeout
    +timeout+)

  (defendpoint with-timeout!
    [d :- duration?
     f :- (fn? [] :- a)]
    :- a
    )
  )

View file

@@ -0,0 +1 @@
#!/usr/bin/env python3

View file

@@ -0,0 +1,410 @@
"""
A parser for s-expressions.
"""
from abc import ABC, abstractmethod
from enum import Enum
from io import StringIO, BufferedReader
from typing import IO, NamedTuple
from fractions import Fraction

## Types


class Position(NamedTuple):
    """An encoding for the location of a read token within a source."""

    source: str
    line: int
    col: int
    offset: int

    @staticmethod
    def next_pos(pos: "Position"):
        return Position(pos.source, pos.line, pos.col + 1, pos.offset + 1)

    @staticmethod
    def next_line(pos: "Position"):
        return Position(pos.source, pos.line + 1, 1, pos.offset + 1)


class TokenBase(object):
    """The shared interface to tokens."""

    @property
    @abstractmethod
    def pos(self):
        """The position of the token within its source."""

    @property
    @abstractmethod
    def raw(self):
        """The raw token as scanned."""

    @abstractmethod
    def read(self):
        """Return a runtime value for the token, discarding any whitespace and so forth."""


class IntegerToken(NamedTuple, TokenBase):
    """A read integer, including position."""

    data: int
    raw: str
    pos: Position

    def read(self):
        return self.data


class RationalToken(NamedTuple, TokenBase):
    """A read rational, including position."""

    data: Fraction
    raw: str
    pos: Position

    def read(self):
        return self.data


class FloatToken(NamedTuple, TokenBase):
    """A read floating point number, including position."""

    data: float
    raw: str
    pos: Position

    def read(self):
        return self.data


class SymbolToken(NamedTuple, TokenBase):
    """A read symbol, including position."""

    data: str
    raw: str
    pos: Position


class KeywordToken(NamedTuple, TokenBase):
    """A read keyword."""

    data: str
    pos: Position


class StringToken(NamedTuple, TokenBase):
    """A read string, including position."""

    data: str
    raw: str
    pos: Position


class ListType(Enum):
    """The supported types of lists."""

    ROUND = ("(", ")")
    SQUARE = ("[", "]")


class ListToken(NamedTuple, TokenBase):
    """A read list, including its start position and the paren type."""

    data: list
    raw: str
    pos: Position
    paren: ListType = ListType.ROUND


class SetToken(NamedTuple, TokenBase):
    """A read set, including its start position."""

    data: list
    raw: str
    pos: Position


class MappingToken(NamedTuple, TokenBase):
    """A read mapping, including its start position."""

    data: list
    raw: str
    pos: Position


class WhitespaceToken(NamedTuple, TokenBase):
    """A bunch of whitespace with no semantic value."""

    data: str
    raw: str
    pos: Position


class CommentToken(WhitespaceToken):
    """A read comment with no semantic value."""


## Parser implementation


class PosTrackingBufferedReader(object):
    """A slight riff on BufferedReader which only allows for reads and peeks of a
    char, and tracks positions.

    Perfect for implementing LL(1) parsers.
    """

    def __init__(self, f: IO, source_name=None):
        self._next_pos = self._pos = Position(source_name, 1, 1, 0)
        self._char = None
        self._f = f

    def pos(self):
        return self._pos

    def peek(self):
        if self._char is None:
            self._char = self._f.read(1)
        return self._char

    def read(self):
        # Accounting for lookahead(1)
        ch = self._char or self._f.read(1)
        self._char = self._f.read(1)

        # Accounting for the positions
        self._pos = self._next_pos
        if ch == "\r" and self.peek() == "\n":
            # Throw out the buffered "\n"; a "\r\n" pair counts as one line break
            self._char = self._f.read(1)
            self._next_pos = Position.next_line(self._next_pos)
        elif ch == "\n":
            self._next_pos = Position.next_line(self._next_pos)
        else:
            self._next_pos = Position.next_pos(self._next_pos)
        return ch
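
# Illustrative only (not part of this commit): how the reader tracks positions.
# Reading "a\nb" one character at a time advances the column and resets it on
# newlines, e.g. in a REPL:
#
#     >>> r = PosTrackingBufferedReader(StringIO("a\nb"), source_name="<demo>")
#     >>> r.read(), r.pos()
#     ('a', Position(source='<demo>', line=1, col=1, offset=0))
#     >>> r.read(), r.pos()
#     ('\n', Position(source='<demo>', line=1, col=2, offset=1))
#     >>> r.read(), r.pos()
#     ('b', Position(source='<demo>', line=2, col=1, offset=2))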


class ReadThroughBuffer(PosTrackingBufferedReader):
    """A duck that quacks like a PosTrackingBufferedReader."""

    def __init__(self, ptcr: PosTrackingBufferedReader):
        self._reader = ptcr
        self._buffer = StringIO()

    def pos(self):
        return self._reader.pos()

    def peek(self):
        return self._reader.peek()

    def read(self):
        ch = self._reader.read()
        self._buffer.write(ch)
        return ch

    def __str__(self):
        return self._buffer.getvalue()

    def __enter__(self, *args):
        return self

    def __exit__(self, *args):
        pass


class SexpParser(ABC):
    @classmethod
    @abstractmethod
    def parse(cls, f: PosTrackingBufferedReader) -> TokenBase:
        """Parse an s-expression, returning a parsed token tree."""

    @classmethod
    def read(cls, f: PosTrackingBufferedReader):
        """Parse to a token tree and read to values returning the resulting values."""
        return cls.parse(f).read()


class Parser(SexpParser):
    """A basic parser which knows about lists, symbols and numbers.

    Intended as a base class / extension point for other parsers.
    """

    @classmethod
    def parse(cls, f: PosTrackingBufferedReader):
        if f.peek() == "(":
            return cls.parse_list(f)
        elif f.peek() == "[":
            return cls.parse_sqlist(f)
        elif cls.isspace(f.peek()):
            return cls.parse_whitespace(f)
        elif f.peek().isdigit():
            return cls.parse_num(f)
        elif f.peek() == ";":
            return cls.parse_comment(f)
        else:
            return cls.parse_symbol(f)

    @classmethod
    def isspace(cls, ch: str):
        """An extension point allowing for a more expansive concept of whitespace."""
        return ch.isspace()

    @classmethod
    def parse_delimeted(cls, f: PosTrackingBufferedReader, openc, closec, ctor):
        with ReadThroughBuffer(f) as rtb:
            pos = None
            for c in openc:
                pos = pos or rtb.pos()
                assert rtb.read() == c  # Discard the leading delimiter
            acc = []
            while f.peek() != closec:
                acc.append(cls.parse(rtb))
            assert rtb.read() == closec  # Discard the trailing delimiter
            return ctor(acc, str(rtb), pos)

    @classmethod
    def parse_list(cls, f: PosTrackingBufferedReader):
        return cls.parse_delimeted(f, "(", ")", lambda *args: ListToken(*args, ListType.ROUND))

    @classmethod
    def parse_sqlist(cls, f: PosTrackingBufferedReader):
        return cls.parse_delimeted(f, "[", "]", lambda *args: ListToken(*args, ListType.SQUARE))

    @classmethod
    def parse_unum(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            assert rtb.peek().isdigit()
            pos = f.pos()
            while rtb.peek().isdigit():
                rtb.read()
            buff = str(rtb)
            return IntegerToken(int(buff), buff, pos)

    @classmethod
    def parse_num(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            num: IntegerToken = cls.parse_unum(rtb)
            # Various cases of more interesting numbers
            if rtb.peek() == "/":
                ## Case of a rational
                # Discard the delimiter
                rtb.read()
                denom = cls.parse_num(rtb)
                return RationalToken(Fraction(num.data, denom.data), str(rtb), num.pos)
            elif rtb.peek() == "r":
                ## Case of a number with a base
                # Discard the delimiter
                rtb.read()
                body = cls.parse_symbol(rtb)
                return IntegerToken(int(body.raw, num.data), str(rtb), num.pos)
            elif rtb.peek() == ".":
                ## Case of a number with a decimal component
                ## Note there may be a trailing exponent
                raise NotImplementedError()
            elif rtb.peek() == "e":
                ## Case of a number with a floating point exponent
                raise NotImplementedError()
            else:
                return num

    @classmethod
    def parse_symbol(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while rtb.peek() and not cls.isspace(rtb.peek()):
                pos = pos or rtb.pos()
                rtb.read()
            buff = str(rtb)
            return SymbolToken(buff, buff, pos)

    @classmethod
    def parse_whitespace(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while rtb.peek() and cls.isspace(rtb.peek()):
                pos = pos or rtb.pos()
                ch = rtb.read()
                if ch == "\n":
                    break
            buff = str(rtb)
            return WhitespaceToken(buff, buff, pos)

    @classmethod
    def parse_comment(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while rtb.read() not in ["\n", ""]:
                pos = pos or rtb.pos()
                continue
            buff = str(rtb)
            return CommentToken(buff, buff, pos)


## Parsing


def parses(buff: str,
           parser: SexpParser = Parser,
           source_name=None):
    """Parse a single s-expression from a string, returning its token tree."""

    return parse(StringIO(buff), parser, source_name or f"<string {id(buff):x}>")


def parsef(path: str,
           parser: SexpParser = Parser):
    """Parse a single s-expression from the file named by a string, returning its token tree."""

    with open(path, "r") as f:
        return parse(f, parser, path)


def parse(file: IO,
          parser: SexpParser = Parser,
          source_name=None):
    """Parse a single s-expression from a file-like object, returning its token tree."""

    return parser.parse(
        PosTrackingBufferedReader(
            file,
            source_name=source_name
        )
    )


## Loading


def loads(buff: str,
          parser: SexpParser = Parser,
          source_name=None):
    """Load a single s-expression from a string, returning its object representation."""

    return load(StringIO(buff), parser, source_name or f"<string {id(buff):x}>")


def loadf(path: str,
          parser: SexpParser = Parser):
    """Load a single s-expression from the file named by a string, returning its object representation."""

    with open(path, "r") as f:
        return load(f, parser, path)


def load(file: IO,
         parser: SexpParser = Parser,
         source_name=None):
    """Load a single s-expression from a file-like object, returning its object representation."""

    # SexpParser.read parses to a token tree and reads it down to values
    return parser.read(
        PosTrackingBufferedReader(
            file,
            source_name=source_name
        )
    )


## Dumping


def dump(file: IO, obj):
    """Given an object, dump its s-expression coding to the given file-like object."""

    raise NotImplementedError()


def dumps(obj):
    """Given an object, dump its s-expression coding to a string and return that string."""

    with StringIO("") as f:
        dump(f, obj)
        return f.getvalue()
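
Taken together, the entry points above give a small reading API. A quick usage sketch (illustrative only, not part of this commit; it assumes the module is importable as `flowmetal.parser`, as the tests below do):

```python
import flowmetal.parser as p

# Parse a string to a token tree; whitespace is preserved as tokens.
tree = p.parses("(1 1/2 30)")
assert isinstance(tree, p.ListToken) and tree.paren is p.ListType.ROUND

# Filter out the whitespace tokens to get at the parsed values.
values = [t.data for t in tree.data if not isinstance(t, p.WhitespaceToken)]
assert values == [1, p.Fraction(1, 2), 30]

# Radix notation reads down to a plain integer.
assert p.parses("16r10").data == 16
```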

View file

@@ -0,0 +1,66 @@
"""
Tests covering the Flowmetal parser.
"""

import flowmetal.parser as p
import pytest


def test_parse_list():
    """Trivial parsing a list."""
    assert isinstance(p.parses("()"), p.ListToken)
    assert p.parses("()").paren == p.ListType.ROUND


@pytest.mark.parametrize('num,', [
    1, 2, 103, 504,
])
def test_parse_num(num):
    """Some trivial cases of parsing numbers."""
    assert isinstance(p.parses(str(num)), p.IntegerToken)
    assert p.parses(str(num)).data == num


@pytest.mark.parametrize('frac', [
    '1/2', '1/4', '1/512',
])
def test_parse_ratio(frac):
    """Test covering the ratio notation."""
    assert isinstance(p.parses(frac), p.RationalToken)
    assert p.parses(frac).data == p.Fraction(frac)


@pytest.mark.parametrize('sym,', [
    'a', 'b', '*earmuff-style*', '+kebab-style+', 'JAVA_CONSTANT_STYLE'
])
def test_parse_sym(sym):
    """Some trivial cases of parsing symbols."""
    assert isinstance(p.parses(sym), p.SymbolToken)
    assert p.parses(sym).data == sym


@pytest.mark.parametrize('txt, tokenization', [
    ('(1 2 3)',
     [(p.IntegerToken, '1'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '2'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '3')]),
    ('(a 1 b 2)',
     [(p.SymbolToken, 'a'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '1'),
      (p.WhitespaceToken, ' '),
      (p.SymbolToken, 'b'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '2')])
])
def test_list_contents(txt, tokenization):
    """Parse examples of list contents."""
    assert isinstance(p.parses(txt), p.ListToken)

    lelems = p.parses(txt).data
    for (type, text), token in zip(tokenization, lelems):
        assert isinstance(token, type)
        assert token.raw == text
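
Two behaviors the parser already implements but these tests don't yet exercise are radix-notation integers and comments. A sketch of tests that could be added in the same style (illustrative, not part of this commit):

```python
import flowmetal.parser as p
import pytest


@pytest.mark.parametrize('txt, val', [
    ('2r101', 5),
    ('16rff', 255),
])
def test_parse_radix(txt, val):
    """Radix-notation integers parse down to plain ints."""
    assert isinstance(p.parses(txt), p.IntegerToken)
    assert p.parses(txt).data == val


def test_parse_comment():
    """Comments are retained as whitespace-like tokens."""
    assert isinstance(p.parses("; a comment\n"), p.CommentToken)
```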