Starting to put a parser around all this

This commit is contained in:
Reid 'arrdem' McKenzie 2020-06-14 11:32:21 -06:00
parent 5b1ec2e24c
commit 50e7213d7b
7 changed files with 546 additions and 117 deletions

2
.gitignore vendored Normal file
View file

@@ -0,0 +1,2 @@
/**/__pycache__
/**/*.egg-info

116
README.md
View file

@@ -70,125 +70,9 @@ In this setup, the Flowmetal interpreters are able to interact with an external
For instance this program would use the external connector stubs to build up interaction(s) with an external system.
```lisp
(defpackage flowmetal.time
  (defenum time-unit
    "Calendar-independent durations."
    +milliseconds+
    +seconds+
    +minutes+
    +hours+
    +days+)

  (defrecord duration [num : int?, scale : time-unit?]
    "A type for representing scalar durations.")

  (defn as-milliseconds [d : duration?]
    : duration?
    "Normalize a duration to a number of milliseconds."
    (match d
      [(duration x +days+)         (as-milliseconds (duration (* x 24) +hours+))]
      [(duration x +hours+)        (as-milliseconds (duration (* x 60) +minutes+))]
      [(duration x +minutes+)      (as-milliseconds (duration (* x 60) +seconds+))]
      [(duration x +seconds+)      (as-milliseconds (duration (* x 1000) +milliseconds+))]
      [(duration x +milliseconds+) d]))

  ;; A type of one value used to represent an error
  (defenum timeout
    +timeout+)

  (defendpoint with-timeout!
    [d : duration?
     f : (fn? [] : a)]
    : a
    )
  )
(defpackage com.twitter.wilson
  (require
    ;; The log lets you record status information into a program's trace
    [flowmetal.log
     :refer [log!]]

    ;; The time system lets you put bounds on the implicit awaiting Flowmetal does
    [flowmetal.time
     :refer [with-timeout!, timeout?, make-duration, duration?, +seconds+, +hours+, sleep!]]

    ;; JSON. Simple enough
    [flowmetal.json
     :refer [loads, dumps, json?]]

    ;; Extensions! Provided by other systems.
    ;;
    ;; This one allows for an external service to receive HTTP callbacks on Flowmetal's behalf.
    [http.callback
     :refer [make-callback!, get-callback!, callback?]]

    ;; This one allows for an external service to make HTTP requests on Flowmetal's behalf.
    [http.request
     :refer [post!, error?, dns-error?, connection-error?, response-error?]])

  (defenum stage
    +reboot+
    +bios-update+
    +reinstall+)

  ;; FIXME: how to do table optimization?
  (defn fib [x]
    (match x
      [0 1]
      [1 1]
      [_ (+ (fib (- x 1)) (fib (- x 2)))]))

  (defn retry-http [f
                    : (fn? [] a?)
                    backoff-fn
                    : (fn? [int?] duration?)
                    :default (fn [x : int?]
                               : duration?
                               (make-duration (fib x) +seconds+))
                    backoff-count
                    : int?
                    :default 0]
    : a
    """The implementation of HTTP with retrying."""
    (let [response (f)]
      (if (not (error? response))
        response
        ;; FIXME: how does auth denied get represented?
        (if (or (dns-error? response)
                (connection-error? response)
                (response-error? response))
          (do (sleep! (backoff-fn backoff-count))
              (retry-http f backoff-fn (+ backoff-count 1)))))))

  (defn job [hostname
             : str?
             stages
             : (list? stage?)
             job-timeout
             : duration?
             :default (make-duration 3 +hours+)]
    : (union? [timeout? json?])
    """Run a wilson job, wait for the callback and process the result.

    By default the job is only waited for three hours.
    """
    (let [callback : callback? (make-callback!)
          job (retry-http
                (fn []
                  (post! "http://wilson.local.twitter.com"
                         :data
                         (dumps
                           {:host hostname
                            :stages [stages]
                            :callbacks [{:type :http, :url callback}]}))))]
      (let [result (with-timeout! job-timeout
                     (fn []
                       (get-callback! callback)))]
        (if-not (timeout? result)
          (loads result)
          result))))
  )
```
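
For a sense of the retry cadence, the default `backoff-fn` above sleeps `(fib n)` seconds before the n-th retry. A quick Python sketch of that schedule (illustrative only, not part of this change):

```python
def fib(n: int) -> int:
    # Mirrors the fib defined in the example above: fib(0) == fib(1) == 1, then 2, 3, 5, ...
    return 1 if n < 2 else fib(n - 1) + fib(n - 2)


# Backoff delays, in seconds, ahead of the first six retries.
print([fib(n) for n in range(6)])  # [1, 1, 2, 3, 5, 8]
```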

33
setup.py Normal file
View file

@@ -0,0 +1,33 @@
from setuptools import setup

setup(
    name="arrdem.flowmetal",

    # Package metadata
    version='0.0.0',
    license="MIT",
    description="A weird execution engine",
    long_description=open("README.md").read(),
    long_description_content_type="text/markdown",
    author="Reid 'arrdem' McKenzie",
    author_email="me@arrdem.com",
    url="https://git.arrdem.com/arrdem/flowmetal",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
    ],

    # Package setup
    package_dir={"": "src/python"},
    packages=[
        "flowmetal",
    ],
    scripts=[
    ],
    install_requires=[
    ],
    extras_require={
    }
)

View file

@@ -0,0 +1,33 @@
(defpackage flowmetal.time
  (defenum time-unit
    "Calendar-independent durations."
    +milliseconds+
    +seconds+
    +minutes+
    +hours+
    +days+)

  (defrecord duration [num :- int?, scale :- time-unit?]
    "A type for representing scalar durations.")

  (defn as-milliseconds [d :- duration?]
    :- duration?
    "Normalize a duration to a number of milliseconds."
    (match d
      [(duration x +days+)         (as-milliseconds (duration (* x 24) +hours+))]
      [(duration x +hours+)        (as-milliseconds (duration (* x 60) +minutes+))]
      [(duration x +minutes+)      (as-milliseconds (duration (* x 60) +seconds+))]
      [(duration x +seconds+)      (as-milliseconds (duration (* x 1000) +milliseconds+))]
      [(duration x +milliseconds+) d]))

  ;; A type of one value used to represent an error
  (defenum timeout
    +timeout+)

  (defendpoint with-timeout!
    [d :- duration?
     f :- (fn? [] :- a)]
    :- a
    )
  )

View file

@@ -0,0 +1 @@
#!/usr/bin/env python3

View file

@@ -0,0 +1,410 @@
"""
A parser for s-expressions.
"""
from abc import ABC, abstractmethod
from enum import Enum
from io import StringIO, BufferedReader
from typing import IO, NamedTuple
from fractions import Fraction

## Types


class Position(NamedTuple):
    """An encoding for the location of a read token within a source."""

    source: str
    line: int
    col: int
    offset: int

    @staticmethod
    def next_pos(pos: "Position"):
        return Position(pos.source, pos.line, pos.col + 1, pos.offset + 1)

    @staticmethod
    def next_line(pos: "Position"):
        return Position(pos.source, pos.line + 1, 1, pos.offset + 1)


class TokenBase(object):
    """The shared interface to tokens."""

    @property
    @abstractmethod
    def pos(self):
        """The position of the token within its source."""

    @property
    @abstractmethod
    def raw(self):
        """The raw token as scanned."""

    @abstractmethod
    def read(self):
        """Return a runtime value for the token, discarding any whitespace and so forth."""


class IntegerToken(NamedTuple, TokenBase):
    """A read integer, including position."""

    data: int
    raw: str
    pos: Position

    def read(self):
        return self.data


class RationalToken(NamedTuple, TokenBase):
    """A read rational, including position."""

    data: Fraction
    raw: str
    pos: Position

    def read(self):
        return self.data


class FloatToken(NamedTuple, TokenBase):
    """A read floating point number, including position."""

    data: float
    raw: str
    pos: Position

    def read(self):
        return self.data


class SymbolToken(NamedTuple, TokenBase):
    """A read symbol, including position."""

    data: str
    raw: str
    pos: Position


class KeywordToken(NamedTuple, TokenBase):
    """A read keyword."""

    data: str
    pos: Position


class StringToken(NamedTuple, TokenBase):
    """A read string, including position."""

    data: str
    raw: str
    pos: Position


class ListType(Enum):
    """The supported types of lists."""

    ROUND = ("(", ")")
    SQUARE = ("[", "]")


class ListToken(NamedTuple, TokenBase):
    """A read list, including its start position and the paren type."""

    data: list
    raw: str
    pos: Position
    paren: ListType = ListType.ROUND


class SetToken(NamedTuple, TokenBase):
    """A read set, including its start position."""

    data: list
    raw: str
    pos: Position


class MappingToken(NamedTuple, TokenBase):
    """A read mapping, including its start position."""

    data: list
    raw: str
    pos: Position


class WhitespaceToken(NamedTuple, TokenBase):
    """A bunch of whitespace with no semantic value."""

    data: str
    raw: str
    pos: Position


class CommentToken(WhitespaceToken):
    """A read comment with no semantic value."""


## Parser implementation


class PosTrackingBufferedReader(object):
    """A slight riff on BufferedReader which only allows for reads and peeks of a
    char, and tracks positions.

    Perfect for implementing LL(1) parsers.
    """

    def __init__(self, f: IO, source_name=None):
        self._next_pos = self._pos = Position(source_name, 1, 1, 0)
        self._char = None
        self._f = f

    def pos(self):
        return self._pos

    def peek(self):
        if self._char is None:
            self._char = self._f.read(1)
        return self._char

    def read(self):
        # Accounting for lookahead(1)
        ch = self._char or self._f.read(1)
        self._char = self._f.read(1)

        # Accounting for the positions
        self._pos = self._next_pos
        if ch == "\r" and self.peek() == "\n":
            # Throw out the buffered "\n"; a "\r\n" pair counts as one line break
            self._char = self._f.read(1)
            self._next_pos = Position.next_line(self._next_pos)
        elif ch == "\n":
            self._next_pos = Position.next_line(self._next_pos)
        else:
            self._next_pos = Position.next_pos(self._next_pos)
        return ch
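
# Illustrative only (not part of this commit): how the reader tracks positions.
# Reading "a\nb" one character at a time advances the column and resets it on
# newlines, e.g. in a REPL:
#
#     >>> r = PosTrackingBufferedReader(StringIO("a\nb"), source_name="<demo>")
#     >>> r.read(), r.pos()
#     ('a', Position(source='<demo>', line=1, col=1, offset=0))
#     >>> r.read(), r.pos()
#     ('\n', Position(source='<demo>', line=1, col=2, offset=1))
#     >>> r.read(), r.pos()
#     ('b', Position(source='<demo>', line=2, col=1, offset=2))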


class ReadThroughBuffer(PosTrackingBufferedReader):
    """A duck that quacks like a PosTrackingBufferedReader."""

    def __init__(self, ptcr: PosTrackingBufferedReader):
        self._reader = ptcr
        self._buffer = StringIO()

    def pos(self):
        return self._reader.pos()

    def peek(self):
        return self._reader.peek()

    def read(self):
        ch = self._reader.read()
        self._buffer.write(ch)
        return ch

    def __str__(self):
        return self._buffer.getvalue()

    def __enter__(self, *args):
        return self

    def __exit__(self, *args):
        pass


class SexpParser(ABC):
    @classmethod
    @abstractmethod
    def parse(cls, f: PosTrackingBufferedReader) -> TokenBase:
        """Parse an s-expression, returning a parsed token tree."""

    @classmethod
    def read(cls, f: PosTrackingBufferedReader):
        """Parse to a token tree and read to values returning the resulting values."""
        return cls.parse(f).read()


class Parser(SexpParser):
    """A basic parser which knows about lists, symbols and numbers.

    Intended as a base class / extension point for other parsers.
    """

    @classmethod
    def parse(cls, f: PosTrackingBufferedReader):
        if f.peek() == "(":
            return cls.parse_list(f)
        elif f.peek() == "[":
            return cls.parse_sqlist(f)
        elif cls.isspace(f.peek()):
            return cls.parse_whitespace(f)
        elif f.peek().isdigit():
            return cls.parse_num(f)
        elif f.peek() == ";":
            return cls.parse_comment(f)
        else:
            return cls.parse_symbol(f)

    @classmethod
    def isspace(cls, ch: str):
        """An extension point allowing for a more expansive concept of whitespace."""
        return ch.isspace()

    @classmethod
    def parse_delimeted(cls, f: PosTrackingBufferedReader, openc, closec, ctor):
        with ReadThroughBuffer(f) as rtb:
            pos = None
            for c in openc:
                pos = pos or rtb.pos()
                assert rtb.read() == c  # Discard the leading delimiter
            acc = []
            while f.peek() != closec:
                acc.append(cls.parse(rtb))
            assert rtb.read() == closec  # Discard the trailing delimiter
            return ctor(acc, str(rtb), pos)

    @classmethod
    def parse_list(cls, f: PosTrackingBufferedReader):
        return cls.parse_delimeted(f, "(", ")", lambda *args: ListToken(*args, ListType.ROUND))

    @classmethod
    def parse_sqlist(cls, f: PosTrackingBufferedReader):
        return cls.parse_delimeted(f, "[", "]", lambda *args: ListToken(*args, ListType.SQUARE))

    @classmethod
    def parse_unum(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            assert rtb.peek().isdigit()
            pos = f.pos()
            while rtb.peek().isdigit():
                rtb.read()
            buff = str(rtb)
            return IntegerToken(int(buff), buff, pos)

    @classmethod
    def parse_num(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            num: IntegerToken = cls.parse_unum(rtb)
            # Various cases of more interesting numbers
            if rtb.peek() == "/":
                ## Case of a rational
                # Discard the delimiter
                rtb.read()
                denom = cls.parse_num(rtb)
                return RationalToken(Fraction(num.data, denom.data), str(rtb), num.pos)
            elif rtb.peek() == "r":
                ## Case of a number with a base
                # Discard the delimiter
                rtb.read()
                body = cls.parse_symbol(rtb)
                return IntegerToken(int(body.raw, num.data), str(rtb), num.pos)
            elif rtb.peek() == ".":
                ## Case of a number with a decimal component
                ## Note there may be a trailing exponent
                raise NotImplementedError()
            elif rtb.peek() == "e":
                ## Case of a number with a floating point exponent
                raise NotImplementedError()
            else:
                return num

    @classmethod
    def parse_symbol(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while rtb.peek() and not cls.isspace(rtb.peek()):
                pos = pos or rtb.pos()
                rtb.read()
            buff = str(rtb)
            return SymbolToken(buff, buff, pos)

    @classmethod
    def parse_whitespace(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while rtb.peek() and cls.isspace(rtb.peek()):
                pos = pos or rtb.pos()
                ch = rtb.read()
                if ch == "\n":
                    break
            buff = str(rtb)
            return WhitespaceToken(buff, buff, pos)

    @classmethod
    def parse_comment(cls, f: PosTrackingBufferedReader):
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while rtb.read() not in ["\n", ""]:
                pos = pos or rtb.pos()
                continue
            buff = str(rtb)
            return CommentToken(buff, buff, pos)


## Parsing


def parses(buff: str,
           parser: SexpParser = Parser,
           source_name=None):
    """Parse a single s-expression from a string, returning its token tree."""

    return parse(StringIO(buff), parser, source_name or f"<string {id(buff):x}>")


def parsef(path: str,
           parser: SexpParser = Parser):
    """Parse a single s-expression from the file named by a string, returning its token tree."""

    with open(path, "r") as f:
        return parse(f, parser, path)


def parse(file: IO,
          parser: SexpParser = Parser,
          source_name=None):
    """Parse a single s-expression from a file-like object, returning its token tree."""

    return parser.parse(
        PosTrackingBufferedReader(
            file,
            source_name=source_name
        )
    )


## Loading


def loads(buff: str,
          parser: SexpParser = Parser,
          source_name=None):
    """Load a single s-expression from a string, returning its object representation."""

    return load(StringIO(buff), parser, source_name or f"<string {id(buff):x}>")


def loadf(path: str,
          parser: SexpParser = Parser):
    """Load a single s-expression from the file named by a string, returning its object representation."""

    with open(path, "r") as f:
        return load(f, parser, path)


def load(file: IO,
         parser: SexpParser = Parser,
         source_name=None):
    """Load a single s-expression from a file-like object, returning its object representation."""

    # SexpParser.read parses to a token tree and reads it down to values
    return parser.read(
        PosTrackingBufferedReader(
            file,
            source_name=source_name
        )
    )


## Dumping


def dump(file: IO, obj):
    """Given an object, dump its s-expression coding to the given file-like object."""

    raise NotImplementedError()


def dumps(obj):
    """Given an object, dump its s-expression coding to a string and return that string."""

    with StringIO("") as f:
        dump(f, obj)
        return f.getvalue()
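
Taken together, the entry points above give a small reading API. A quick usage sketch (illustrative only, not part of this commit; it assumes the module is importable as `flowmetal.parser`, as the tests below do):

```python
import flowmetal.parser as p

# Parse a string to a token tree; whitespace is preserved as tokens.
tree = p.parses("(1 1/2 30)")
assert isinstance(tree, p.ListToken) and tree.paren is p.ListType.ROUND

# Filter out the whitespace tokens to get at the parsed values.
values = [t.data for t in tree.data if not isinstance(t, p.WhitespaceToken)]
assert values == [1, p.Fraction(1, 2), 30]

# Radix notation reads down to a plain integer.
assert p.parses("16r10").data == 16
```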

View file

@@ -0,0 +1,66 @@
"""
Tests covering the Flowmetal parser.
"""

import flowmetal.parser as p
import pytest


def test_parse_list():
    """Trivial parsing a list."""
    assert isinstance(p.parses("()"), p.ListToken)
    assert p.parses("()").paren == p.ListType.ROUND


@pytest.mark.parametrize('num,', [
    1, 2, 103, 504,
])
def test_parse_num(num):
    """Some trivial cases of parsing numbers."""
    assert isinstance(p.parses(str(num)), p.IntegerToken)
    assert p.parses(str(num)).data == num


@pytest.mark.parametrize('frac', [
    '1/2', '1/4', '1/512',
])
def test_parse_ratio(frac):
    """Test covering the ratio notation."""
    assert isinstance(p.parses(frac), p.RationalToken)
    assert p.parses(frac).data == p.Fraction(frac)


@pytest.mark.parametrize('sym,', [
    'a', 'b', '*earmuff-style*', '+kebab-style+', 'JAVA_CONSTANT_STYLE'
])
def test_parse_sym(sym):
    """Some trivial cases of parsing symbols."""
    assert isinstance(p.parses(sym), p.SymbolToken)
    assert p.parses(sym).data == sym


@pytest.mark.parametrize('txt, tokenization', [
    ('(1 2 3)',
     [(p.IntegerToken, '1'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '2'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '3')]),
    ('(a 1 b 2)',
     [(p.SymbolToken, 'a'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '1'),
      (p.WhitespaceToken, ' '),
      (p.SymbolToken, 'b'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '2')])
])
def test_list_contents(txt, tokenization):
    """Parse examples of list contents."""
    assert isinstance(p.parses(txt), p.ListToken)

    lelems = p.parses(txt).data
    for (type, text), token in zip(tokenization, lelems):
        assert isinstance(token, type)
        assert token.raw == text
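
Two behaviors the parser already implements but these tests don't yet exercise are radix-notation integers and comments. A sketch of tests that could be added in the same style (illustrative, not part of this commit):

```python
import flowmetal.parser as p
import pytest


@pytest.mark.parametrize('txt, val', [
    ('2r101', 5),
    ('16rff', 255),
])
def test_parse_radix(txt, val):
    """Radix-notation integers parse down to plain ints."""
    assert isinstance(p.parses(txt), p.IntegerToken)
    assert p.parses(txt).data == val


def test_parse_comment():
    """Comments are retained as whitespace-like tokens."""
    assert isinstance(p.parses("; a comment\n"), p.CommentToken)
```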