Starting to put a parser around all this

This commit is contained in:
Reid 'arrdem' McKenzie 2020-06-14 11:32:21 -06:00
parent 5b1ec2e24c
commit 50e7213d7b
7 changed files with 546 additions and 117 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/**/__pycache__
/**/*.egg-info

116
README.md
View file

@ -70,125 +70,9 @@ In this setup, the Flowmetal interpreters are able to interact with an external
For instance, this program uses the external connector stubs to build up interactions with an external system.
```lisp
(defpackage flowmetal.time
  (defenum time-unit
    "Calendar-independent durations."
    +milliseconds+
    +seconds+
    ;; +minutes+ is required by as-milliseconds below.
    +minutes+
    +hours+
    +days+)

  (defrecord duration [num : int?, scale : time-unit?]
    "A type for representing scalar durations.")

  (defn as-milliseconds [d : duration?]
    : duration?
    "Normalize a duration to a number of milliseconds."
    ;; Each clause steps down exactly one unit and recurs, so every input
    ;; bottoms out in the +milliseconds+ clause.
    (match d
      [(duration x +days+) (as-milliseconds (duration (* x 24) +hours+))]
      [(duration x +hours+) (as-milliseconds (duration (* x 60) +minutes+))]
      [(duration x +minutes+) (as-milliseconds (duration (* x 60) +seconds+))]
      [(duration x +seconds+) (as-milliseconds (duration (* x 1000) +milliseconds+))]
      [(duration x +milliseconds+) d]))

  ;; A type of one value used to represent an error
  (defenum timeout
    +timeout+)

  ;; Endpoint stub: takes a duration and a thunk. No body is given here.
  (defendpoint with-timeout!
    [d : duration?
     f : (fn? [] : a)]
    : a
    )
  )
(defpackage com.twitter.wilson
  (require
    ;; The log lets you record status information into a program's trace
    [flowmetal.log
     :refer [log!]]
    ;; The time system lets you put bounds on the implicit awaiting Flowmetal does
    [flowmetal.time
     :refer [with-timeout!, timeout?, make-duration, duration?, +seconds+, +hours+, sleep!]]
    ;; JSON. Simple enough
    [flowmetal.json
     :refer [loads, dumps, json?]]
    ;; Extensions! Provided by other systems.
    ;;
    ;; This one allows for an external service to receive HTTP callbacks on Flowmetal's behalf.
    [http.callback
     :refer [make-callback!, get-callback!, callback?]]
    ;; This one allows for an external service to make HTTP requests on Flowmetal's behalf.
    [http.request
     :refer [post!, error?, dns-error?, connection-error?, response-error?]])

  (defenum stage
    +reboot+
    +bios-update+
    +reinstall+)

  ;; FIXME: how to do table optimization?
  (defn fib [x]
    ;; Used below as the default backoff schedule (in seconds).
    (match x
      [0 1]
      [1 1]
      ;; Two separate recursive calls, summed.
      [_ (+ (fib (- x 1)) (fib (- x 2)))]))

  (defn retry-http [f
                    : (fn? [] a?)
                    backoff-fn
                    : (fn? [int?] duration?)
                    :default (fn [x : int?]
                               : duration?
                               (make-duration (fib x) +seconds+))
                    backoff-count
                    : int?
                    :default 0]
    : a
    "The implementation of HTTP with retrying."
    (let [response (f)]
      (if (not (error? response))
        response
        ;; FIXME: how does auth denied get represented?
        (if (or (dns-error? response)
                (connection-error? response)
                (response-error? response))
          ;; Retryable failure: wait out the backoff, then try again with
          ;; the attempt counter bumped.
          (do (sleep! (backoff-fn backoff-count))
              (retry-http f backoff-fn (+ backoff-count 1)))
          ;; Any other error is handed back to the caller rather than dropped.
          response))))

  (defn job [hostname
             : str?
             stages
             : (list? stage?)
             job-timeout
             : duration?
             :default (make-duration 3 +hours+)]
    : (union? [timeout? json?])
    "Run a wilson job, wait for the callback and process the result.

    By default the job is only waited for three hours.
    "
    (let [callback : callback? (make-callback!)
          ;; NOTE(review): the response bound to `job` is never inspected.
          job (retry-http
                (fn []
                  (post! "http://wilson.local.twitter.com"
                         :data
                         (dumps
                           {:host hostname
                            :stages [stages]
                            :callbacks [{:type :http, :url callback}]}))))]
      ;; Honour the caller-supplied timeout instead of a hard-coded one.
      (let [result (with-timeout! job-timeout
                     (fn []
                       (get-callback! callback)))]
        (if-not (timeout? result)
          (loads result)
          result))))
  )
```

33
setup.py Normal file
View file

@ -0,0 +1,33 @@
from setuptools import setup
# Read the long description up front so the file handle is closed
# deterministically and decoding does not depend on the locale's encoding.
with open("README.md", encoding="utf-8") as _readme:
    _long_description = _readme.read()

setup(
    name="arrdem.flowmetal",
    # Package metadata
    version='0.0.0',
    license="MIT",
    description="A weird execution engine",
    long_description=_long_description,
    long_description_content_type="text/markdown",
    author="Reid 'arrdem' McKenzie",
    author_email="me@arrdem.com",
    url="https://git.arrdem.com/arrdem/flowmetal",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
    ],

    # Package setup
    package_dir={"": "src/python"},
    packages=[
        "flowmetal",
    ],
    scripts=[
    ],
    install_requires=[
    ],
    extras_require={
    }
)

View file

@ -0,0 +1,33 @@
(defpackage flowmetal.time
  (defenum time-unit
    "Calendar-independent durations."
    +milliseconds+
    +seconds+
    ;; +minutes+ is required by as-milliseconds below.
    +minutes+
    +hours+
    +days+)

  (defrecord duration [num :- int?, scale :- time-unit?]
    "A type for representing scalar durations.")

  (defn as-milliseconds [d :- duration?]
    :- duration?
    "Normalize a duration to a number of milliseconds."
    ;; Each clause steps down exactly one unit and recurs, so every input
    ;; bottoms out in the +milliseconds+ clause.
    (match d
      [(duration x +days+) (as-milliseconds (duration (* x 24) +hours+))]
      [(duration x +hours+) (as-milliseconds (duration (* x 60) +minutes+))]
      [(duration x +minutes+) (as-milliseconds (duration (* x 60) +seconds+))]
      [(duration x +seconds+) (as-milliseconds (duration (* x 1000) +milliseconds+))]
      [(duration x +milliseconds+) d]))

  ;; A type of one value used to represent an error
  (defenum timeout
    +timeout+)

  ;; Endpoint stub: takes a duration and a thunk. No body is given here.
  (defendpoint with-timeout!
    [d :- duration?
     f :- (fn? [] :- a)]
    :- a
    )
  )

View file

@ -0,0 +1 @@
#!/usr/bin/env python3

View file

@ -0,0 +1,410 @@
"""
A parser for s-expressions.
"""
from abc import ABC, abstractmethod
from enum import Enum
from io import StringIO, BufferedReader
from typing import IO, NamedTuple
from fractions import Fraction
## Types
class Position(NamedTuple):
    """Location of a read token: source name, line, column and character offset."""

    source: str
    line: int
    col: int
    offset: int

    @staticmethod
    def next_pos(pos: "Position"):
        """Return the position one column further along the same line as ``pos``."""
        return Position(
            source=pos.source,
            line=pos.line,
            col=pos.col + 1,
            offset=pos.offset + 1,
        )

    @staticmethod
    def next_line(pos: "Position"):
        """Return the position at column 1 of the line after ``pos``."""
        return Position(
            source=pos.source,
            line=pos.line + 1,
            col=1,
            offset=pos.offset + 1,
        )
class TokenBase:
    """Common interface shared by every token type.

    The members are marked abstract as documentation of the contract; this
    class does not use an ABC metaclass, so nothing enforces them.
    """

    @property
    @abstractmethod
    def pos(self):
        """Where the token sits within its source."""

    @property
    @abstractmethod
    def raw(self):
        """The token's text exactly as it was scanned."""

    @abstractmethod
    def read(self):
        """Produce the runtime value for this token, dropping whitespace and the like."""
class _IntegerTokenFields(NamedTuple):
    """Field layout backing IntegerToken."""

    data: int
    raw: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class IntegerToken(_IntegerTokenFields, TokenBase):
    """A read integer, including position."""

    def read(self):
        """Return the parsed integer value."""
        return self.data
class _RationalTokenFields(NamedTuple):
    """Field layout backing RationalToken."""

    data: Fraction
    raw: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class RationalToken(_RationalTokenFields, TokenBase):
    """A read rational number (ratio of two integers), including position."""

    def read(self):
        """Return the parsed value as a Fraction."""
        return self.data
class _FloatTokenFields(NamedTuple):
    """Field layout backing FloatToken."""

    data: float
    raw: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class FloatToken(_FloatTokenFields, TokenBase):
    """A read floating point number, including position."""

    def read(self):
        """Return the parsed floating point value."""
        return self.data
class _SymbolTokenFields(NamedTuple):
    """Field layout backing SymbolToken."""

    data: str
    raw: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class SymbolToken(_SymbolTokenFields, TokenBase):
    """A read symbol, including position."""
class _KeywordTokenFields(NamedTuple):
    """Field layout backing KeywordToken (note: no ``raw`` field)."""

    data: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class KeywordToken(_KeywordTokenFields, TokenBase):
    """A read keyword."""
class _StringTokenFields(NamedTuple):
    """Field layout backing StringToken."""

    data: str
    raw: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class StringToken(_StringTokenFields, TokenBase):
    """A read string, including position."""
class ListType(Enum):
    """The kinds of list the reader distinguishes.

    Each member's value is the (opening, closing) delimiter pair.
    """

    ROUND = ("(", ")")    # Parenthesised list.
    SQUARE = ("[", "]")   # Square-bracketed list.
class _ListTokenFields(NamedTuple):
    """Field layout backing ListToken."""

    data: list
    raw: str
    pos: Position
    paren: ListType = ListType.ROUND


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class ListToken(_ListTokenFields, TokenBase):
    """A read list, including its start position and the paren type."""
class _SetTokenFields(NamedTuple):
    """Field layout backing SetToken."""

    data: list
    raw: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class SetToken(_SetTokenFields, TokenBase):
    """A read set, including its start position."""
class _MappingTokenFields(NamedTuple):
    """Field layout backing MappingToken."""

    data: list
    raw: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class MappingToken(_MappingTokenFields, TokenBase):
    """A read mapping, including its start position."""
class _WhitespaceTokenFields(NamedTuple):
    """Field layout backing WhitespaceToken."""

    data: str
    raw: str
    pos: Position


# The fields live in a separate NamedTuple because the NamedTuple class syntax
# cannot be combined with other base classes on Python 3.9 and later.
class WhitespaceToken(_WhitespaceTokenFields, TokenBase):
    """A bunch of whitespace with no semantic value."""
class CommentToken(WhitespaceToken):
    """A comment as read from the source.

    Shares WhitespaceToken's fields and, like whitespace, carries no
    semantic value.
    """
## Parser implementation
class PosTrackingBufferedReader(object):
    """A slight riff on BufferedReader which only allows for reads and peeks of a
    char, and tracks positions.

    Perfect for implementing LL(1) parsers.
    """

    def __init__(self, f: IO, source_name=None):
        """Wrap the file-like ``f``; ``source_name`` is stored in every Position."""
        # The last-read and upcoming positions both start on the first character.
        self._next_pos = self._pos = Position(source_name, 1, 1, 0)
        # One character of lookahead; None until it is first needed.
        self._char = None
        self._f = f

    def pos(self):
        """Return the position of the most recently read character.

        Before anything has been read this is the start-of-input position.
        """
        return self._pos

    def peek(self):
        """Return the next character without consuming it ("" at end of input)."""
        if self._char is None:
            self._char = self._f.read(1)
        return self._char

    def read(self):
        """Consume and return the next character ("" at end of input).

        A carriage return immediately followed by a line feed is consumed as
        a single line break: the carriage return is returned and the line
        feed is discarded.
        """
        # Accounting for lookahead(1)
        ch = self._char or self._f.read(1)
        self._char = self._f.read(1)

        # Accounting for the positions
        self._pos = self._next_pos
        if ch == "\r" and self._char == "\n":
            # Throw out the line feed by refilling the lookahead past it.
            self._char = self._f.read(1)
            # Two characters were consumed, so the offset advances by two.
            self._next_pos = Position.next_line(Position.next_pos(self._next_pos))
        elif ch == "\n":
            self._next_pos = Position.next_line(self._next_pos)
        else:
            self._next_pos = Position.next_pos(self._next_pos)

        return ch
class ReadThroughBuffer(PosTrackingBufferedReader):
    """A stand-in for a PosTrackingBufferedReader that records what passes through.

    Every character read via this object is forwarded from the wrapped reader
    and also appended to an internal buffer; ``str()`` returns that buffer.
    """

    def __init__(self, ptcr: PosTrackingBufferedReader):
        # The parent initialiser is not invoked: all state lives in the
        # wrapped reader.
        self._buffer = StringIO()
        self._reader = ptcr

    def pos(self):
        """Delegate to the wrapped reader's position."""
        wrapped = self._reader
        return wrapped.pos()

    def peek(self):
        """Delegate to the wrapped reader's lookahead; nothing is recorded."""
        wrapped = self._reader
        return wrapped.peek()

    def read(self):
        """Read one character from the wrapped reader, record it, and return it."""
        consumed = self._reader.read()
        self._buffer.write(consumed)
        return consumed

    def __str__(self):
        """Everything read through this buffer so far."""
        return self._buffer.getvalue()

    def __enter__(self, *args):
        return self

    def __exit__(self, *args):
        # Nothing to release; the wrapped reader stays usable.
        return None
class SexpParser(ABC):
    """Abstract interface for s-expression parsers.

    Implementations supply ``parse``; ``read`` is built on top of it. Both are
    classmethods, so parsers are used as classes rather than instances.
    """

    @classmethod
    @abstractmethod
    def parse(cls, f: "PosTrackingBufferedReader") -> "TokenBase":
        """Parse an s-expression, returning a parsed token tree."""

    # Must be a classmethod: without the decorator, calling read on the
    # parser class binds the reader to ``cls`` and leaves ``f`` missing.
    @classmethod
    def read(cls, f: "PosTrackingBufferedReader"):
        """Parse to a token tree and read to values returning the resulting values."""
        return cls.parse(f).read()
class Parser(SexpParser):
    """A basic parser which knows about lists, symbols and numbers.

    Intended as a base class / extension point for other parsers.

    Malformed input (an unmatched closing delimiter, or input ending inside
    a list) raises SyntaxError.
    """

    # Characters which end a symbol even without intervening whitespace, so
    # that "(a b)" reads the symbol "b" rather than "b)".
    _DELIMITERS = "()[]"

    @classmethod
    def parse(cls, f: PosTrackingBufferedReader):
        """Parse one token from ``f``, dispatching on the next character.

        At end of input this falls through to ``parse_symbol`` and yields an
        empty symbol whose position is None.
        """
        ch = f.peek()
        if ch == "(":
            return cls.parse_list(f)
        elif ch == "[":
            return cls.parse_sqlist(f)
        elif ch and ch in ")]":
            # Raise instead of treating the stray closer as a symbol; no
            # branch below would consume it, so a list would never finish.
            raise SyntaxError(f"Unexpected {ch!r} after {f.pos()}")
        elif cls.isspace(ch):
            return cls.parse_whitespace(f)
        elif ch.isdigit():
            return cls.parse_num(f)
        elif ch == ";":
            return cls.parse_comment(f)
        else:
            return cls.parse_symbol(f)

    @classmethod
    def isspace(cls, ch: str):
        """An extension point allowing for a more expansive concept of whitespace."""
        return ch.isspace()

    @classmethod
    def parse_delimeted(cls, f: PosTrackingBufferedReader, openc, closec, ctor):
        """Parse tokens between ``openc`` and ``closec``.

        ``ctor`` is called with (tokens, raw text, start position) and its
        result is returned. Raises SyntaxError if the opening delimiter is
        absent or the input ends before ``closec``.
        """
        with ReadThroughBuffer(f) as rtb:
            pos = None
            for c in openc:
                # Read outside of any assert so the character is consumed
                # even when Python runs with assertions disabled.
                ch = rtb.read()
                if ch != c:
                    raise SyntaxError(f"Expected {c!r} but read {ch!r} at {rtb.pos()}")
                # The start position is that of the first delimiter character.
                pos = pos or rtb.pos()
            if pos is None:
                pos = rtb.pos()

            acc = []
            while rtb.peek() != closec:
                if not rtb.peek():
                    # End of input: without this check parse() would keep
                    # returning empty symbols and the loop would never end.
                    raise SyntaxError(
                        f"Input ended before the {closec!r} closing the {openc!r} at {pos}")
                acc.append(cls.parse(rtb))

            rtb.read()  # Discard the trailing delimeter
            return ctor(acc, str(rtb), pos)

    @classmethod
    def parse_list(cls, f: PosTrackingBufferedReader):
        """Parse a round-paren list into a ListToken."""
        return cls.parse_delimeted(f, "(", ")", lambda *args: ListToken(*args, ListType.ROUND))

    @classmethod
    def parse_sqlist(cls, f: PosTrackingBufferedReader):
        """Parse a square-bracket list into a ListToken."""
        return cls.parse_delimeted(f, "[", "]", lambda *args: ListToken(*args, ListType.SQUARE))

    @classmethod
    def parse_unum(cls, f: PosTrackingBufferedReader):
        """Parse a run of digits into an IntegerToken.

        Raises SyntaxError if the next character is not a digit.
        """
        with ReadThroughBuffer(f) as rtb:
            if not rtb.peek().isdigit():
                raise SyntaxError(f"Expected a digit but found {rtb.peek()!r} after {rtb.pos()}")
            pos = None
            while rtb.peek().isdigit():
                rtb.read()
                # pos() reports the character just read, so capture the
                # start only after the first digit has been consumed.
                pos = pos or rtb.pos()
            buff = str(rtb)
            return IntegerToken(int(buff), buff, pos)

    @classmethod
    def parse_num(cls, f: PosTrackingBufferedReader):
        """Parse a number: a plain integer, a ratio ("1/2") or a based integer ("16rFF").

        Decimal and exponent forms raise NotImplementedError.
        """
        with ReadThroughBuffer(f) as rtb:
            num: IntegerToken = cls.parse_unum(rtb)

            # Various cases of more interesting numbers
            if rtb.peek() == "/":
                ## Case of a rational
                # Discard the delimeter
                rtb.read()
                denom = cls.parse_num(rtb)
                return RationalToken(Fraction(num.data, denom.data), str(rtb), num.pos)

            elif rtb.peek() == "r":
                ## Case of a number with a base
                # Discard the delimeter
                rtb.read()
                body = cls.parse_symbol(rtb)
                # The leading integer is the radix; the symbol text is the digits.
                return IntegerToken(int(body.raw, num.data), str(rtb), num.pos)

            elif rtb.peek() == ".":
                ## Case of a number with a decimal component
                ## Note there may be a trailing exponent
                raise NotImplementedError()

            elif rtb.peek() == "e":
                ## Case of a number with a floating point exponent
                raise NotImplementedError()

            else:
                return num

    @classmethod
    def parse_symbol(cls, f: PosTrackingBufferedReader):
        """Parse a symbol: everything up to whitespace, a list delimiter or end of input.

        If nothing can be consumed the token is empty and its position is None.
        """
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while True:
                ch = rtb.peek()
                if not ch or cls.isspace(ch) or ch in cls._DELIMITERS:
                    break
                rtb.read()
                pos = pos or rtb.pos()
            buff = str(rtb)
            return SymbolToken(buff, buff, pos)

    @classmethod
    def parse_whitespace(cls, f: PosTrackingBufferedReader):
        """Parse a run of whitespace, stopping after the first newline."""
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while rtb.peek() and cls.isspace(rtb.peek()):
                ch = rtb.read()
                pos = pos or rtb.pos()
                if ch == "\n":
                    break
            buff = str(rtb)
            return WhitespaceToken(buff, buff, pos)

    @classmethod
    def parse_comment(cls, f: PosTrackingBufferedReader):
        """Parse a comment running to the end of the line (or of the input).

        The terminating newline, if any, is consumed and kept in the raw text.
        """
        with ReadThroughBuffer(f) as rtb:
            pos = None
            while rtb.read() not in ["\n", ""]:
                pos = pos or rtb.pos()
            buff = str(rtb)
            return CommentToken(buff, buff, pos)
## Parsing
def parses(buff: str,
           parser: SexpParser = Parser,
           source_name=None):
    """Parse one s-expression out of the string ``buff`` and return its token tree.

    When no ``source_name`` is given, one is derived from the string's id.
    """
    if not source_name:
        source_name = f"<string {id(buff):x}>"
    stream = StringIO(buff)
    return parse(stream, parser, source_name)
def parsef(path: str,
           parser: SexpParser = Parser):
    """Parse one s-expression from the file at ``path`` and return its token tree.

    The path doubles as the source name recorded in token positions.
    """
    with open(path, "r") as handle:
        tree = parse(handle, parser, path)
    return tree
def parse(file: IO,
          parser: SexpParser = Parser,
          source_name=None):
    """Parse one s-expression from the file-like ``file`` and return its token tree."""
    # Wrap the raw stream so the parser gets peeking and position tracking.
    reader = PosTrackingBufferedReader(file, source_name=source_name)
    return parser.parse(reader)
## Loading
def loads(buff: str,
          parser: SexpParser = Parser,
          source_name=None):
    """Load one s-expression out of the string ``buff`` and return its object representation.

    When no ``source_name`` is given, one is derived from the string's id.
    """
    if not source_name:
        source_name = f"<string {id(buff):x}>"
    stream = StringIO(buff)
    return load(stream, parser, source_name)
def loadf(path: str,
          parser: SexpParser = Parser):
    """Load one s-expression from the file at ``path`` and return its object representation.

    The path doubles as the source name recorded in token positions.
    """
    with open(path, "r") as handle:
        value = load(handle, parser, path)
    return value
def load(file: IO,
         parser: SexpParser = Parser,
         source_name=None):
    """Load a single sexpression from a file-like object, returning its object representation."""
    # SexpParser's parse-and-evaluate entry point is ``read``; it defines no
    # ``load`` method.
    return parser.read(
        PosTrackingBufferedReader(
            file,
            source_name=source_name
        )
    )
## Dumping
def dump(file: IO, obj):
    """Write the s-expression coding of ``obj`` to the file-like ``file``.

    Not implemented yet: always raises NotImplementedError.
    """
    raise NotImplementedError
def dumps(obj):
    """Given an object, dump its s-expression coding to a string and return that string."""
    with StringIO("") as f:
        dump(f, obj)
        # getvalue() yields the written text; str(f) would only give the
        # StringIO object's repr.
        return f.getvalue()

View file

@ -0,0 +1,66 @@
"""
Tests covering the Flowmetal parser.
"""
import flowmetal.parser as p
import pytest
def test_parse_list():
    """An empty pair of parens parses to a round-paren ListToken."""
    source = "()"
    assert isinstance(p.parses(source), p.ListToken)
    assert p.ListType.ROUND == p.parses(source).paren
@pytest.mark.parametrize('num,', [
    1, 2, 103, 504,
])
def test_parse_num(num):
    """Plain decimal integers parse to IntegerTokens carrying the same value."""
    text = str(num)
    assert isinstance(p.parses(text), p.IntegerToken)
    assert num == p.parses(text).data
@pytest.mark.parametrize('frac', [
    '1/2', '1/4', '1/512',
])
def test_parse_ratio(frac):
    """Test covering the ratio notation."""
    # Import Fraction from its home module rather than relying on the parser
    # module happening to expose it.
    from fractions import Fraction

    assert isinstance(p.parses(frac), p.RationalToken)
    assert p.parses(frac).data == Fraction(frac)
@pytest.mark.parametrize('sym,', [
    'a', 'b', '*earmuff-style*', '+kebab-style+', 'JAVA_CONSTANT_STYLE'
])
def test_parse_sym(sym):
    """Bare symbols parse to SymbolTokens whose data is the symbol text."""
    assert isinstance(p.parses(sym), p.SymbolToken)
    assert sym == p.parses(sym).data
@pytest.mark.parametrize('txt, tokenization', [
    ('(1 2 3)',
     [(p.IntegerToken, '1'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '2'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '3')]),

    ('(a 1 b 2)',
     [(p.SymbolToken, 'a'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '1'),
      (p.WhitespaceToken, ' '),
      (p.SymbolToken, 'b'),
      (p.WhitespaceToken, ' '),
      (p.IntegerToken, '2')])
])
def test_list_contents(txt, tokenization):
    """Parse examples of list contents."""
    parsed = p.parses(txt)
    assert isinstance(parsed, p.ListToken)

    lelems = parsed.data
    # zip() stops at the shorter input, so without this check a parse that
    # drops or adds trailing tokens would still pass.
    assert len(lelems) == len(tokenization)
    for (token_type, text), token in zip(tokenization, lelems):
        assert isinstance(token, token_type)
        assert token.raw == text