Starting to build an analyzer

This commit is contained in:
Reid 'arrdem' McKenzie 2020-07-18 18:46:09 -06:00
parent 9f29067f6d
commit 226ece5eaa
2 changed files with 281 additions and 79 deletions

View file

@@ -5,11 +5,13 @@ The [syntax] analyzer interprets a parse sequence into a syntax tree which can b
""" """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from io import StringIO, IO from io import StringIO
from typing import NamedTuple from typing import NamedTuple, List, Union, Any, IO, Tuple
from enum import Enum
import flowmetal.parser as p import flowmetal.parser as p
### Types ### Types
## We are not, in fact, sponsored by Typelevel LLC. ## We are not, in fact, sponsored by Typelevel LLC.
class TypeLevelExpr(object): class TypeLevelExpr(object):
@@ -24,135 +26,289 @@
class TypeExpr(TypeLevelExpr, NamedTuple):
    """A bound (or yet to be bound) type level symbol."""
class BuiltinType(TypeLevelExpr, Enum):
    """Built in types for atoms."""

    BOOLEAN = "Boolean"
    SYMBOL = "Symbol"
    KEYWORD = "Keyword"
    STRING = "String"
    INTEGER = "Integer"
    FRACTION = "Fraction"
    FLOAT = "Float"
class ConstraintExpr(TypeLevelExpr, NamedTuple):
    """A value-level constraint (predicate) as a type."""
## Terms
# Now down to reality
class ValueLevelExpr(object):
    """A base class for value-level expressions."""

    @property
    def type(self) -> TypeExpr:
        """The type of an expression."""
        # Implicitly None here; concrete expr classes override this property.
class AscribeExpr(TypeLevelExpr, NamedTuple):
    """Ascribe a type (via type-level expression) to a value-level expression."""

    # BUG FIX: this must be a NamedTuple - Analyzer._chomp constructs it with
    # two positional arguments (AscribeExpr(value, type)); with bare class-level
    # annotations that call would raise a TypeError.
    value: ValueLevelExpr
    type: TypeLevelExpr
class ConstExpr(ValueLevelExpr, NamedTuple):
    """Constant expressions. Keywords, strings, numbers, that sort of thing."""

    # The parse-level token backing this constant.
    token: p.ConstTokenBase

    @property
    def data(self) -> Any:
        """The value of the constant."""
        # The parser gives us this data
        return self.token.data

    @abstractmethod
    def type(self):
        # NOTE(review): marked @abstractmethod although the class is not an
        # ABC; subclasses shadow this with a @property. Confirm intent.
        raise NotImplementedError()
class BooleanExpr(ConstExpr):
    """A boolean constant."""

    @property
    def type(self):
        return BuiltinType.BOOLEAN


class IntegerExpr(ConstExpr):
    """An integer constant."""

    @property
    def type(self):
        return BuiltinType.INTEGER


class FractionExpr(ConstExpr):
    """A fraction (rational) constant."""

    @property
    def type(self):
        return BuiltinType.FRACTION


class FloatExpr(ConstExpr):
    """A floating point constant."""

    @property
    def type(self):
        return BuiltinType.FLOAT


class KeywordExpr(ConstExpr):
    """A keyword constant."""

    @property
    def type(self):
        return BuiltinType.KEYWORD


class StringExpr(ConstExpr):
    """A string constant."""

    @property
    def type(self):
        return BuiltinType.STRING
class ListExpr(ValueLevelExpr, NamedTuple):
    """A list constructor expression."""

    elements: List[ValueLevelExpr]

    # FIXME (arrdem 2020-07-18):
    #   Probably typed? Not sure.
    @property
    def type(self) -> TypeExpr:
        # An empty list has no type (falls through to an implicit None).
        if self.elements:
            return self.elements[-1].type
## 'real' AST nodes
class DoExpr(ValueLevelExpr, NamedTuple):
    """A sequence of effect expressions followed by a returned expression."""

    effect_exprs: List[ValueLevelExpr]
    ret_expr: ValueLevelExpr

    @property
    def type(self) -> TypeExpr:
        # A do-block takes the type of its final expression.
        return self.ret_expr.type


class LetExpr(ValueLevelExpr, NamedTuple):
    """Bind names to values, then evaluate a body."""

    binding_exprs: List[Tuple]
    ret_expr: DoExpr

    @property
    def type(self) -> TypeExpr:
        # A let takes the type of its body.
        return self.ret_expr.type
class FnExpr(ValueLevelExpr, NamedTuple):
    """An anonymous function expression."""

    arguments: List
    ret_expr: DoExpr

    @property
    def type(self) -> TypeExpr:
        """This is where the fun begins."""
        # Function typing is not implemented yet; implicitly None.
        return
## Reader implementation
class AnalyzerBase(ABC):
    """Analyzer interface."""

    @classmethod
    @abstractmethod
    def analyze(cls, token: p.TokenBase) -> ValueLevelExpr:
        """Analyze a token tree, returning an expr tree."""
class Analyzer(AnalyzerBase):
    """A reference Analyzer implementation.

    Walks a parsed token tree, building up a syntax tree.
    """

    # Type-ascription marker tokens - the Unicode turnstile and an ASCII
    # spelling of it.
    # NOTE(review): TACK0's text was garbled in transit; confirm the symbol
    # text against the parser before trusting it.
    TACK0 = p.SymbolToken('⊢', '⊢', None)
    TACK1 = p.SymbolToken('|-', '|-', None)

    # Ground-form head symbols recognized by analyze_list.
    LET = p.SymbolToken('let', 'let', None)
    DO = p.SymbolToken('do', 'do', None)
    FN = p.SymbolToken('fn', 'fn', None)
    LIST = p.SymbolToken('list', 'list', None)
    QUOTE = p.SymbolToken('quote', 'quote', None)

    @classmethod
    def _nows(cls, tokens):
        """Discard whitespace tokens, which are insignificant to syntax."""
        return [t for t in tokens if not isinstance(t, p.WhitespaceToken)]

    @classmethod
    def _chomp(cls, tokens):
        """'chomp' an expression and optional ascription off the tokens,
        returning an expression and the remaining tokens."""
        # FIX: removed a leftover debug print(tokens).
        if len(tokens) == 1:
            return cls.analyze(tokens[0]), []
        elif tokens[1] in [cls.TACK0, cls.TACK1]:
            # A tack must be followed by a type-level expression.
            if len(tokens) >= 3:
                return AscribeExpr(cls.analyze(tokens[0]), cls.analyze(tokens[2])), tokens[3:]
            else:
                raise SyntaxError(f"Analyzing tack at {tokens[1].pos}, did not find following type ascription!")
        else:
            return cls.analyze(tokens[0]), tokens[1:]

    @classmethod
    def _terms(cls, tokens):
        """Chomp expressions (with any ascriptions) until tokens are exhausted."""
        terms = []
        tokens = cls._nows(tokens)
        while tokens:
            term, tokens = cls._chomp(tokens)
            terms.append(term)
        return terms

    @classmethod
    def analyze(cls, token: p.TokenBase):
        """Analyze a single token tree, returning an expr tree."""
        if isinstance(token, p.BooleanToken):
            return BooleanExpr(token)
        if isinstance(token, p.KeywordToken):
            return KeywordExpr(token)
        if isinstance(token, p.IntegerToken):
            return IntegerExpr(token)
        if isinstance(token, p.FractionToken):
            return FractionExpr(token)
        if isinstance(token, p.FloatToken):
            return FloatExpr(token)
        if isinstance(token, p.StringToken):
            return StringExpr(token)
        if isinstance(token, p.ListToken):
            return cls.analyze_list(token)
        # NOTE(review): symbol tokens (and anything else) fall through to an
        # implicit None - apparently an unimplemented case.

    @classmethod
    def analyze_list(cls, token: p.ListToken):
        """Analyze a list, for which there are several 'ground' forms."""
        # Expunge any whitespace tokens
        tokens = cls._nows(token.data)
        if len(tokens) == 0:
            return ListExpr([])
        if tokens[0] == cls.QUOTE:
            raise NotImplementedError("Quote isn't quite there!")
        if tokens[0] == cls.LIST:
            return ListExpr(cls._terms(tokens[1:]))
        if tokens[0] == cls.DO:
            return cls.analyze_do(tokens[1:])
        if tokens[0] == cls.LET:
            return cls.analyze_let(tokens[1:])
        if tokens[0] == cls.FN:
            return cls.analyze_fn(tokens[1:])
        # BUG FIX: the original dropped this result on the floor (no return).
        # NOTE(review): analyze_invoke is not defined anywhere in this file yet.
        return cls.analyze_invoke(tokens)

    @classmethod
    def analyze_let(cls, tokens):
        """Analyze a (let [name value ...] body ...) form into a LetExpr."""
        assert len(tokens) >= 2
        assert isinstance(tokens[0], p.ListToken)
        # FIX: removed a leftover debug print of the binding tokens.
        bindings = []
        binding_tokens = cls._nows(tokens[0].data)
        while binding_tokens:
            # Bindings come in (name, value) pairs, each possibly ascribed.
            bindexpr, binding_tokens = cls._chomp(binding_tokens)
            valexpr, binding_tokens = cls._chomp(binding_tokens)
            bindings.append((bindexpr, valexpr))
        return LetExpr(bindings, cls.analyze_do(tokens[1:]))

    @classmethod
    def analyze_do(cls, tokens):
        """Analyze a (do effect ... ret) body into a DoExpr."""
        exprs = cls._terms(tokens)
        # BUG FIX: the effect exprs are everything *but* the final expr
        # ([:-1]); the original used [::-1], the whole list reversed, which
        # wrongly included the return expr among the effects.
        return DoExpr(exprs[:-1], exprs[-1])

    @classmethod
    def analyze_fn(cls, tokens):
        """Analyze a (fn [args ...] body ...) form into a FnExpr."""
        assert len(tokens) >= 2
        assert isinstance(tokens[0], p.ListToken)
        args = []
        arg_tokens = cls._nows(tokens[0].data)
        while arg_tokens:
            argexpr, arg_tokens = cls._chomp(arg_tokens)
            args.append(argexpr)
        return FnExpr(args, cls.analyze_do(tokens[1:]))
## Analysis interface
def analyzes(buff: str,
             analyzer: AnalyzerBase = Analyzer,
             parser: p.SexpParser = p.Parser,
             source_name=None):
    """Analyze a single s-expression from a string, returning its expr tree.

    DOC FIX: the docstring previously said "token tree", copied from the
    reader; these functions return analyzed expression trees.
    """
    return analyze(StringIO(buff), analyzer, parser, source_name or f"<string {id(buff):x}>")


def analyzef(path: str,
             analyzer: AnalyzerBase = Analyzer,
             parser: p.SexpParser = p.Parser):
    """Analyze a single s-expression from the file named by a string, returning its expr tree."""
    with open(path, "r") as f:
        return analyze(f, analyzer, parser, path)


def analyze(file: IO,
            analyzer: AnalyzerBase = Analyzer,
            parser: p.SexpParser = p.Parser,
            source_name=None):
    """Analyze a single s-expression from a file-like object, returning its expr tree."""
    return analyzer.analyze(p.parse(file, parser, source_name))

View file

@@ -0,0 +1,46 @@
"""
Tests covering the Flowmetal analyzer.
"""
import flowmetal.parser as p
import flowmetal.syntax_analyzer as a
import pytest
@pytest.mark.parametrize('txt, exprtype', [
    # Booleans
    ('true', a.ConstExpr),
    ('false', a.BooleanExpr),
    # Integers
    ('1', a.ConstExpr),
    ('1', a.IntegerExpr),
    # Fractions
    ('1/2', a.ConstExpr),
    ('1/2', a.FractionExpr),
    # Floats
    ('1.0', a.ConstExpr),
    ('1.0', a.FloatExpr),
    # Keywords
    (':foo', a.ConstExpr),
    (':foo', a.KeywordExpr),
    # Strings
    ('"foo"', a.ConstExpr),
    ('"foo"', a.StringExpr),
])
def test_analyze_constants(txt, exprtype):
    """Make sure the analyzer can chew on constants."""
    assert isinstance(a.analyzes(txt), exprtype)
@pytest.mark.parametrize('txt, exprtype, rettype', [
    ('()', a.ListExpr, None),
    ('(list)', a.ListExpr, None),
    ('(list 1)', a.ListExpr, a.BuiltinType.INTEGER),
    ('(do foo bar 1)', a.DoExpr, a.BuiltinType.INTEGER),
    ('(let [a 1] 1)', a.LetExpr, a.BuiltinType.INTEGER),
])
def test_analyze_rettype(txt, exprtype, rettype):
    """Make sure that do exprs work."""
    # Analyze once and check both the expr class and its computed type.
    expr = a.analyzes(txt)
    assert isinstance(expr, exprtype)
    assert expr.type == rettype