"""
The [syntax] analyzer interprets a parse sequence into a syntax tree which can
be evaluated or compiled.
"""
# NOTE(review): reconstructed from the post-image of a git patch; regions that
# fell outside the patch hunks are approximated and marked below.

from abc import ABC, abstractmethod
from io import StringIO
from typing import NamedTuple, List, Union, Any, IO, Tuple
from enum import Enum

import flowmetal.parser as p


### Types
## We are not, in fact, sponsored by Typelevel LLC.
class TypeLevelExpr(object):
    """A base class for type-level expressions."""
    # NOTE(review): the original body was outside the patch context — kept minimal.
    pass


class GenericExpr(TypeLevelExpr, NamedTuple):
    """A generic (parameterized) type-level expression."""
    # NOTE(review): the original body was outside the patch context — kept minimal.


class TypeExpr(TypeLevelExpr, NamedTuple):
    """A bound (or yet to be bound) type level symbol."""
    pass


class BuiltinType(TypeLevelExpr, Enum):
    """Built in types for atoms."""
    BOOLEAN = 'Boolean'
    SYMBOL = 'Symbol'
    KEYWORD = 'Keyword'
    STRING = 'String'
    INTEGER = 'Integer'
    FRACTION = 'Fraction'
    FLOAT = 'Float'


class ConstraintExpr(TypeLevelExpr, NamedTuple):
    """A value-level constraint (predicate) as a type."""


## Terms
# Now down to reality
class ValueLevelExpr(object):
    """A base class for value-level expressions."""

    @property
    def type(self) -> TypeExpr:
        """The type of an expression."""


class AscribeExpr(TypeLevelExpr, NamedTuple):
    """Ascribe a type (via a type-level expression) to a value-level expression.

    FIX(review): the patch declared this as a plain class carrying only
    annotations, but ``Analyzer._chomp`` constructs it with two positional
    arguments — it must be a NamedTuple for that call to work.

    NOTE(review): ``class X(Base, NamedTuple)`` multiple inheritance (used
    throughout this file) is rejected by Python >= 3.9 — confirm the target
    interpreter version.
    """
    value: ValueLevelExpr
    type: TypeLevelExpr


class ConstExpr(ValueLevelExpr, NamedTuple):
    """Constant expressions. Keywords, strings, numbers, that sort of thing."""

    token: p.ConstTokenBase

    @property
    def data(self) -> Any:
        """The value of the constant."""
        # The parser gives us this data
        return self.token.data

    @property
    def type(self):
        # FIX(review): was decorated ``@abstractmethod`` on a non-ABC class,
        # which is inert; raise explicitly so un-overridden access fails loudly.
        raise NotImplementedError()


class BooleanExpr(ConstExpr):
    @property
    def type(self):
        return BuiltinType.BOOLEAN


class IntegerExpr(ConstExpr):
    @property
    def type(self):
        return BuiltinType.INTEGER


class FractionExpr(ConstExpr):
    @property
    def type(self):
        return BuiltinType.FRACTION


class FloatExpr(ConstExpr):
    @property
    def type(self):
        return BuiltinType.FLOAT


class KeywordExpr(ConstExpr):
    @property
    def type(self):
        return BuiltinType.KEYWORD


class StringExpr(ConstExpr):
    @property
    def type(self):
        return BuiltinType.STRING


class ListExpr(ValueLevelExpr, NamedTuple):
    elements: List[ValueLevelExpr]

    # FIXME (arrdem 2020-07-18):
    #   Probably typed? Not sure.

    @property
    def type(self) -> TypeExpr:
        # The empty list has no type (None); otherwise the last element's type.
        if self.elements:
            return self.elements[-1].type


## 'real' AST nodes
class DoExpr(ValueLevelExpr, NamedTuple):
    """A sequence of effect expressions followed by a return expression."""
    effect_exprs: List[ValueLevelExpr]
    ret_expr: ValueLevelExpr

    @property
    def type(self) -> TypeExpr:
        return self.ret_expr.type


class LetExpr(ValueLevelExpr, NamedTuple):
    """An n-ary binding form wrapping a body."""
    binding_exprs: List[Tuple]
    ret_expr: DoExpr

    @property
    def type(self) -> TypeExpr:
        return self.ret_expr.type


class FnExpr(ValueLevelExpr, NamedTuple):
    """An anonymous function literal."""
    arguments: List
    ret_expr: DoExpr

    @property
    def type(self) -> TypeExpr:
        """This is where the fun begins."""
        return


## Reader implementation
class AnalyzerBase(ABC):
    """Analyzer interface."""

    @classmethod
    @abstractmethod
    def analyze(cls, token: p.TokenBase) -> ValueLevelExpr:
        """Analyze a token tree, returning an expr tree."""


class Analyzer(AnalyzerBase):
    """A reference Analyzer implementation.

    Walks a parsed token tree, building up a syntax tree.
    """

    # Tokens recognized as the type-ascription "tack".
    # NOTE(review): these compare with ``==`` against parsed tokens below, yet
    # carry pos=None (and TACK0's raw text is '/⊢') — confirm SymbolToken
    # equality ignores position/raw, otherwise the comparison never matches.
    TACK0 = p.SymbolToken('⊢', '/⊢', None)
    TACK1 = p.SymbolToken('|-', '|-', None)

    @classmethod
    def _nows(cls, tokens):
        """Strip whitespace tokens, which carry no syntax."""
        return [t for t in tokens if not isinstance(t, p.WhitespaceToken)]

    @classmethod
    def _chomp(cls, tokens):
        """'chomp' an expression and optional ascription off the tokens,
        returning an expression and the remaining tokens."""
        # FIX(review): removed a stray debug ``print(tokens)``.
        if len(tokens) == 1:
            return cls.analyze(tokens[0]), []
        elif tokens[1] in [cls.TACK0, cls.TACK1]:
            if len(tokens) >= 3:
                return AscribeExpr(cls.analyze(tokens[0]), cls.analyze(tokens[2])), tokens[3:]
            else:
                raise SyntaxError(f"Analyzing tack at {tokens[1].pos}, did not find following type ascription!")
        else:
            return cls.analyze(tokens[0]), tokens[1::]

    @classmethod
    def _terms(cls, tokens):
        """Chomp all of ``tokens`` into a list of expressions."""
        terms = []
        tokens = cls._nows(tokens)
        while tokens:
            term, tokens = cls._chomp(tokens)
            terms.append(term)
        return terms

    @classmethod
    def analyze(cls, token: p.TokenBase):
        """Dispatch a single token (tree) to its expression constructor."""
        if isinstance(token, p.BooleanToken):
            return BooleanExpr(token)

        if isinstance(token, p.KeywordToken):
            return KeywordExpr(token)

        if isinstance(token, p.IntegerToken):
            return IntegerExpr(token)

        if isinstance(token, p.FractionToken):
            return FractionExpr(token)

        if isinstance(token, p.FloatToken):
            return FloatExpr(token)

        if isinstance(token, p.StringToken):
            return StringExpr(token)

        if isinstance(token, p.ListToken):
            return cls.analyze_list(token)

    # Ground (special) forms recognized at the head of a list.
    LET = p.SymbolToken('let', 'let', None)
    DO = p.SymbolToken('do', 'do', None)
    FN = p.SymbolToken('fn', 'fn', None)
    LIST = p.SymbolToken('list', 'list', None)
    QUOTE = p.SymbolToken('quote', 'quote', None)

    @classmethod
    def analyze_list(cls, token: p.ListToken):
        """Analyze a list, for which there are several 'ground' forms."""

        # Expunge any whitespace tokens
        tokens = cls._nows(token.data)

        if len(tokens) == 0:
            return ListExpr([])

        if tokens[0] == cls.QUOTE:
            raise NotImplementedError("Quote isn't quite there!")

        if tokens[0] == cls.LIST:
            return ListExpr(cls._terms(tokens[1::]))

        if tokens[0] == cls.DO:
            return cls.analyze_do(tokens[1::])

        if tokens[0] == cls.LET:
            return cls.analyze_let(tokens[1::])

        if tokens[0] == cls.FN:
            return cls.analyze_fn(tokens[1::])

        # FIX(review): the result was computed and then discarded — return it.
        # NOTE(review): ``analyze_invoke`` is not yet defined in this patch.
        return cls.analyze_invoke(tokens)

    @classmethod
    def analyze_let(cls, tokens):
        """Analyze ``(let [bind val ...] body...)`` — binding pairs, then a body."""
        assert len(tokens) >= 2
        assert isinstance(tokens[0], p.ListToken)
        bindings = []
        binding_tokens = cls._nows(tokens[0].data)
        while binding_tokens:
            # FIX(review): removed a stray debug print.
            bindexpr, binding_tokens = cls._chomp(binding_tokens)
            valexpr, binding_tokens = cls._chomp(binding_tokens)
            bindings.append((bindexpr, valexpr))

        return LetExpr(bindings, cls.analyze_do(tokens[1::]))

    @classmethod
    def analyze_do(cls, tokens):
        """Analyze ``(do effects... ret)``."""
        exprs = cls._terms(tokens)
        # FIX(review): was ``exprs[::-1]`` — a full *reversed* copy, which both
        # reorders the effects and duplicates the return expression among them.
        # The effect exprs are everything but the last expression.
        return DoExpr(exprs[:-1], exprs[-1])

    @classmethod
    def analyze_fn(cls, tokens):
        """Analyze ``(fn [args...] body...)``."""
        assert len(tokens) >= 2
        assert isinstance(tokens[0], p.ListToken)
        args = []
        arg_tokens = cls._nows(tokens[0].data)
        while arg_tokens:
            argexpr, arg_tokens = cls._chomp(arg_tokens)
            args.append(argexpr)

        return FnExpr(args, cls.analyze_do(tokens[1::]))


## Analysis interface
def analyzes(buff: str,
             analyzer: AnalyzerBase = Analyzer,
             parser: p.SexpParser = p.Parser,
             source_name=None):
    """Analyze a single s-expression from a string, returning its expr tree."""
    # FIX(review): the fallback source name was a pointless empty f-string.
    return analyze(StringIO(buff), analyzer, parser, source_name or "")


def analyzef(path: str,
             analyzer: AnalyzerBase = Analyzer,
             parser: p.SexpParser = p.Parser):
    """Analyze a single s-expression from the file named by ``path``."""
    with open(path, "r") as f:
        return analyze(f, analyzer, parser, path)


def analyze(file: IO,
            analyzer: AnalyzerBase = Analyzer,
            parser: p.SexpParser = p.Parser,
            source_name=None):
    """Analyze a single s-expression from a file-like object, returning its expr tree."""
    return analyzer.analyze(p.parse(file, parser, source_name))
"""
Tests covering the Flowmetal analyzer.
"""

import flowmetal.parser as p
import flowmetal.syntax_analyzer as a

import pytest


@pytest.mark.parametrize('txt, exprtype', [
    # Booleans
    ('true', a.ConstExpr),
    ('false', a.BooleanExpr),
    # Integers
    ('1', a.ConstExpr),
    ('1', a.IntegerExpr),
    # Fractions
    ('1/2', a.ConstExpr),
    ('1/2', a.FractionExpr),
    # Floats
    ('1.0', a.ConstExpr),
    ('1.0', a.FloatExpr),
    # Keywords
    (':foo', a.ConstExpr),
    (':foo', a.KeywordExpr),
    # Strings
    ('"foo"', a.ConstExpr),
    ('"foo"', a.StringExpr),
])
def test_analyze_constants(txt, exprtype):
    """Make sure the analyzer can chew on constants."""
    expr = a.analyzes(txt)
    assert isinstance(expr, exprtype)


@pytest.mark.parametrize('txt, exprtype, rettype', [
    ('()', a.ListExpr, None),
    ('(list)', a.ListExpr, None),
    ('(list 1)', a.ListExpr, a.BuiltinType.INTEGER),
    ('(do foo bar 1)', a.DoExpr, a.BuiltinType.INTEGER),
    ('(let [a 1] 1)', a.LetExpr, a.BuiltinType.INTEGER),
])
def test_analyze_rettype(txt, exprtype, rettype):
    """Make sure that compound exprs analyze to the right node and type."""
    # Analyze once and check both the node class and its computed type.
    expr = a.analyzes(txt)
    assert isinstance(expr, exprtype)
    assert expr.type == rettype