From d60e690445ab702b570e15ca7a23e37a4882fd15 Mon Sep 17 00:00:00 2001
From: Reid 'arrdem' McKenzie
Date: Sun, 14 Jun 2020 12:33:26 -0600
Subject: [PATCH] Starting to stub out a syntax analyzer

---
 src/python/flowmetal/parser.py          |   7 +-
 src/python/flowmetal/syntax_analyzer.py | 146 ++++++++++++++++++++++++
 2 files changed, 149 insertions(+), 4 deletions(-)
 create mode 100644 src/python/flowmetal/syntax_analyzer.py

diff --git a/src/python/flowmetal/parser.py b/src/python/flowmetal/parser.py
index 850c733..eda3494 100644
--- a/src/python/flowmetal/parser.py
+++ b/src/python/flowmetal/parser.py
@@ -80,7 +80,6 @@ class SymbolToken(NamedTuple, TokenBase):
     raw: str
     pos: Position
 
-
 class KeywordToken(NamedTuple, TokenBase):
     """A read keyword."""
     data: str
@@ -335,7 +334,7 @@ class Parser(SexpParser):
         buff = str(rtb)
         return CommentToken(buff, buff, pos)
 
-## Parsing
+## Parsing interface
 
 def parses(buff: str,
            parser: SexpParser = Parser,
            source_name=None):
@@ -365,7 +364,7 @@ def parse(file: IO,
     )
 
-## Loading
+## Loading interface
 
 def loads(buff: str,
           parser: SexpParser = Parser,
          source_name=None):
@@ -395,7 +394,7 @@ def load(file: IO,
     )
 
-## Dumping
+## Dumping interface
 
 def dump(file: IO, obj):
     """Given an object, dump its s-expression coding to the given file-like object."""
diff --git a/src/python/flowmetal/syntax_analyzer.py b/src/python/flowmetal/syntax_analyzer.py
new file mode 100644
index 0000000..98c32eb
--- /dev/null
+++ b/src/python/flowmetal/syntax_analyzer.py
@@ -0,0 +1,146 @@
+"""
+The parser just parses and tokenizes.
+
+The [syntax] analyzer interprets a parse sequence into a syntax tree which can be checked, type-inferred and compiled.
+"""
+
+from abc import ABC, abstractmethod
+from io import StringIO
+from typing import IO, NamedTuple
+
+import flowmetal.parser as p
+
+
+### Types
+## We are not, in fact, sponsored by Typelevel LLC.
+class TypeLevelExpr(ABC):
+    """A base class for type-level expressions."""
+    pass
+
+
+class GenericExpr(TypeLevelExpr, NamedTuple):
+    """'invocation' (application) of a generic type to Type[Level]Exprs."""
+    pass
+
+
+class TypeExpr(TypeLevelExpr, NamedTuple):
+    """A bound (or yet to be bound) type-level symbol."""
+
+
+## Now down to reality
+class ValueLevelExpr(ABC):
+    """A base class for value-level expressions."""
+
+
+class AscribeExpr(ValueLevelExpr, NamedTuple):
+    """Ascribe a type (via type-level expression) to a value-level expression."""
+
+
+class InvokeExpr(ValueLevelExpr, NamedTuple):
+    """(a ⊢ (fn A ⊢ B) [...] ⊢ A) ⊢ B"""
+
+
+class IfExpr(ValueLevelExpr, NamedTuple):
+    """(if test a ⊢ A b ⊢ B) ⊢ (Variant A B)."""
+
+
+class LetExpr(ValueLevelExpr, NamedTuple):
+    """Let a single binding and wrap a body. Yes, one. N-ary let is an abstraction."""
+
+
+class DoExpr(ValueLevelExpr, NamedTuple):
+    """do a procedure ahem sequence of things.
+
+    (do a b c ... ω ⊢ Ω) ⊢ Ω
+    """
+
+
+ProcExpr = DoExpr  # ain't broke don't fix it
+
+
+class MappingExpr(ValueLevelExpr, NamedTuple):
+    """Mappings require their own constructor expression due to local/symbol references."""
+
+
+class SetExpr(ValueLevelExpr, NamedTuple):
+    """Sets require their own constructor expression due to local/symbol references."""
+
+
+class ListExpr(ValueLevelExpr, NamedTuple):
+    """While round () lists are generally InvokeExprs, [] lists are constructors like sets and maps."""
+
+
+## Reader implementation
+class SexpAnalyzer(ABC):
+    """A base class for Analyzers."""
+
+
+class Analyzer(SexpAnalyzer):
+    """A reference Analyzer implementation.
+
+    Walks a parsed token tree, building up a syntax tree.
+    """
+
+    @classmethod
+    def read(cls, token: p.TokenBase):
+        if isinstance(token, p.WhitespaceToken):
+            ## Whitespace tokens are discarded when considering syntax
+            pass
+
+        elif isinstance(token, (p.StringToken, p.KeywordToken,
+                                p.IntegerToken, p.RationalToken, p.FloatToken)):
+            ## These are atoms we don't do much with
+            pass
+
+        elif isinstance(token, p.SetToken):
+            ## Set tokens have their own syntax object to allow for lexical sets
+            pass
+
+        elif isinstance(token, p.MappingToken):
+            ## As with sets, mappings have their own syntax object
+            pass
+
+        elif isinstance(token, p.ListToken):
+            ## This is the fun one because it's where most of the notation is implemented
+            pass
+
+    @classmethod
+    def read_symexpr(cls, token: p.SymbolToken):
+        """Emit a representation of using a binding."""
+
+    @classmethod
+    def read_setexpr(cls, token: p.SetToken):
+        """Emit a SetExpr."""
+
+    @classmethod
+    def read_mapexpr(cls, token: p.MappingToken):
+        """Emit a MappingExpr."""
+
+
+## Analysis interface
+def reads(buff: str,
+          reader: SexpAnalyzer = Analyzer,
+          parser: p.SexpParser = p.Parser,
+          source_name=None):
+    """Read a single s-expression from a string, returning its syntax tree."""
+
+    return read(StringIO(buff), reader, parser, source_name or f"")
+
+
+def readf(path: str,
+          reader: SexpAnalyzer = Analyzer,
+          parser: p.SexpParser = p.Parser):
+    """Read a single s-expression from the file named by a string, returning its syntax tree."""
+
+    with open(path, "r") as f:
+        return read(f, reader, parser, path)
+
+
+def read(file: IO,
+         reader: SexpAnalyzer = Analyzer,
+         parser: p.SexpParser = p.Parser,
+         source_name=None):
+    """Read a single s-expression from a file-like object, returning its syntax tree."""
+
+    return reader.read(
+        parser.parse(
+            p.PosTrackingBufferedReader(
+                file,
+                source_name=source_name
+            )
+        )
+    )
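
A rough usage sketch of the intended analysis interface, for review only. It assumes
that reads() hands the parsed token tree to Analyzer.read() and that a round () form
eventually analyzes to an InvokeExpr; neither behavior is implemented by these stubs
yet.

    import flowmetal.syntax_analyzer as analyzer

    # Read and analyze a single form. The leading symbol would be read as a
    # binding use (read_symexpr) and the integer literals as atoms; the list as
    # a whole would presumably analyze to an InvokeExpr.
    expr = analyzer.reads("(+ 1 2)")
    assert isinstance(expr, analyzer.ValueLevelExpr)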