Starting to stub out a syntax analyzer

This commit is contained in:
Reid 'arrdem' McKenzie 2020-06-14 12:33:26 -06:00
parent 50e7213d7b
commit d60e690445
2 changed files with 149 additions and 4 deletions

View file

@ -80,7 +80,6 @@ class SymbolToken(NamedTuple, TokenBase):
raw: str
pos: Position
class KeywordToken(NamedTuple, TokenBase):
"""A read keyword."""
data: str
@ -335,7 +334,7 @@ class Parser(SexpParser):
buff = str(rtb)
return CommentToken(buff, buff, pos)
## Parsing
## Parsing interface
def parses(buff: str,
parser: SexpParser = Parser,
source_name=None):
@ -365,7 +364,7 @@ def parse(file: IO,
)
## Loading
## Loading interface
def loads(buff: str,
parser: SexpParser = Parser,
source_name=None):
@ -395,7 +394,7 @@ def load(file: IO,
)
## Dumping
## Dumping interface
def dump(file: IO, obj):
"""Given an object, dump its s-expression coding to the given file-like object."""

View file

@ -0,0 +1,146 @@
"""
The parser just parses and tokenizes.
The [syntax] analyzer interprets a parse sequence into a syntax tree which can be checked, type inferred and compiled.
"""
from abc import ABC, abstractmethod
from io import StringIO
from typing import IO, NamedTuple

import flowmetal.parser as p
### Types
## We are not, in fact, sponsored by Typelevel LLC.
class TypeLevelExpr(ABC):
    """A base class for type-level expressions.

    Concrete type-level expressions are NamedTuples that are registered as
    virtual subclasses of this class, so ``isinstance(x, TypeLevelExpr)`` and
    ``issubclass(X, TypeLevelExpr)`` hold for them.
    """


# NOTE: ``class X(TypeLevelExpr, NamedTuple)`` cannot be created -- ABC's
# metaclass (ABCMeta) and NamedTuple's metaclass are unrelated, so the class
# statement raises TypeError (metaclass conflict).  Declaring a plain
# NamedTuple and registering it with the ABC keeps both the tuple behaviour
# and the isinstance()/issubclass() relationship.
@TypeLevelExpr.register
class GenericExpr(NamedTuple):
    """'invocation' (application) of a generic type to Type[Level]Exprs."""


@TypeLevelExpr.register
class TypeExpr(NamedTuple):
    """A bound (or yet to be bound) type level symbol."""
## Now down to reality
class ValueLevelExpr(ABC):
    """A base class for value-level expressions.

    Concrete value-level expressions are NamedTuples that are registered as
    virtual subclasses of this class, so ``isinstance(x, ValueLevelExpr)`` and
    ``issubclass(X, ValueLevelExpr)`` hold for them.
    """


# NOTE: ``class X(ValueLevelExpr, NamedTuple)`` cannot be created -- ABC's
# metaclass (ABCMeta) and NamedTuple's metaclass are unrelated, so the class
# statement raises TypeError (metaclass conflict).  Each expression below is
# therefore a plain NamedTuple registered with the ABC.
@ValueLevelExpr.register
class AscribeExpr(NamedTuple):
    """Ascribe a type (via type-level expression) to a value-level expression."""


@ValueLevelExpr.register
class InvokeExpr(NamedTuple):
    """(a ⊢ (fn A ⊢ B) [...] ⊢ A) ⊢ B"""


@ValueLevelExpr.register
class IfExpr(NamedTuple):
    """(if test a ⊢ A b ⊢ B) ⊢ (Variant A B)."""


@ValueLevelExpr.register
class LetExpr(NamedTuple):
    """Let a single binding and wrap a body. Yes one. N-ary let is an abstraction."""


# Previously derived from the builtin ValueError (a typo for ValueLevelExpr);
# a do-expression is a value-level expression, not an exception.
@ValueLevelExpr.register
class DoExpr(NamedTuple):
    """do a procedure ahem sequence of things.

    (do a b c ... ω Ω) Ω
    """


ProcExpr = DoExpr  # ain't broke don't fix it


@ValueLevelExpr.register
class MappingExpr(NamedTuple):
    """Mappings require their own constructor expression due to local/symbol references."""


@ValueLevelExpr.register
class SetExpr(NamedTuple):
    """Sets require their own constructor expression due to local/symbol references."""


@ValueLevelExpr.register
class ListExpr(NamedTuple):
    """While round () lists are generally InvokeExprs, [] lists are constructors like sets and maps."""
## Analyzer implementation
class SexpAnalyzer(ABC):
    """Common abstract base for Analyzers.

    Declares no methods of its own; it only gives analyzer implementations a
    shared base type.
    """
class Analyzer(SexpAnalyzer):
    """A reference Analyzer implementation.

    Walks a parsed token tree, building up a syntax tree.

    Everything here is still a stub: each branch of `read` and each `read_*`
    helper has no body yet and therefore returns None.
    """

    @classmethod
    def read(cls, token: p.TokenBase):
        """Dispatch on the type of `token`.

        Every branch is currently a placeholder, so this returns None for any
        token, including token types that match no branch.
        """
        # NOTE(review): there is no branch for p.SymbolToken even though
        # `read_symexpr` exists below -- presumably still to be wired up.

        if isinstance(token, p.WhitespaceToken):
            ## Whitespace tokens are discarded when considering syntax
            pass

        elif isinstance(token, (p.StringToken, p.KeywordToken,
                                p.IntegerToken, p.RationalToken, p.FloatToken)):
            ## These are atoms we don't do much with
            pass

        elif isinstance(token, p.SetToken):
            ## Set tokens have their own syntax object to allow for lexical sets
            pass

        elif isinstance(token, p.MappingToken):
            ## As with sets, mappings have their own syntax object
            pass

        elif isinstance(token, p.ListToken):
            ## This is the fun one because it's where most of the notation is implemented
            pass

    @classmethod
    def read_symexpr(cls, token: p.SymbolToken):
        """Emit a representation of using a binding."""

    @classmethod
    def read_setexpr(cls, token: p.SetToken):
        """Emit a SetExpr """

    # TODO: add the remaining read_* methods.  An unfinished `@classmethod`
    # stub (a bare `def` with no name or body) stood here and was removed
    # because it does not parse.
## Analysis interface
def reads(buff: str,
          reader: SexpAnalyzer = Analyzer,
          parser: p.SexpParser = p.Parser,
          source_name=None):
    """Parse a single s-expression from a string, returning its token tree.

    Wraps `buff` in a StringIO and delegates to `read`.

    Args:
        buff: The text to parse.
        reader: The analyzer, forwarded to `read`.
        parser: The parser, forwarded to `read`.
        source_name: Name reported for the source; when falsy, a name of the
            form ``<string HEX>`` derived from ``id(buff)`` is used.

    Returns:
        Whatever `read` returns for the wrapped buffer.
    """
    # Arguments are passed by keyword: `read` takes `reader` before `parser`,
    # so the former positional call shifted `parser` into the `reader` slot
    # and `source_name` into the `parser` slot.
    return read(StringIO(buff),
                reader=reader,
                parser=parser,
                source_name=source_name or f"<string {id(buff):x}>")
def readf(path: str,
          reader: SexpAnalyzer = Analyzer,
          parser: p.SexpParser = p.Parser):
    """Parse a single s-expression from the file named by a string, returning its token tree.

    Opens `path` for reading in text mode and delegates to `read`, using the
    path itself as the source name.

    Args:
        path: Path of the file to parse.
        reader: The analyzer, forwarded to `read`.
        parser: The parser, forwarded to `read`.

    Returns:
        Whatever `read` returns for the opened file.
    """
    with open(path, "r") as f:
        # Arguments are passed by keyword: `read` takes `reader` before
        # `parser`, so the former positional call shifted `parser` into the
        # `reader` slot and `path` into the `parser` slot.
        return read(f, reader=reader, parser=parser, source_name=path)
def read(file: IO,
         reader: SexpAnalyzer = Analyzer,
         parser: p.SexpParser = p.Parser,
         source_name=None):
    """Parse a single s-expression from a file-like object, returning its token tree.

    Args:
        file: The file-like object to read from.
        reader: The analyzer to use.  NOTE(review): accepted but not applied
            yet -- the result of `parser.parse` is returned as-is; presumably
            it should be fed through `reader.read` once the Analyzer is more
            than a stub.
        parser: The parser whose `parse` is called on the wrapped file.
        source_name: Name forwarded to the position-tracking reader.

    Returns:
        The result of ``parser.parse`` on the wrapped file.
    """
    # NOTE(review): PosTrackingBufferedReader was referenced unqualified, but
    # nothing in this module imports it; this assumes it is defined in
    # flowmetal.parser (the only project import here) -- confirm.
    source = p.PosTrackingBufferedReader(file, source_name=source_name)
    return parser.parse(source)