Starting to build an analyzer

Reid 'arrdem' McKenzie 2020-07-18 18:46:09 -06:00
parent 9f29067f6d
commit 226ece5eaa
2 changed files with 281 additions and 79 deletions


@@ -5,11 +5,13 @@ The [syntax] analyzer interprets a parse sequence into a syntax tree which can b
 """

 from abc import ABC, abstractmethod
-from io import StringIO, IO
-from typing import NamedTuple
+from io import StringIO
+from typing import NamedTuple, List, Union, Any, IO, Tuple
+from enum import Enum

 import flowmetal.parser as p

 ### Types
 ## We are not, in fact, sponsored by Typelevel LLC.
 class TypeLevelExpr(object):
@@ -24,135 +26,289 @@ class GenericExpr(TypeLevelExpr, NamedTuple):
 class TypeExpr(TypeLevelExpr, NamedTuple):
     """A bound (or yet to be bound) type level symbol."""
     pass

-## Now down to reality
+class BuiltinType(TypeLevelExpr, Enum):
+    """Built in types for atoms."""
+    BOOLEAN = 'Boolean'
+    SYMBOL = 'Symbol'
+    KEYWORD = 'Keyword'
+    STRING = 'String'
+    INTEGER = 'Integer'
+    FRACTION = 'Fraction'
+    FLOAT = 'Float'
+
+
+class ConstraintExpr(TypeLevelExpr, NamedTuple):
+    """A value-level constraint (predicate) as a type."""
+
+
+## Terms
+# Now down to reality
 class ValueLevelExpr(object):
     """A base class for value-level expressions."""
-    pass
+
+    @property
+    def type(self) -> TypeExpr:
+        """The type of an expression."""

-class AscribeExpr(ValueLevelExpr, NamedTuple):
-    """Ascribe a type (via type-level expression) to a value-level expression."""
-    pass
+class AscribeExpr(TypeLevelExpr):
+    value: ValueLevelExpr
+    type: TypeLevelExpr

-class InvokeExpr(ValueLevelExpr, NamedTuple):
-    """(a ⊢ (fn A ⊢ B) [...] ⊢ A) ⊢ B"""
-    pass
+class ConstExpr(ValueLevelExpr, NamedTuple):
+    """Constant expressions. Keywords, strings, numbers, that sort of thing."""
+    token: p.ConstTokenBase
+
+    @property
+    def data(self) -> Any:
+        """The value of the constant."""
+        # The parser gives us this data
+        return self.token.data
+
+    @abstractmethod
+    def type(self):
+        raise NotImplementedError()

-class IfExpr(ValueLevelExpr, NamedTuple):
-    """(if test a ⊢ A b ⊢ B) ⊢ (Variant A B)."""
-    pass
+class BooleanExpr(ConstExpr):
+    @property
+    def type(self):
+        return BuiltinType.BOOLEAN

-class LetExpr(ValueLevelExpr, NamedTuple):
-    """Let a single binding and wrap a body. Yes one. N-ary let is an abstraction."""
-    pass
+class IntegerExpr(ConstExpr):
+    @property
+    def type(self):
+        return BuiltinType.INTEGER

-class DoExpr(ValueError, NamedTuple):
-    """do a procedure ahem sequence of things.
-
-    (do a b c ... ω Ω) Ω
-    """
-    pass
+class FractionExpr(ConstExpr):
+    @property
+    def type(self):
+        return BuiltinType.FRACTION

-ProcExpr = DoExpr  # ain't broke don't fix it
+class FloatExpr(ConstExpr):
+    @property
+    def type(self):
+        return BuiltinType.FLOAT

-class MappingExpr(ValueLevelExpr, NamedTuple):
-    """Mappings require their own constructor expression due to local/symbol references."""
-    pass
+class KeywordExpr(ConstExpr):
+    @property
+    def type(self):
+        return BuiltinType.KEYWORD

-class SetExpr(ValueLevelExpr, NamedTuple):
-    """Sets require their own constructor expression due to local/symbol references."""
-    pass
+class StringExpr(ConstExpr):
+    @property
+    def type(self):
+        return BuiltinType.STRING

 class ListExpr(ValueLevelExpr, NamedTuple):
     """While round () lists are generally InvokeExprs, [] lists are constructors like sets and maps."""
-    pass
+    elements: List[ValueLevelExpr]
+
+    # FIXME (arrdem 2020-07-18):
+    #   Probably typed? Not sure.
+    @property
+    def type(self) -> TypeExpr:
+        if self.elements:
+            return self.elements[-1].type

+## 'real' AST nodes
+class DoExpr(ValueLevelExpr, NamedTuple):
+    effect_exprs: List[ValueLevelExpr]
+    ret_expr: ValueLevelExpr
+
+    @property
+    def type(self) -> TypeExpr:
+        return self.ret_expr.type

+class LetExpr(ValueLevelExpr, NamedTuple):
+    binding_exprs: List[Tuple]
+    ret_expr: DoExpr
+
+    @property
+    def type(self) -> TypeExpr:
+        return self.ret_expr.type

+class FnExpr(ValueLevelExpr, NamedTuple):
+    arguments: List
+    ret_expr: DoExpr
+
+    @property
+    def type(self) -> TypeExpr:
+        """This is where the fun begins."""
+        return

 ## Reader implementation
-class SexpAnalyzer(ABC):
-    """A base class for Analyzers."""
-    pass
+class AnalyzerBase(ABC):
+    """Analyzer interface."""
+
+    @classmethod
+    @abstractmethod
+    def analyze(cls, token: p.TokenBase) -> ValueLevelExpr:
+        """Analyze a token tree, returning an expr tree."""

-class Analyzer(SexpAnalyzer):
+class Analyzer(AnalyzerBase):
     """A reference Analyzer implementation.

     Walks a parsed token tree, building up a syntax tree.
     """

     @classmethod
-    def read(cls, token: p.TokenBase):
-        if isinstance(token, p.WhitespaceToken):
-            ## Whitespace tokens are discarded when considering syntax
-            pass
+    def _nows(cls, tokens):
+        return [t for t in tokens if not isinstance(t, p.WhitespaceToken)]

-        elif isinstance(token, (p.StringToken, p.KeywordToken,
-                                p.IntegerToken, p.RationalToken, p.FloatToken)):
-            ## These are atoms we don't do much with
-            pass
-
-        elif isinstance(token, p.SetToken):
-            ## Set tokens have their own syntax object to allow for lexical sets
-            pass
-
-        elif isinstance(token, p.MappingToken):
-            ## As with sets, mappings have their own syntax object
-            pass
-
-        elif isinstance(token, p.ListToken):
-            ## This is the fun one because it's where most of the notation is implemented
-            pass
+    TACK0 = p.SymbolToken('⊢', '⊢', None)
+    TACK1 = p.SymbolToken('|-', '|-', None)

     @classmethod
-    def read_symexpr(cls, token: p.SymbolToken):
-        """Emit a representation of using a binding."""
+    def _chomp(cls, tokens):
+        """'chomp' an expression and optional ascription off the tokens, returning an expression and the remaining tokens."""
+
+        print(tokens)
+        if len(tokens) == 1:
+            return cls.analyze(tokens[0]), []
+        elif tokens[1] in [cls.TACK0, cls.TACK1]:
+            if len(tokens) >= 3:
+                return AscribeExpr(cls.analyze(tokens[0]), cls.analyze(tokens[2])), tokens[3:]
+            else:
+                raise SyntaxError(f"Analyzing tack at {tokens[1].pos}, did not find following type ascription!")
+        else:
+            return cls.analyze(tokens[0]), tokens[1::]

     @classmethod
-    def read_setexpr(cls, token: p.SetToken):
-        """Emit a SetExpr """
+    def _terms(cls, tokens):
+        terms = []
+        tokens = cls._nows(tokens)
+        while tokens:
+            term, tokens = cls._chomp(tokens)
+            terms.append(term)
+        return terms

     @classmethod
-    def
+    def analyze(cls, token: p.TokenBase):
+        if isinstance(token, p.BooleanToken):
+            return BooleanExpr(token)
+        if isinstance(token, p.KeywordToken):
+            return KeywordExpr(token)
+        if isinstance(token, p.IntegerToken):
+            return IntegerExpr(token)
+        if isinstance(token, p.FractionToken):
+            return FractionExpr(token)
+        if isinstance(token, p.FloatToken):
+            return FloatExpr(token)
+        if isinstance(token, p.StringToken):
+            return StringExpr(token)
+        if isinstance(token, p.ListToken):
+            return cls.analyze_list(token)
+
+    LET = p.SymbolToken('let', 'let', None)
+    DO = p.SymbolToken('do', 'do', None)
+    FN = p.SymbolToken('fn', 'fn', None)
+    LIST = p.SymbolToken('list', 'list', None)
+    QUOTE = p.SymbolToken('quote', 'quote', None)
+
+    @classmethod
+    def analyze_list(cls, token: p.ListToken):
+        """Analyze a list, for which there are several 'ground' forms."""
+
+        # Expunge any whitespace tokens
+        tokens = cls._nows(token.data)
+
+        if len(tokens) == 0:
+            return ListExpr([])
+        if tokens[0] == cls.QUOTE:
+            raise NotImplementedError("Quote isn't quite there!")
+        if tokens[0] == cls.LIST:
+            return ListExpr(cls._terms(tokens[1::]))
+        if tokens[0] == cls.DO:
+            return cls.analyze_do(tokens[1::])
+        if tokens[0] == cls.LET:
+            return cls.analyze_let(tokens[1::])
+        if tokens[0] == cls.FN:
+            return cls.analyze_fn(tokens[1::])
+
+        cls.analyze_invoke(tokens)

+    @classmethod
+    def analyze_let(cls, tokens):
+        assert len(tokens) >= 2
+        assert isinstance(tokens[0], p.ListToken)
+        bindings = []
+        binding_tokens = cls._nows(tokens[0].data)
+        while binding_tokens:
+            print("analyze_let", binding_tokens)
+            bindexpr, binding_tokens = cls._chomp(binding_tokens)
+            valexpr, binding_tokens = cls._chomp(binding_tokens)
+            bindings.append((bindexpr, valexpr))
+
+        return LetExpr(bindings, cls.analyze_do(tokens[1::]))
+
+    @classmethod
+    def analyze_do(cls, tokens):
+        exprs = cls._terms(tokens)
+        return DoExpr(exprs[:-1], exprs[-1])
+
+    @classmethod
+    def analyze_fn(cls, tokens):
+        assert len(tokens) >= 2
+        assert isinstance(tokens[0], p.ListToken)
+
+        args = []
+        arg_tokens = cls._nows(tokens[0].data)
+        while arg_tokens:
+            argexpr, arg_tokens = cls._chomp(arg_tokens)
+            args.append(argexpr)
+
+        return FnExpr(args, cls.analyze_do(tokens[1::]))

 ## Analysis interface
-def reads(buff: str,
-          reader: SexpReader = Reader,
-          parser: p.SexpParser = p.Parser,
-          source_name=None):
+def analyzes(buff: str,
+             analyzer: AnalyzerBase = Analyzer,
+             parser: p.SexpParser = p.Parser,
+             source_name = None):
     """Parse a single s-expression from a string, returning its token tree."""
-    return read(StringIO(buff), parser, source_name or f"<string {id(buff):x}>")
+    return analyze(StringIO(buff), analyzer, parser, source_name or f"<string {id(buff):x}>")

-def readf(path: str,
-          reader: SexpReader = Reader,
-          parser: p.SexpParser = p.Parser):
+def analyzef(path: str,
+             analyzer: AnalyzerBase = Analyzer,
+             parser: p.SexpParser = p.Parser):
     """Parse a single s-expression from the file named by a string, returning its token tree."""
     with open(path, "r") as f:
-        return read(f, parser, path)
+        return analyze(f, analyzer, parser, path)

-def read(file: IO,
-         reader: SexpReader = Reader,
-         parser: p.SexpParser = p.Parser,
-         source_name=None):
+def analyze(file: IO,
+            analyzer: AnalyzerBase = Analyzer,
+            parser: p.SexpParser = p.Parser,
+            source_name = None):
     """Parse a single sexpression from a file-like object, returning its token tree."""
-    return parser.parse(
-        PosTrackingBufferedReader(
-            file,
-            source_name=source_name
-        )
-    )
+    return analyzer.analyze(p.parse(file, parser, source_name))
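
The net effect of this file's changes: the reader-flavoured reads/readf/read entry points become analyzes/analyzef/analyze, with parsing delegated to flowmetal.parser and analysis to the reference Analyzer. A minimal usage sketch of the new interface follows; the s-expression inputs are illustrative and simply mirror the tests added below, not part of the commit itself.

import flowmetal.syntax_analyzer as a

# Constants analyze to typed ConstExpr subclasses.
half = a.analyzes("1/2")
assert isinstance(half, a.FractionExpr)
assert half.type == a.BuiltinType.FRACTION

# Special forms get their own nodes; a do (or let) block reports the
# type of its final, returning expression.
block = a.analyzes("(do foo bar 1)")
assert isinstance(block, a.DoExpr)
assert block.type == a.BuiltinType.INTEGER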


@@ -0,0 +1,46 @@
+"""
+Tests covering the Flowmetal analyzer.
+"""
+
+import flowmetal.parser as p
+import flowmetal.syntax_analyzer as a
+
+import pytest
+
+
+@pytest.mark.parametrize('txt, exprtype', [
+    # Booleans
+    ('true', a.ConstExpr),
+    ('false', a.BooleanExpr),
+    # Integers
+    ('1', a.ConstExpr),
+    ('1', a.IntegerExpr),
+    # Fractions
+    ('1/2', a.ConstExpr),
+    ('1/2', a.FractionExpr),
+    # Floats
+    ('1.0', a.ConstExpr),
+    ('1.0', a.FloatExpr),
+    # Keywords
+    (':foo', a.ConstExpr),
+    (':foo', a.KeywordExpr),
+    # Strings
+    ('"foo"', a.ConstExpr),
+    ('"foo"', a.StringExpr),
+])
+def test_analyze_constants(txt, exprtype):
+    """Make sure the analyzer can chew on constants."""
+    assert isinstance(a.analyzes(txt), exprtype)
+
+
+@pytest.mark.parametrize('txt, exprtype, rettype', [
+    ('()', a.ListExpr, None),
+    ('(list)', a.ListExpr, None),
+    ('(list 1)', a.ListExpr, a.BuiltinType.INTEGER),
+    ('(do foo bar 1)', a.DoExpr, a.BuiltinType.INTEGER),
+    ('(let [a 1] 1)', a.LetExpr, a.BuiltinType.INTEGER),
+])
+def test_analyze_rettype(txt, exprtype, rettype):
+    """Make sure that do exprs work."""
+    assert isinstance(a.analyzes(txt), exprtype)
+    assert a.analyzes(txt).type == rettype
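
One gap these tests leave open: analyze_list falls through to cls.analyze_invoke for ordinary call forms, but this commit neither defines analyze_invoke nor keeps the old InvokeExpr stub, and the fall-through result is not returned. A hypothetical sketch of how that path could be filled in later; InvokeExpr and InvokingAnalyzer below are illustrative names, not part of the commit.

from typing import List, NamedTuple

import flowmetal.syntax_analyzer as a


class InvokeExpr(NamedTuple):
    """Hypothetical invocation node: (f a b ...) applies f to its analyzed args."""
    target: a.ValueLevelExpr
    args: List[a.ValueLevelExpr]


class InvokingAnalyzer(a.Analyzer):
    @classmethod
    def analyze_invoke(cls, tokens):
        # Reuse _terms so arguments get the same tack (⊢ / |-) ascription
        # handling as let bindings and list elements.
        terms = cls._terms(tokens)
        return InvokeExpr(terms[0], terms[1:])

# Note: analyze_list would also need to return cls.analyze_invoke(tokens)
# for this result to reach callers.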