Starting to build an analyzer
This commit is contained in:
parent
9f29067f6d
commit
226ece5eaa
2 changed files with 281 additions and 79 deletions
|
@ -5,11 +5,13 @@ The [syntax] analyzer interprets a parse sequence into a syntax tree which can b
|
|||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from io import StringIO, IO
|
||||
from typing import NamedTuple
|
||||
from io import StringIO
|
||||
from typing import NamedTuple, List, Union, Any, IO, Tuple
|
||||
from enum import Enum
|
||||
|
||||
import flowmetal.parser as p
|
||||
|
||||
|
||||
### Types
|
||||
## We are not, in fact, sponsored by Typelevel LLC.
|
||||
class TypeLevelExpr(object):
|
||||
|
@ -24,135 +26,289 @@ class GenericExpr(TypeLevelExpr, NamedTuple):
|
|||
|
||||
class TypeExpr(TypeLevelExpr, NamedTuple):
    """A bound (or yet to be bound) type level symbol."""
    # Marker type: carries no fields of its own; the docstring is the body.
|
||||
|
||||
|
||||
## Now down to reality
|
||||
class BuiltinType(TypeLevelExpr, Enum):
    """Built in types for atoms."""

    # One member per atomic constant kind the parser can produce.
    BOOLEAN = 'Boolean'
    SYMBOL = 'Symbol'
    KEYWORD = 'Keyword'
    STRING = 'String'
    INTEGER = 'Integer'
    FRACTION = 'Fraction'
    FLOAT = 'Float'
|
||||
|
||||
|
||||
class ConstraintExpr(TypeLevelExpr, NamedTuple):
    """A value-level constraint (predicate) as a type."""
|
||||
|
||||
|
||||
## Terms
|
||||
# Now down to reality
|
||||
class ValueLevelExpr(object):
    """A base class for value-level expressions."""

    @property
    def type(self) -> TypeExpr:
        """The type of an expression."""
        # Intentionally empty (yields None); concrete exprs override this.
|
||||
|
||||
|
||||
class AscribeExpr(ValueLevelExpr, NamedTuple):
    """Ascribe a type (via type-level expression) to a value-level expression.

    BUG fix: this must be a NamedTuple — `Analyzer._chomp` constructs it with
    two positional arguments (`AscribeExpr(expr, type_expr)`), which a plain
    class with bare annotations cannot accept.
    """

    # The expression being ascribed.
    value: ValueLevelExpr
    # The ascribed type; exposed as `.type` so ascriptions quack like exprs.
    type: TypeLevelExpr
|
||||
|
||||
|
||||
class InvokeExpr(ValueLevelExpr, NamedTuple):
|
||||
"""(a ⊢ (fn A ⊢ B) [...] ⊢ A) ⊢ B"""
|
||||
pass
|
||||
class ConstExpr(ValueLevelExpr, NamedTuple):
    """Constant expressions. Keywords, strings, numbers, that sort of thing."""

    # The parser token this constant was analyzed from.
    token: p.ConstTokenBase

    @property
    def data(self) -> Any:
        """The value of the constant."""
        # The parser gives us this data
        return self.token.data

    @property
    @abstractmethod
    def type(self):
        """The builtin type of this constant.

        Fix: made a property for consistency — every subclass overrides
        `type` as a @property, and callers access `.type` without calling.
        NOTE(review): @abstractmethod is not enforced here (NamedTuple, not
        ABCMeta); the NotImplementedError is the actual guard.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class IfExpr(ValueLevelExpr, NamedTuple):
|
||||
"""(if test a ⊢ A b ⊢ B) ⊢ (Variant A B)."""
|
||||
pass
|
||||
class BooleanExpr(ConstExpr):
    """A boolean constant expression."""

    @property
    def type(self):
        """Booleans are typed as BuiltinType.BOOLEAN."""
        return BuiltinType.BOOLEAN
|
||||
|
||||
|
||||
class LetExpr(ValueLevelExpr, NamedTuple):
|
||||
"""Let a single binding and wrap a body. Yes one. N-ary let is an abstraction."""
|
||||
pass
|
||||
class IntegerExpr(ConstExpr):
    """An integer constant expression."""

    @property
    def type(self):
        """Integers are typed as BuiltinType.INTEGER."""
        return BuiltinType.INTEGER
|
||||
|
||||
|
||||
class DoExpr(ValueError, NamedTuple):
|
||||
"""do a procedure ahem sequence of things.
|
||||
|
||||
(do a b c ... ω ⊢ Ω) ⊢ Ω
|
||||
"""
|
||||
pass
|
||||
class FractionExpr(ConstExpr):
    """A fraction (rational) constant expression."""

    @property
    def type(self):
        """Fractions are typed as BuiltinType.FRACTION."""
        return BuiltinType.FRACTION
|
||||
|
||||
|
||||
ProcExpr = DoExpr # ain't broke don't fix it
|
||||
class FloatExpr(ConstExpr):
    """A floating-point constant expression."""

    @property
    def type(self):
        """Floats are typed as BuiltinType.FLOAT."""
        return BuiltinType.FLOAT
|
||||
|
||||
|
||||
class MappingExpr(ValueLevelExpr, NamedTuple):
|
||||
"""Mappings require their own constructor expression due to local/symbol references."""
|
||||
pass
|
||||
class KeywordExpr(ConstExpr):
    """A keyword constant expression."""

    @property
    def type(self):
        """Keywords are typed as BuiltinType.KEYWORD."""
        return BuiltinType.KEYWORD
|
||||
|
||||
|
||||
class SetExpr(ValueLevelExpr, NamedTuple):
|
||||
"""Sets require their own constructor expression due to local/symbol references."""
|
||||
pass
|
||||
class StringExpr(ConstExpr):
    """A string constant expression."""

    @property
    def type(self):
        """Strings are typed as BuiltinType.STRING."""
        return BuiltinType.STRING
|
||||
|
||||
|
||||
class ListExpr(ValueLevelExpr, NamedTuple):
    """While round () lists are generally InvokeExprs, [] lists are constructors like sets and maps."""

    # The analyzed element expressions, in order.
    elements: List[ValueLevelExpr]

    # FIXME (arrdem 2020-07-18):
    #   Probably typed? Not sure.
    @property
    def type(self) -> TypeExpr:
        """The type of the last element, or None for an empty list."""
        return self.elements[-1].type if self.elements else None
|
||||
|
||||
|
||||
## 'real' AST nodes
|
||||
class DoExpr(ValueLevelExpr, NamedTuple):
    """A sequence of expressions evaluated for effect, yielding the last.

    (do a b c ... ω ⊢ Ω) ⊢ Ω
    """

    # Expressions evaluated only for their effects.
    effect_exprs: List[ValueLevelExpr]
    # The expression whose value the whole do-block takes.
    ret_expr: ValueLevelExpr

    @property
    def type(self) -> TypeExpr:
        """A do-block has the type of its return expression."""
        return self.ret_expr.type
|
||||
|
||||
|
||||
class LetExpr(ValueLevelExpr, NamedTuple):
    """A let form: (binding, value) pairs wrapping a body do-block."""

    # Pairs of (binding expr, value expr), as built by Analyzer.analyze_let.
    binding_exprs: List[Tuple]
    # The body; its value is the value of the whole let.
    ret_expr: DoExpr

    @property
    def type(self) -> TypeExpr:
        """A let takes the type of its body."""
        return self.ret_expr.type
|
||||
|
||||
|
||||
class FnExpr(ValueLevelExpr, NamedTuple):
    """A function abstraction: argument exprs and a body do-block."""

    # Argument expressions, as built by Analyzer.analyze_fn.
    arguments: List
    # The function body.
    ret_expr: DoExpr

    @property
    def type(self) -> TypeExpr:
        """This is where the fun begins."""
        # Function typing isn't implemented yet; yield None explicitly
        # (the original's bare `return` did the same implicitly).
        return None
|
||||
|
||||
|
||||
## Reader implementation
|
||||
class SexpAnalyzer(ABC):
|
||||
"""A base class for Analyzers."""
|
||||
pass
|
||||
class AnalyzerBase(ABC):
    """Analyzer interface."""

    # NOTE: @classmethod must be outermost for the abstract classmethod
    # stacking to behave.
    @classmethod
    @abstractmethod
    def analyze(cls, token: p.TokenBase) -> ValueLevelExpr:
        """Analyze a token tree, returning an expr tree."""
|
||||
|
||||
|
||||
class Analyzer(AnalyzerBase):
    """A reference Analyzer implementation.

    Walks a parsed token tree, building up a syntax tree.
    """

    # Sentinel symbol tokens recognized during analysis.
    TACK0 = p.SymbolToken('⊢', '/⊢', None)
    TACK1 = p.SymbolToken('|-', '|-', None)
    LET = p.SymbolToken('let', 'let', None)
    DO = p.SymbolToken('do', 'do', None)
    FN = p.SymbolToken('fn', 'fn', None)
    LIST = p.SymbolToken('list', 'list', None)
    QUOTE = p.SymbolToken('quote', 'quote', None)

    @classmethod
    def _nows(cls, tokens):
        """Discard whitespace tokens, which carry no syntax."""
        return [t for t in tokens if not isinstance(t, p.WhitespaceToken)]

    @classmethod
    def _chomp(cls, tokens):
        """'chomp' an expression and optional ascription off the tokens, returning an expression and the remaining tokens."""
        # Fix: removed leftover debug print(tokens).
        if len(tokens) == 1:
            return cls.analyze(tokens[0]), []
        elif tokens[1] in [cls.TACK0, cls.TACK1]:
            # A tack must be followed by a type-level expression to ascribe.
            if len(tokens) >= 3:
                return AscribeExpr(cls.analyze(tokens[0]), cls.analyze(tokens[2])), tokens[3:]
            else:
                raise SyntaxError(f"Analyzing tack at {tokens[1].pos}, did not find following type ascription!")
        else:
            return cls.analyze(tokens[0]), tokens[1:]

    @classmethod
    def _terms(cls, tokens):
        """Chomp an entire token sequence into a list of expressions."""
        terms = []
        tokens = cls._nows(tokens)
        while tokens:
            term, tokens = cls._chomp(tokens)
            terms.append(term)
        return terms

    @classmethod
    def analyze(cls, token: p.TokenBase):
        """Analyze a single token (tree), returning its expr.

        NOTE(review): symbol and whitespace tokens fall through and yield
        None — presumably to be handled later; confirm against the parser.
        """
        if isinstance(token, p.BooleanToken):
            return BooleanExpr(token)

        if isinstance(token, p.KeywordToken):
            return KeywordExpr(token)

        if isinstance(token, p.IntegerToken):
            return IntegerExpr(token)

        if isinstance(token, p.FractionToken):
            return FractionExpr(token)

        if isinstance(token, p.FloatToken):
            return FloatExpr(token)

        if isinstance(token, p.StringToken):
            return StringExpr(token)

        if isinstance(token, p.ListToken):
            return cls.analyze_list(token)

    @classmethod
    def analyze_list(cls, token: p.ListToken):
        """Analyze a list, for which there are several 'ground' forms."""

        # Expunge any whitespace tokens
        tokens = cls._nows(token.data)

        if len(tokens) == 0:
            return ListExpr([])

        if tokens[0] == cls.QUOTE:
            raise NotImplementedError("Quote isn't quite there!")

        if tokens[0] == cls.LIST:
            return ListExpr(cls._terms(tokens[1:]))

        if tokens[0] == cls.DO:
            return cls.analyze_do(tokens[1:])

        if tokens[0] == cls.LET:
            return cls.analyze_let(tokens[1:])

        if tokens[0] == cls.FN:
            return cls.analyze_fn(tokens[1:])

        # BUG fix: the `return` was missing, so invocation forms silently
        # analyzed to None.
        return cls.analyze_invoke(tokens)

    @classmethod
    def analyze_let(cls, tokens):
        """Analyze a let form: a binding list token followed by a body."""
        assert len(tokens) >= 2
        assert isinstance(tokens[0], p.ListToken)
        bindings = []
        binding_tokens = cls._nows(tokens[0].data)
        # Fix: removed leftover debug print of binding_tokens.
        while binding_tokens:
            # Bindings come in (name, value) pairs, each possibly ascribed.
            bindexpr, binding_tokens = cls._chomp(binding_tokens)
            valexpr, binding_tokens = cls._chomp(binding_tokens)
            bindings.append((bindexpr, valexpr))

        return LetExpr(bindings, cls.analyze_do(tokens[1:]))

    @classmethod
    def analyze_do(cls, tokens):
        """Analyze a do form: effect exprs followed by one return expr."""
        exprs = cls._terms(tokens)
        # BUG fix: was `exprs[::-1]`, which reverses the whole list (including
        # the return expr) instead of taking everything *before* it.
        # NOTE(review): an empty (do) still raises IndexError here — confirm
        # whether that should be a SyntaxError.
        return DoExpr(exprs[:-1], exprs[-1])

    @classmethod
    def analyze_fn(cls, tokens):
        """Analyze a fn form: an argument list token followed by a body."""
        assert len(tokens) >= 2
        assert isinstance(tokens[0], p.ListToken)
        args = []
        arg_tokens = cls._nows(tokens[0].data)
        while arg_tokens:
            argexpr, arg_tokens = cls._chomp(arg_tokens)
            args.append(argexpr)

        return FnExpr(args, cls.analyze_do(tokens[1:]))
|
||||
|
||||
## Analysis interface
|
||||
def analyzes(buff: str,
             analyzer: AnalyzerBase = Analyzer,
             parser: p.SexpParser = p.Parser,
             source_name=None):
    """Analyze a single s-expression from a string, returning its expr tree.

    Doc fix: this returns the *analyzed* expr tree, not a token tree.
    """

    return analyze(StringIO(buff), analyzer, parser, source_name or f"<string {id(buff):x}>")
|
||||
|
||||
|
||||
def analyzef(path: str,
             analyzer: AnalyzerBase = Analyzer,
             parser: p.SexpParser = p.Parser):
    """Analyze a single s-expression from the named file, returning its expr tree.

    Doc fix: this returns the *analyzed* expr tree, not a token tree.
    """

    with open(path, "r") as f:
        return analyze(f, analyzer, parser, path)
|
||||
|
||||
|
||||
def analyze(file: IO,
            analyzer: AnalyzerBase = Analyzer,
            parser: p.SexpParser = p.Parser,
            source_name=None):
    """Analyze a single s-expression from a file-like object, returning its expr tree.

    Parses the stream with `parser`, then hands the token tree to `analyzer`.
    Doc fix: this returns the *analyzed* expr tree, not a token tree.
    """

    return analyzer.analyze(p.parse(file, parser, source_name))
|
||||
|
|
46
test/python/flowmetal/test_syntax_analyzer.py
Normal file
46
test/python/flowmetal/test_syntax_analyzer.py
Normal file
|
@ -0,0 +1,46 @@
|
|||
"""
|
||||
Tests covering the Flowmetal analyzer.
|
||||
"""
|
||||
|
||||
import flowmetal.parser as p
|
||||
import flowmetal.syntax_analyzer as a
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.parametrize('txt, exprtype', [
    # Booleans
    ('true', a.ConstExpr),
    ('false', a.BooleanExpr),
    # Integers
    ('1', a.ConstExpr),
    ('1', a.IntegerExpr),
    # Fractions
    ('1/2', a.ConstExpr),
    ('1/2', a.FractionExpr),
    # Floats
    ('1.0', a.ConstExpr),
    ('1.0', a.FloatExpr),
    # Keywords
    (':foo', a.ConstExpr),
    (':foo', a.KeywordExpr),
    # Strings
    ('"foo"', a.ConstExpr),
    ('"foo"', a.StringExpr),
])
def test_analyze_constants(txt, exprtype):
    """Make sure the analyzer can chew on constants."""
    expr = a.analyzes(txt)
    assert isinstance(expr, exprtype)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('txt, exprtype, rettype', [
    ('()', a.ListExpr, None),
    ('(list)', a.ListExpr, None),
    ('(list 1)', a.ListExpr, a.BuiltinType.INTEGER),
    ('(do foo bar 1)', a.DoExpr, a.BuiltinType.INTEGER),
    ('(let [a 1] 1)', a.LetExpr, a.BuiltinType.INTEGER),
])
def test_analyze_rettype(txt, exprtype, rettype):
    """Make sure that do exprs work."""
    # Analyze once; check both the expr class and its inferred type.
    expr = a.analyzes(txt)
    assert isinstance(expr, exprtype)
    assert expr.type == rettype
|
Loading…
Reference in a new issue