Starting to build an analyzer
This commit is contained in:
parent
9f29067f6d
commit
226ece5eaa
2 changed files with 281 additions and 79 deletions
|
@ -5,11 +5,13 @@ The [syntax] analyzer interprets a parse sequence into a syntax tree which can b
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from io import StringIO, IO
|
from io import StringIO
|
||||||
from typing import NamedTuple
|
from typing import NamedTuple, List, Union, Any, IO, Tuple
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
import flowmetal.parser as p
|
import flowmetal.parser as p
|
||||||
|
|
||||||
|
|
||||||
### Types
|
### Types
|
||||||
## We are not, in fact, sponsored by Typelevel LLC.
|
## We are not, in fact, sponsored by Typelevel LLC.
|
||||||
class TypeLevelExpr(object):
|
class TypeLevelExpr(object):
|
||||||
|
@ -24,135 +26,289 @@ class GenericExpr(TypeLevelExpr, NamedTuple):
|
||||||
|
|
||||||
class TypeExpr(TypeLevelExpr, NamedTuple):
    """A type-level symbol, which may be bound or still waiting to be bound."""
|
||||||
|
|
||||||
|
|
||||||
## Now down to reality
|
class BuiltinType(TypeLevelExpr, Enum):
    """The built-in types which atoms (constant expressions) can have.

    Each member's value is the display name of the type.
    """

    # Truth values
    BOOLEAN = 'Boolean'

    # Names
    SYMBOL = 'Symbol'
    KEYWORD = 'Keyword'

    # Text
    STRING = 'String'

    # Numbers
    INTEGER = 'Integer'
    FRACTION = 'Fraction'
    FLOAT = 'Float'
|
||||||
|
|
||||||
|
|
||||||
|
class ConstraintExpr(TypeLevelExpr, NamedTuple):
    """A predicate over values, used as a type (a value-level constraint)."""
|
||||||
|
|
||||||
|
|
||||||
|
## Terms
|
||||||
|
# Now down to reality
|
||||||
class ValueLevelExpr(object):
    """Common ancestor of the value-level expression nodes."""

    @property
    def type(self) -> TypeExpr:
        """The type of this expression.

        The base implementation carries no type information and evaluates to
        None; concrete expression classes supply their own `type`.
        """
        return None
|
||||||
|
|
||||||
|
|
||||||
class AscribeExpr(TypeLevelExpr, NamedTuple):
    """Ascribe a type (via a type-level expression) to a value-level expression.

    This is a NamedTuple so that it can be built positionally as
    `AscribeExpr(value, type)`; a plain class with bare annotations has no
    such constructor.
    """

    # The expression being ascribed a type.
    value: ValueLevelExpr
    # The type-level expression being ascribed to `value`.
    type: TypeLevelExpr
|
||||||
|
|
||||||
|
|
||||||
class ConstExpr(ValueLevelExpr, NamedTuple):
    """Constant expressions. Keywords, strings, numbers, that sort of thing."""

    # The parser token this constant was built from.
    token: p.ConstTokenBase

    @property
    def data(self) -> Any:
        """The value of the constant, as carried by the parser's token."""
        return self.token.data

    @property
    @abstractmethod
    def type(self):
        """The type of the constant.

        Declared as a property so that it reads the same way as `type` on
        every other expression; each concrete constant class overrides it.

        Raises:
            NotImplementedError: always, when read on a bare ConstExpr.
        """
        raise NotImplementedError()
|
||||||
|
|
||||||
|
|
||||||
class BooleanExpr(ConstExpr):
    """A boolean constant."""

    @property
    def type(self) -> BuiltinType:
        """Booleans always have the built-in Boolean type."""
        return BuiltinType.BOOLEAN
|
||||||
|
|
||||||
|
|
||||||
class IntegerExpr(ConstExpr):
    """An integer constant."""

    @property
    def type(self) -> BuiltinType:
        """Integers always have the built-in Integer type."""
        return BuiltinType.INTEGER
|
||||||
|
|
||||||
|
|
||||||
class FractionExpr(ConstExpr):
    """A fraction constant."""

    @property
    def type(self) -> BuiltinType:
        """Fractions always have the built-in Fraction type."""
        return BuiltinType.FRACTION
|
|
||||||
|
|
||||||
|
|
||||||
class FloatExpr(ConstExpr):
    """A floating point constant."""

    @property
    def type(self) -> BuiltinType:
        """Floats always have the built-in Float type."""
        return BuiltinType.FLOAT
|
||||||
|
|
||||||
|
|
||||||
class KeywordExpr(ConstExpr):
    """A keyword constant."""

    @property
    def type(self) -> BuiltinType:
        """Keywords always have the built-in Keyword type."""
        return BuiltinType.KEYWORD
|
||||||
|
|
||||||
|
|
||||||
class StringExpr(ConstExpr):
    """A string constant."""

    @property
    def type(self) -> BuiltinType:
        """Strings always have the built-in String type."""
        return BuiltinType.STRING
|
||||||
|
|
||||||
|
|
||||||
class ListExpr(ValueLevelExpr, NamedTuple):
    """A list constructor expression, holding the expressions of its elements."""

    elements: List[ValueLevelExpr]

    # FIXME (arrdem 2020-07-18):
    #   Probably typed? Not sure.

    @property
    def type(self) -> TypeExpr:
        """The type of the last element, or None when the list is empty."""
        if not self.elements:
            return None
        return self.elements[-1].type
|
||||||
|
|
||||||
|
|
||||||
|
## 'real' AST nodes
|
||||||
|
class DoExpr(ValueLevelExpr, NamedTuple):
    """A sequence of expressions, the last of which provides the result."""

    # Expressions which come before the result expression.
    effect_exprs: List[ValueLevelExpr]
    # The expression whose type is the type of the whole `do`.
    ret_expr: ValueLevelExpr

    @property
    def type(self) -> TypeExpr:
        """A do expression has the type of its result expression."""
        result = self.ret_expr
        return result.type
|
||||||
|
|
||||||
|
|
||||||
|
class LetExpr(ValueLevelExpr, NamedTuple):
    """A group of bindings wrapped around a body."""

    # One tuple per binding.
    binding_exprs: List[Tuple]
    # The body; its type is the type of the whole `let`.
    ret_expr: DoExpr

    @property
    def type(self) -> TypeExpr:
        """A let expression has the type of its body."""
        body = self.ret_expr
        return body.type
|
||||||
|
|
||||||
|
|
||||||
|
class FnExpr(ValueLevelExpr, NamedTuple):
    """A function: a list of arguments and a body."""

    arguments: List
    ret_expr: DoExpr

    @property
    def type(self) -> TypeExpr:
        """This is where the fun begins.

        Function types are not computed yet, so this is always None.
        """
        return None
|
||||||
|
|
||||||
|
|
||||||
## Reader implementation
|
## Reader implementation
|
||||||
class AnalyzerBase(ABC):
    """The interface which analyzers implement."""

    @classmethod
    @abstractmethod
    def analyze(cls, token: p.TokenBase) -> ValueLevelExpr:
        """Turn a tree of parser tokens into a tree of expressions."""
|
||||||
|
|
||||||
|
|
||||||
class Analyzer(AnalyzerBase):
    """A reference Analyzer implementation.

    Walks a parsed token tree, building up a syntax tree.
    """

    # The 'tack' symbols, which separate an expression from its type ascription.
    # NOTE(review): TACK0's second field is '/⊢' where TACK1 simply repeats its
    # first field -- possibly a typo; confirm against p.SymbolToken.
    TACK0 = p.SymbolToken('⊢', '/⊢', None)
    TACK1 = p.SymbolToken('|-', '|-', None)

    # The symbols which head the 'ground' list forms.
    LET = p.SymbolToken('let', 'let', None)
    DO = p.SymbolToken('do', 'do', None)
    FN = p.SymbolToken('fn', 'fn', None)
    LIST = p.SymbolToken('list', 'list', None)
    QUOTE = p.SymbolToken('quote', 'quote', None)

    @classmethod
    def _nows(cls, tokens):
        """Return the tokens with every whitespace token removed."""
        return [t for t in tokens if not isinstance(t, p.WhitespaceToken)]

    @classmethod
    def _chomp(cls, tokens):
        """'chomp' an expression and optional ascription off the tokens, returning an expression and the remaining tokens.

        Raises:
            SyntaxError: if there are no tokens to chomp, or if a tack is not
                followed by a type ascription.
        """
        if not tokens:
            # Previously this surfaced as a bare IndexError from tokens[1].
            raise SyntaxError("Expected an expression, but ran out of tokens!")

        if len(tokens) == 1:
            return cls.analyze(tokens[0]), []

        if tokens[1] in [cls.TACK0, cls.TACK1]:
            if len(tokens) >= 3:
                return AscribeExpr(cls.analyze(tokens[0]), cls.analyze(tokens[2])), tokens[3:]
            raise SyntaxError(f"Analyzing tack at {tokens[1].pos}, did not find following type ascription!")

        return cls.analyze(tokens[0]), tokens[1:]

    @classmethod
    def _terms(cls, tokens):
        """Analyze every (optionally ascribed) expression in the tokens, in order."""
        terms = []
        tokens = cls._nows(tokens)
        while tokens:
            term, tokens = cls._chomp(tokens)
            terms.append(term)
        return terms

    @classmethod
    def analyze(cls, token: p.TokenBase):
        """Analyze a single token, returning the expression it denotes.

        Constant tokens become the matching constant expression and list
        tokens are handed to `analyze_list`. Any other token analyzes to None.
        """
        if isinstance(token, p.BooleanToken):
            return BooleanExpr(token)

        if isinstance(token, p.KeywordToken):
            return KeywordExpr(token)

        if isinstance(token, p.IntegerToken):
            return IntegerExpr(token)

        if isinstance(token, p.FractionToken):
            return FractionExpr(token)

        if isinstance(token, p.FloatToken):
            return FloatExpr(token)

        if isinstance(token, p.StringToken):
            return StringExpr(token)

        if isinstance(token, p.ListToken):
            return cls.analyze_list(token)

        # Nothing above matched; as before, such tokens analyze to None.
        return None

    @classmethod
    def analyze_list(cls, token: p.ListToken):
        """Analyze a list, for which there are several 'ground' forms.

        Raises:
            NotImplementedError: for quote forms, which are not supported yet.
        """
        # Expunge any whitespace tokens
        tokens = cls._nows(token.data)

        if not tokens:
            return ListExpr([])

        head, rest = tokens[0], tokens[1:]

        if head == cls.QUOTE:
            raise NotImplementedError("Quote isn't quite there!")

        if head == cls.LIST:
            return ListExpr(cls._terms(rest))

        if head == cls.DO:
            return cls.analyze_do(rest)

        if head == cls.LET:
            return cls.analyze_let(rest)

        if head == cls.FN:
            return cls.analyze_fn(rest)

        # The result used to be dropped here, so every invocation analyzed to None.
        # NOTE(review): analyze_invoke is not defined in the visible class --
        # presumably still to be written.
        return cls.analyze_invoke(tokens)

    @classmethod
    def analyze_let(cls, tokens):
        """Analyze the tokens following a `let` head into a LetExpr.

        The first token must be a list of alternating binding and value
        expressions; the remaining tokens are the body.
        """
        assert len(tokens) >= 2
        assert isinstance(tokens[0], p.ListToken)
        bindings = []
        binding_tokens = cls._nows(tokens[0].data)
        while binding_tokens:
            bindexpr, binding_tokens = cls._chomp(binding_tokens)
            valexpr, binding_tokens = cls._chomp(binding_tokens)
            bindings.append((bindexpr, valexpr))

        return LetExpr(bindings, cls.analyze_do(tokens[1:]))

    @classmethod
    def analyze_do(cls, tokens):
        """Analyze a sequence of body tokens into a DoExpr.

        The last expression is the result; everything before it is an effect.

        Raises:
            SyntaxError: if there are no body expressions at all.
        """
        exprs = cls._terms(tokens)
        if not exprs:
            # Previously this surfaced as a bare IndexError from exprs[-1].
            raise SyntaxError("Analyzing do, did not find any body expressions!")

        # All but the last expression are effects. This used to be exprs[::-1],
        # which is the whole list reversed.
        return DoExpr(exprs[:-1], exprs[-1])

    @classmethod
    def analyze_fn(cls, tokens):
        """Analyze the tokens following a `fn` head into a FnExpr.

        The first token must be a list of (optionally ascribed) arguments; the
        remaining tokens are the body.
        """
        assert len(tokens) >= 2
        assert isinstance(tokens[0], p.ListToken)
        args = []
        arg_tokens = cls._nows(tokens[0].data)
        while arg_tokens:
            argexpr, arg_tokens = cls._chomp(arg_tokens)
            args.append(argexpr)

        return FnExpr(args, cls.analyze_do(tokens[1:]))
|
||||||
|
|
||||||
## Analysis interface
|
## Analysis interface
|
||||||
def analyzes(buff: str,
             analyzer: AnalyzerBase = Analyzer,
             parser: p.SexpParser = p.Parser,
             source_name = None):
    """Parse and analyze a single s-expression from a string.

    When no source name is given, one is made up from the id of the string.
    Returns whatever the analyzer produces for the parsed token tree.
    """
    name = source_name or f"<string {id(buff):x}>"
    return analyze(StringIO(buff), analyzer, parser, name)
|
||||||
|
|
||||||
|
|
||||||
def analyzef(path: str,
             analyzer: AnalyzerBase = Analyzer,
             parser: p.SexpParser = p.Parser):
    """Parse and analyze a single s-expression from the file at the given path.

    The path doubles as the source name. Returns whatever the analyzer
    produces for the parsed token tree.
    """
    with open(path, "r") as source:
        result = analyze(source, analyzer, parser, path)
    return result
|
||||||
|
|
||||||
|
|
||||||
def analyze(file: IO,
            analyzer: AnalyzerBase = Analyzer,
            parser: p.SexpParser = p.Parser,
            source_name = None):
    """Parse and analyze a single s-expression from a file-like object.

    The file is first parsed with the given parser; the parse result is then
    handed to the analyzer, and the analyzer's result is returned.
    """
    parsed = p.parse(file, parser, source_name)
    return analyzer.analyze(parsed)
|
|
||||||
|
|
46
test/python/flowmetal/test_syntax_analyzer.py
Normal file
46
test/python/flowmetal/test_syntax_analyzer.py
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
"""
|
||||||
|
Tests covering the Flowmetal analyzer.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import flowmetal.parser as p
|
||||||
|
import flowmetal.syntax_analyzer as a
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('txt, exprtype', [
    # Booleans
    ('true', a.ConstExpr),
    ('false', a.BooleanExpr),
    # Integers
    ('1', a.ConstExpr),
    ('1', a.IntegerExpr),
    # Fractions
    ('1/2', a.ConstExpr),
    ('1/2', a.FractionExpr),
    # Floats
    ('1.0', a.ConstExpr),
    ('1.0', a.FloatExpr),
    # Keywords
    (':foo', a.ConstExpr),
    (':foo', a.KeywordExpr),
    # Strings
    ('"foo"', a.ConstExpr),
    ('"foo"', a.StringExpr),
])
def test_analyze_constants(txt, exprtype):
    """Each constant analyzes to an instance of the expected expression class."""
    analyzed = a.analyzes(txt)
    assert isinstance(analyzed, exprtype)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('txt, exprtype, rettype', [
    ('()', a.ListExpr, None),
    ('(list)', a.ListExpr, None),
    ('(list 1)', a.ListExpr, a.BuiltinType.INTEGER),
    ('(do foo bar 1)', a.DoExpr, a.BuiltinType.INTEGER),
    ('(let [a 1] 1)', a.LetExpr, a.BuiltinType.INTEGER),
])
def test_analyze_rettype(txt, exprtype, rettype):
    """List, do and let forms analyze to the expected class and report the expected type."""
    analyzed = a.analyzes(txt)
    assert isinstance(analyzed, exprtype)
    assert analyzed.type == rettype
|
Loading…
Reference in a new issue