source/projects/calf/tests/python/test_lexer.py

"""
Tests of calf.lexer
Tests both basic functionality, some examples and makes sure that arbitrary token sequences round
trip through the lexer.
"""
import calf.lexer as cl
from conftest import parametrize
import pytest
def lex_single_token(buffer):
"""Lexes a single token from the buffer."""
return next(iter(cl.lex_buffer(buffer)))
@parametrize(
"text,token_type",
[
        ("(", "PAREN_LEFT"),
        (")", "PAREN_RIGHT"),
        ("[", "BRACKET_LEFT"),
        ("]", "BRACKET_RIGHT"),
        ("{", "BRACE_LEFT"),
        ("}", "BRACE_RIGHT"),
        ("^", "META"),
        ("#", "MACRO_DISPATCH"),
("'", "SINGLE_QUOTE"),
        ("foo", "SYMBOL"),
("foo/bar", "SYMBOL"),
        (":foo", "KEYWORD"),
        (":foo/bar", "KEYWORD"),
        (" ,,\t ,, \t", "WHITESPACE"),
("\n\r", "WHITESPACE"),
("\n", "WHITESPACE"),
        (" , ", "WHITESPACE"),
("; this is a sample comment\n", "COMMENT"),
('"foo"', "STRING"),
('"foo bar baz"', "STRING"),
],
)
def test_lex_examples(text, token_type):
t = lex_single_token(text)
assert t.value == text
assert t.type == token_type
@parametrize(
"text,token_types",
[
("foo^bar", ["SYMBOL", "META", "SYMBOL"]),
("foo bar", ["SYMBOL", "WHITESPACE", "SYMBOL"]),
("foo-bar", ["SYMBOL"]),
("foo\nbar", ["SYMBOL", "WHITESPACE", "SYMBOL"]),
(
"{[^#()]}",
[
"BRACE_LEFT",
"BRACKET_LEFT",
"META",
"MACRO_DISPATCH",
"PAREN_LEFT",
"PAREN_RIGHT",
"BRACKET_RIGHT",
"BRACE_RIGHT",
],
),
("+", ["SYMBOL"]),
("-", ["SYMBOL"]),
("1", ["INTEGER"]),
("-1", ["INTEGER"]),
("-1.0", ["FLOAT"]),
("-1e3", ["FLOAT"]),
("+1.3e", ["FLOAT"]),
("f", ["SYMBOL"]),
("f1", ["SYMBOL"]),
("f1g2", ["SYMBOL"]),
("foo13-bar", ["SYMBOL"]),
("foo+13-12bar", ["SYMBOL"]),
("+-+-+-+-+", ["SYMBOL"]),
],
)
def test_lex_compound_examples(text, token_types):
t = cl.lex_buffer(text)
result_types = [token.type for token in t]
assert result_types == token_types
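

# The module docstring promises that arbitrary token sequences round-trip
# through the lexer. The sketch below is a minimal version of such a check,
# assuming only what the tests above already rely on: cl.lex_buffer yields
# tokens whose .value fields exactly cover the input text, and conftest's
# parametrize mirrors pytest.mark.parametrize for a single parameter name.
# The inputs and the test name are illustrative, not taken from the original
# suite.
@parametrize(
    "text",
    [
        "foo^bar",
        "{[^#()]}",
        "; this is a sample comment\nfoo",
        '(foo :bar [1 -2.0] "baz")',
    ],
)
def test_lex_round_trips(text):
    # Concatenating the lexed token values should reproduce the input buffer.
    tokens = list(cl.lex_buffer(text))
    assert "".join(t.value for t in tokens) == text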