Get strings and some escapes sorta working

This commit is contained in:
Reid 'arrdem' McKenzie 2020-07-18 15:34:16 -06:00
parent e47adc9432
commit 7b47598b9f
2 changed files with 74 additions and 0 deletions

View file

@ -224,6 +224,8 @@ class Parser(SexpParser):
return cls.parse_list(f) return cls.parse_list(f)
elif f.peek() == "[": elif f.peek() == "[":
return cls.parse_sqlist(f) return cls.parse_sqlist(f)
elif f.peek() == '"':
return cls.parse_str(f)
elif cls.isspace(f.peek()): elif cls.isspace(f.peek()):
return cls.parse_whitespace(f) return cls.parse_whitespace(f)
elif f.peek() == ";": elif f.peek() == ";":
@ -357,6 +359,62 @@ class Parser(SexpParser):
return CommentToken(buff, buff, pos) return CommentToken(buff, buff, pos)
@classmethod
def handle_escape(cls, ch: str):
    """Translate the character following a backslash into the character it denotes.

    Supported single-character escapes:

    - ``n`` -> newline
    - ``r`` -> carriage return
    - ``l`` -> form feed
    - ``t`` -> horizontal tab
    - ``"`` -> a literal double quote
    - ``\\`` -> a literal backslash

    Any other character is returned unchanged, so an unrecognised escape
    such as ``\\q`` denotes ``q`` rather than producing ``None``.

    :param ch: The single character read immediately after the backslash.
    :return: The character the escape sequence stands for.
    """
    escapes = {
        'n': "\n",
        'r': "\r",
        'l': "\014",  # form feed
        't': "\t",
        # A lone quote: ``ch`` is a single character, so comparing it against
        # a two-character string could never match.
        '"': '"',
        '\\': '\\',
    }
    return escapes.get(ch, ch)
@classmethod
def parse_str(cls, f: PosTrackingBufferedReader):
    """Parse a double-quoted string literal from ``f``.

    The reader must be positioned on the opening ``"``.  Characters are
    consumed up to and including the matching closing quote.  A backslash
    introduces an escape sequence:

    - ``\\0`` followed by any number of octal digits -> that code point
    - ``\\x`` followed by one or more hex digits -> that code point
    - anything else is delegated to ``cls.handle_escape``

    :param f: The reader to consume the literal from.
    :return: A ``StringToken`` built from the list of decoded characters,
        ``str()`` of the read-through buffer (presumably the raw source
        text consumed -- confirm against ``ReadThroughBuffer``), and the
        position reported just after the opening quote was read.
    :raises ValueError: If input ends before the closing quote, or a
        ``\\x`` escape has no hex digits.
    """
    with ReadThroughBuffer(f) as rtb:
        # Read outside the assert: asserts are stripped under ``-O`` and the
        # opening quote must be consumed regardless.
        opener = rtb.read()
        assert opener == '"'
        pos = rtb.pos()
        # NOTE(review): content is handed to StringToken as a list of
        # characters, not a joined str -- confirm that is what it expects.
        content = []

        def take_digits(alphabet):
            # Consume the longest run of characters drawn from ``alphabet``.
            # The explicit emptiness check matters: ``'' in alphabet`` is
            # True, so a falsy peek at end of input must stop the loop.
            digits = []
            while True:
                nxt = rtb.peek()
                if not nxt or nxt not in alphabet:
                    break
                digits.append(rtb.read())
            return ''.join(digits)

        while True:
            ch = rtb.peek()

            if not ch:
                raise ValueError(
                    "Unexpected end of input inside a string literal")

            # Handle end of string
            if ch == '"':
                rtb.read()
                break

            # Ordinary character
            if ch != '\\':
                content.append(rtb.read())
                continue

            # Handle escape sequences
            rtb.read()  # Discard the escape leader
            kind = rtb.peek()

            if kind == '0':
                # Octal escape.  Keep the leading zero as part of the number
                # so that a bare ``\0`` decodes to NUL instead of failing on
                # an empty digit string; the numeric value is unaffected.
                digits = rtb.read() + take_digits('01234567')
                content.append(chr(int(digits, 8)))

            elif kind == 'x':
                # Hex escape
                rtb.read()  # Discard the 'x'
                digits = take_digits('0123456789abcdefABCDEF')
                if not digits:
                    raise ValueError(
                        "\\x escape requires at least one hex digit")
                content.append(chr(int(digits, 16)))

            else:
                content.append(cls.handle_escape(rtb.read()))

        buff = str(rtb)
        return StringToken(content, buff, pos)
## Parsing interface ## Parsing interface
def parses(buff: str, def parses(buff: str,
parser: SexpParser = Parser, parser: SexpParser = Parser,

View file

@ -132,3 +132,19 @@ def test_ambiguous_floats(txt, tokenization):
"""Parse examples of 'difficult' floats and symbols.""" """Parse examples of 'difficult' floats and symbols."""
assert isinstance(p.parses(txt), tokenization), "Token type didn't match!" assert isinstance(p.parses(txt), tokenization), "Token type didn't match!"
assert p.parses(txt).raw == txt, "Parse wasn't total!" assert p.parses(txt).raw == txt, "Parse wasn't total!"
@pytest.mark.parametrize('txt,', [
    # A single word
    r'"foo"',
    # Embedded spaces
    r'"foo bar baz qux"',
    # Single-character backslash escapes
    r'"foo\nbar\tbaz\lqux"',
    # Literal newlines inside the quotes
    r'''"foo
bar
baz
qux"''',
    # The empty string
    r'""',
    # Octal and hex escapes
    r'"\000 \x00"',
])
def test_string(txt):
    """Every example string literal must parse to a StringToken."""
    token = p.parses(txt)
    assert isinstance(token, p.StringToken)