Get strings and some escapes sorta working
This commit is contained in:
parent
e47adc9432
commit
7b47598b9f
2 changed files with 74 additions and 0 deletions
|
@ -224,6 +224,8 @@ class Parser(SexpParser):
|
||||||
return cls.parse_list(f)
|
return cls.parse_list(f)
|
||||||
elif f.peek() == "[":
|
elif f.peek() == "[":
|
||||||
return cls.parse_sqlist(f)
|
return cls.parse_sqlist(f)
|
||||||
|
elif f.peek() == '"':
|
||||||
|
return cls.parse_str(f)
|
||||||
elif cls.isspace(f.peek()):
|
elif cls.isspace(f.peek()):
|
||||||
return cls.parse_whitespace(f)
|
return cls.parse_whitespace(f)
|
||||||
elif f.peek() == ";":
|
elif f.peek() == ";":
|
||||||
|
@ -357,6 +359,62 @@ class Parser(SexpParser):
|
||||||
return CommentToken(buff, buff, pos)
|
return CommentToken(buff, buff, pos)
|
||||||
|
|
||||||
|
|
||||||
|
@classmethod
def handle_escape(cls, ch: str):
    """Return the character denoted by a single-character escape code.

    ``ch`` is the one character that followed the backslash in a string
    literal.  Supported codes are ``n`` (newline), ``r`` (carriage return),
    ``l`` (form feed), ``t`` (tab), a double quote, and a backslash; the
    last two denote themselves.

    Raises:
        ValueError: if ``ch`` is not one of the supported codes (this
            includes the empty string, e.g. an escape cut off by end of
            input).  Previously such input silently produced ``None``.
    """
    simple_escapes = {
        'n': "\n",
        'r': "\r",
        'l': "\014",  # form feed
        't': "\t",
        # The escaped quote is ONE character; comparing against a
        # two-character string could never match a single char.
        '"': '"',
        '\\': '\\',
    }

    try:
        return simple_escapes[ch]
    except KeyError:
        raise ValueError(
            "Unsupported string escape: {!r}".format("\\" + ch)
        ) from None
|
||||||
|
|
||||||
|
@classmethod
def parse_str(cls, f: PosTrackingBufferedReader):
    """Parse a double-quoted string literal from ``f`` into a StringToken.

    The reader must be positioned on the opening double quote.  Characters
    are consumed up to and including the matching closing quote.  Inside
    the literal a backslash introduces an escape:

    * backslash + ``0`` + further octal digits: the code point in octal
      (a lone backslash-zero is NUL);
    * backslash + ``x`` + hex digits: the code point in hexadecimal;
    * anything else is delegated to ``cls.handle_escape``.

    Returns:
        ``StringToken(content, raw, pos)`` where ``content`` is the list of
        decoded characters, ``raw`` is ``str(rtb)`` (presumably the exact
        source text read through the buffer - confirm against
        ReadThroughBuffer), and ``pos`` is the reader position taken right
        after the opening quote was consumed.

    Raises:
        ValueError: on end of input before the closing quote, on an escape
            cut off by end of input, on a hex escape with no digits, or on
            an unsupported escape code.
    """

    def read_digits(reader, alphabet):
        # An exhausted reader peeks falsy.  That must be tested explicitly:
        # `"" in alphabet` is True for any string, so a bare membership
        # test would spin forever at end of input.
        digits = []
        while reader.peek() and reader.peek() in alphabet:
            digits.append(reader.read())
        return ''.join(digits)

    with ReadThroughBuffer(f) as rtb:
        # Consume the opening quote OUTSIDE the assert so the read still
        # happens when assertions are stripped (python -O).
        opener = rtb.read()
        assert opener == '"'
        pos = rtb.pos()
        # NOTE(review): content is handed to StringToken as a list of
        # characters, as before - confirm whether a joined str is expected.
        content = []

        while True:
            ch = rtb.peek()

            if not ch:
                raise ValueError(
                    "Unterminated string literal: reached end of input "
                    "before the closing quote"
                )

            # Handle end of string
            elif ch == '"':
                rtb.read()
                break

            # Handle escape sequences
            elif ch == '\\':
                rtb.read()  # Discard the escape leader
                code = rtb.peek()

                if not code:
                    raise ValueError(
                        "Unterminated string literal: escape sequence cut "
                        "off by end of input"
                    )

                # Octal escape
                elif code == '0':
                    # Keep the leading zero among the digits so a bare \0
                    # decodes to NUL instead of failing on int('', 8).
                    digits = read_digits(rtb, '01234567')
                    content.append(chr(int(digits, 8)))

                # Hex escape
                elif code == 'x':
                    rtb.read()  # Discard the 'x' marker
                    digits = read_digits(rtb, '0123456789abcdefABCDEF')
                    if not digits:
                        raise ValueError(
                            "Hex escape requires at least one hex digit"
                        )
                    content.append(chr(int(digits, 16)))

                else:
                    decoded = cls.handle_escape(rtb.read())
                    if decoded is None:
                        # Never let a missing translation leak into the
                        # decoded content as None.
                        raise ValueError(
                            "Unsupported string escape: {!r}".format(
                                "\\" + code
                            )
                        )
                    content.append(decoded)

            else:
                content.append(rtb.read())

        buff = str(rtb)
        return StringToken(content, buff, pos)
|
||||||
|
|
||||||
|
|
||||||
## Parsing interface
|
## Parsing interface
|
||||||
def parses(buff: str,
|
def parses(buff: str,
|
||||||
parser: SexpParser = Parser,
|
parser: SexpParser = Parser,
|
||||||
|
|
|
@ -132,3 +132,19 @@ def test_ambiguous_floats(txt, tokenization):
|
||||||
"""Parse examples of 'difficult' floats and symbols."""
|
"""Parse examples of 'difficult' floats and symbols."""
|
||||||
assert isinstance(p.parses(txt), tokenization), "Token type didn't match!"
|
assert isinstance(p.parses(txt), tokenization), "Token type didn't match!"
|
||||||
assert p.parses(txt).raw == txt, "Parse wasn't total!"
|
assert p.parses(txt).raw == txt, "Parse wasn't total!"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('txt,', [
    # Plain strings, with and without embedded spaces
    r'"foo"',
    r'"foo bar baz qux"',
    # Single-character escape codes
    r'"foo\nbar\tbaz\lqux"',
    # Literal newlines inside the string
    r'''"foo
bar
baz
qux"''',
    # The empty string
    r'""',
    # Octal and hex escapes
    r'"\000 \x00"',
])
def test_string(txt):
    """Each sample string literal must parse to a StringToken."""
    token = p.parses(txt)
    assert isinstance(token, p.StringToken)
|
||||||
|
|
Loading…
Reference in a new issue