Get strings and some escapes sorta working
This commit is contained in:
parent
e47adc9432
commit
7b47598b9f
2 changed files with 74 additions and 0 deletions
|
@ -224,6 +224,8 @@ class Parser(SexpParser):
|
|||
return cls.parse_list(f)
|
||||
elif f.peek() == "[":
|
||||
return cls.parse_sqlist(f)
|
||||
elif f.peek() == '"':
|
||||
return cls.parse_str(f)
|
||||
elif cls.isspace(f.peek()):
|
||||
return cls.parse_whitespace(f)
|
||||
elif f.peek() == ";":
|
||||
|
@ -357,6 +359,62 @@ class Parser(SexpParser):
|
|||
return CommentToken(buff, buff, pos)
|
||||
|
||||
|
||||
@classmethod
def handle_escape(cls, ch: str):
    r"""Return the character denoted by the single-character escape ``ch``.

    ``ch`` is the character that followed the backslash in a string literal
    (the backslash itself has already been consumed by the caller).

    Supported escapes:

    * ``n`` -> newline
    * ``r`` -> carriage return
    * ``l`` -> form feed
    * ``t`` -> horizontal tab
    * ``"`` -> a literal double quote
    * ``\`` -> a literal backslash

    Raises:
        ValueError: if ``ch`` is not one of the supported escape characters
            (this includes the empty string, e.g. input ending right after
            a backslash).
    """
    escapes = {
        "n": "\n",
        "r": "\r",
        "l": "\014",  # form feed
        "t": "\t",
        # A single quote character: comparing against a two-character string
        # could never match a one-character escape.
        '"': '"',
        "\\": "\\",
    }
    try:
        return escapes[ch]
    except KeyError:
        # Fail loudly rather than returning None, which would otherwise end
        # up inside the parsed string's content.
        raise ValueError(
            "Unsupported escape character: {!r}".format(ch)
        ) from None
|
||||
|
||||
@classmethod
def parse_str(cls, f: PosTrackingBufferedReader):
    r"""Parse a double-quoted string literal from ``f`` into a ``StringToken``.

    The reader must be positioned on the opening ``"``. Characters are
    consumed up to and including the closing ``"``. Inside the literal a
    backslash introduces an escape:

    * ``\0`` followed by zero or more octal digits -> that code point
      (a bare ``\0`` is NUL),
    * ``\x`` followed by one or more hex digits -> that code point,
    * anything else is delegated to ``cls.handle_escape``.

    Returns:
        ``StringToken(content, raw, pos)`` where ``content`` is the list of
        decoded characters, ``raw`` is ``str()`` of the read-through buffer
        and ``pos`` is the buffer position taken right after the opening
        quote was read.

    Raises:
        AssertionError: if the next character is not ``"``.
        RuntimeError: if the input ends before the closing quote.
        ValueError: if a ``\x`` escape has no hex digits (or the numeric
            value is rejected by ``int``/``chr``).
    """
    octal_digits = "01234567"
    hex_digits = "0123456789abcdefABCDEF"

    with ReadThroughBuffer(f) as rtb:
        # Consume the opening quote *outside* the assert: an assert's
        # expression is not evaluated under `python -O`, which would leave
        # the quote unread and terminate the string immediately.
        opener = rtb.read()
        assert opener == '"'
        pos = rtb.pos()
        content = []

        def read_digits(alphabet):
            """Consume and return the longest run of characters in ``alphabet``."""
            digits = []
            while True:
                nxt = rtb.peek()
                # Explicit EOF guard: `'' in alphabet` is True for any str,
                # so a bare membership test would loop forever at EOF.
                if not nxt or nxt not in alphabet:
                    return "".join(digits)
                digits.append(rtb.read())

        while True:
            ch = rtb.peek()

            if not ch:
                # Same exception type a bare `raise` produced here, but
                # with a message that explains the failure.
                raise RuntimeError(
                    "Unexpected end of input inside string literal")

            # Handle end of string
            if ch == '"':
                rtb.read()
                break

            # Ordinary character
            if ch != "\\":
                content.append(rtb.read())
                continue

            # Handle escape sequences
            rtb.read()  # Discard the escape leader
            lead = rtb.peek()
            if not lead:
                raise RuntimeError(
                    "Unexpected end of input inside string literal")

            if lead == "0":
                # Octal escape; the leading 0 is consumed and any further
                # octal digits give the value (none -> NUL).
                rtb.read()
                digits = read_digits(octal_digits)
                content.append(chr(int(digits or "0", 8)))
            elif lead == "x":
                # Hex escape
                rtb.read()  # Discard the 'x'
                digits = read_digits(hex_digits)
                if not digits:
                    raise ValueError(
                        "\\x escape requires at least one hex digit")
                content.append(chr(int(digits, 16)))
            else:
                content.append(cls.handle_escape(rtb.read()))

        buff = str(rtb)
        return StringToken(content, buff, pos)
|
||||
|
||||
|
||||
## Parsing interface
|
||||
def parses(buff: str,
|
||||
parser: SexpParser = Parser,
|
||||
|
|
|
@ -132,3 +132,19 @@ def test_ambiguous_floats(txt, tokenization):
|
|||
"""Parse examples of 'difficult' floats and symbols."""
|
||||
assert isinstance(p.parses(txt), tokenization), "Token type didn't match!"
|
||||
assert p.parses(txt).raw == txt, "Parse wasn't total!"
|
||||
|
||||
|
||||
@pytest.mark.parametrize('txt', (
    # Plain strings
    '"foo"',
    '"foo bar baz qux"',
    # Single-character escapes, kept raw so the parser sees the backslashes
    r'"foo\nbar\tbaz\lqux"',
    # Literal newlines embedded in the string
    '"foo\nbar\nbaz\nqux"',
    # Empty string
    '""',
    # Octal and hex escapes
    r'"\000 \x00"',
))
def test_string(txt):
    """Every sample string literal, escapes included, parses to a StringToken."""
    token = p.parses(txt)
    assert isinstance(token, p.StringToken)
|
||||
|
|
Loading…
Reference in a new issue