diff --git a/src/python/flowmetal/parser.py b/src/python/flowmetal/parser.py
index ccfba1e..291d008 100644
--- a/src/python/flowmetal/parser.py
+++ b/src/python/flowmetal/parser.py
@@ -224,6 +224,8 @@ class Parser(SexpParser):
             return cls.parse_list(f)
         elif f.peek() == "[":
             return cls.parse_sqlist(f)
+        elif f.peek() == '"':
+            return cls.parse_str(f)
         elif cls.isspace(f.peek()):
             return cls.parse_whitespace(f)
         elif f.peek() == ";":
@@ -357,6 +359,92 @@ class Parser(SexpParser):
             return CommentToken(buff, buff, pos)
 
 
+    @classmethod
+    def handle_escape(cls, ch: str):
+        """Return the character denoted by the escape character ``ch``.
+
+        ``ch`` is the character that followed the backslash escape leader.
+        Raises ValueError if it is not a supported escape.
+        """
+        if ch == 'n':
+            return "\n"
+        elif ch == 'r':
+            return "\r"
+        elif ch == 'l':
+            return "\014"  # form feed
+        elif ch == 't':
+            return "\t"
+        elif ch == '"':
+            return '"'
+        elif ch == '\\':
+            return '\\'
+        raise ValueError("Unsupported string escape %r" % (ch,))
+
+    @classmethod
+    def _read_digits(cls, rtb, digits: str) -> str:
+        """Consume and return the longest run of characters drawn from ``digits``."""
+        buff = []
+        # Test truthiness first: if peek() yields '' at EOF, '' is "in" every
+        # string and an unguarded membership test would never terminate.
+        while rtb.peek() and rtb.peek() in digits:
+            buff.append(rtb.read())
+        return ''.join(buff)
+
+    @classmethod
+    def _parse_escape(cls, rtb):
+        """Decode one escape sequence whose backslash was already consumed."""
+        ch = rtb.peek()
+        if not ch:
+            raise ValueError("Unterminated string literal")
+        # Octal escape. The leading 0 counts as a digit so a bare \0 is NUL.
+        if ch == '0':
+            return chr(int(cls._read_digits(rtb, '01234567'), 8))
+        # Hex escape
+        if ch == 'x':
+            rtb.read()  # Discard the x marker
+            digits = cls._read_digits(rtb, '0123456789abcdefABCDEF')
+            if not digits:
+                raise ValueError("\\x escape requires at least one hex digit")
+            return chr(int(digits, 16))
+        return cls.handle_escape(rtb.read())
+
+    @classmethod
+    def parse_str(cls, f: PosTrackingBufferedReader):
+        """Parse a double-quoted string literal from ``f`` into a StringToken.
+
+        Decodes single-character escapes as well as octal (backslash 0 ...)
+        and hex (backslash x ...) escapes. Raises ValueError on an
+        unterminated string or a malformed escape.
+        """
+        with ReadThroughBuffer(f) as rtb:
+            # Read outside the assert: asserts are stripped under ``python -O``,
+            # which would leave the opening quote unconsumed.
+            opener = rtb.read()
+            assert opener == '"'
+            pos = rtb.pos()
+            content = []
+
+            while True:
+                ch = rtb.peek()
+                if not ch:
+                    raise ValueError("Unterminated string literal")
+                # Handle end of string
+                elif ch == '"':
+                    rtb.read()
+                    break
+                # Handle escape sequences
+                elif ch == '\\':
+                    rtb.read()  # Discard the escape leader
+                    content.append(cls._parse_escape(rtb))
+                else:
+                    content.append(rtb.read())
+
+            buff = str(rtb)
+            # NOTE(review): content is passed as a list of characters; confirm
+            # whether StringToken expects ''.join(content) instead.
+            return StringToken(content, buff, pos)
+
+
 ## Parsing interface
 def parses(buff: str,
            parser: SexpParser = Parser,
diff --git a/test/python/flowmetal/test_parser.py b/test/python/flowmetal/test_parser.py
index 11fda87..5de274f 100644
--- a/test/python/flowmetal/test_parser.py
+++ b/test/python/flowmetal/test_parser.py
@@ -132,3 +132,22 @@ def test_ambiguous_floats(txt, tokenization):
     """Parse examples of 'difficult' floats and symbols."""
     assert isinstance(p.parses(txt), tokenization), "Token type didn't match!"
     assert p.parses(txt).raw == txt, "Parse wasn't total!"
+
+
+@pytest.mark.parametrize('txt', [
+    r'"foo"',
+    r'"foo bar baz qux"',
+    r'"foo\nbar\tbaz\lqux"',
+    r'''"foo
+    bar
+    baz
+    qux"''',
+    r'""',
+    r'"\000 \x00"',
+    r'"\0"',
+    r'"\\"',
+    r'"\""',
+])
+def test_string(txt):
+    """Some examples of strings, and of escape sequences."""
+    assert isinstance(p.parses(txt), p.StringToken)