Get strings and some escapes sorta working

2020-07-18 15:34:16 -06:00 · 2020-07-18 15:34:16 -06:00 · 7b47598b9f
commit 7b47598b9f
parent e47adc9432
2 changed files with 74 additions and 0 deletions
--- a/src/python/flowmetal/parser.py
+++ b/src/python/flowmetal/parser.py
@ -224,6 +224,8 @@ class Parser(SexpParser):
            return cls.parse_list(f)
        elif f.peek() == "[":
            return cls.parse_sqlist(f)
+        elif f.peek() == '"':
+            return cls.parse_str(f)
        elif cls.isspace(f.peek()):
            return cls.parse_whitespace(f)
        elif f.peek() == ";":
@ -357,6 +359,62 @@ class Parser(SexpParser):
            return CommentToken(buff, buff, pos)


+    @classmethod
+    def handle_escape(cls, ch: str):
+        """Return the character denoted by a single-character string escape.
+
+        `ch` is the character that followed the backslash escape leader.
+        Recognised escapes are: n (newline), r (carriage return), l (form
+        feed), t (tab), a double quote, and a backslash (the last two stand
+        for themselves).
+
+        Raises ValueError for any other character, rather than silently
+        returning None and letting it leak into the parsed string.
+        """
+        escapes = {
+            'n': "\n",
+            'r': "\r",
+            'l': "\014",  # form feed
+            't': "\t",
+            # A single quote character; comparing against a two-character
+            # string here could never match a single escaped character.
+            '"': '"',
+            '\\': '\\',
+        }
+        try:
+            return escapes[ch]
+        except KeyError:
+            raise ValueError("Unknown string escape: \\%s" % (ch,)) from None
+
+    @classmethod
+    def parse_str(cls, f: PosTrackingBufferedReader):
+        """Parse a double-quoted string literal from `f` and return a StringToken.
+
+        The reader must be positioned at the opening double quote. Characters
+        are consumed up to and including the closing double quote. After a
+        backslash, the following escapes are understood:
+
+          - `0` followed by zero or more octal digits: the character with
+            that code point (a lone `0` is NUL).
+          - `x` followed by one or more hex digits: the character with that
+            code point.
+          - anything else is delegated to `cls.handle_escape`.
+
+        Raises ValueError when the input ends before the closing quote, when
+        a hex escape has no digits, or when an escape is not recognised.
+        """
+        with ReadThroughBuffer(f) as rtb:
+            # Read outside of the assert: under `python -O` asserts are
+            # stripped, and the opening quote must still be consumed.
+            opener = rtb.read()
+            assert opener == '"', "parse_str must start at an opening double quote"
+            pos = rtb.pos()
+            # NOTE(review): content is handed to StringToken as a list of
+            # characters, not a joined str -- confirm that is what it expects.
+            content = []
+
+            while True:
+                ch = rtb.peek()
+                if not ch:
+                    # A bare `raise` here had no active exception to re-raise.
+                    raise ValueError("Unterminated string literal: hit end of input")
+                # Handle end of string
+                elif ch == '"':
+                    rtb.read()
+                    break
+                # Handle escape sequences
+                elif ch == '\\':
+                    rtb.read()  # Discard the escape leader
+                    kind = rtb.peek()
+                    if not kind:
+                        raise ValueError("Unterminated escape sequence: hit end of input")
+
+                    # Octal escape
+                    elif kind == '0':
+                        rtb.read()
+                        buff = []
+                        # Guard against end of input: an empty peek is "in"
+                        # every string, which would otherwise loop forever.
+                        while rtb.peek() and rtb.peek() in '01234567':
+                            buff.append(rtb.read())
+                        # A lone leading 0 with no further digits denotes NUL.
+                        content.append(chr(int(''.join(buff) or '0', 8)))
+
+                    # Hex escape
+                    elif kind == 'x':
+                        rtb.read()  # Discard the escape leader
+                        buff = []
+                        while rtb.peek() and rtb.peek() in '0123456789abcdefABCDEF':
+                            buff.append(rtb.read())
+                        if not buff:
+                            raise ValueError("Hex escape requires at least one hex digit")
+                        content.append(chr(int(''.join(buff), 16)))
+
+                    else:
+                        escaped = cls.handle_escape(rtb.read())
+                        if escaped is None:
+                            # Never let an unrecognised escape put None into
+                            # the string content.
+                            raise ValueError("Unknown escape sequence in string literal")
+                        content.append(escaped)
+
+                else:
+                    content.append(rtb.read())
+
+        buff = str(rtb)
+        return StringToken(content, buff, pos)
+
+
 ## Parsing interface
 def parses(buff: str,
           parser: SexpParser = Parser,
--- a/test/python/flowmetal/test_parser.py
+++ b/test/python/flowmetal/test_parser.py
@ -132,3 +132,19 @@ def test_ambiguous_floats(txt, tokenization):
    """Parse examples of 'difficult' floats and symbols."""
    assert isinstance(p.parses(txt), tokenization), "Token type didn't match!"
    assert p.parses(txt).raw == txt, "Parse wasn't total!"
+
+
+# Raw literals are used so that backslashes reach the parser untouched and
+# are interpreted by the parser's own escape handling, not by Python.
+@pytest.mark.parametrize('txt,', [
+    # Plain strings, with and without embedded spaces
+    r'"foo"',
+    r'"foo bar baz qux"',
+    # Single-character escapes: n, t and l
+    r'"foo\nbar\tbaz\lqux"',
+    # A string containing literal (unescaped) line breaks
+    r'''"foo
    bar
    baz
    qux"''',
+    # The empty string
+    r'""',
+    # Octal (leading 0) and hex (leading x) escapes
+    r'"\000 \x00"',
+])
+def test_string(txt):
+    """Some examples of strings, and of escape sequences.
+
+    Only the type of the resulting token is checked; the parsed contents are
+    not inspected.
+    """
+    assert isinstance(p.parses(txt), p.StringToken)