Vendor in ast and utokenize from pycopy

This commit is contained in:
Reid 'arrdem' McKenzie 2023-03-08 15:09:58 -07:00
parent 471af02d9b
commit c4930e0be8
8 changed files with 2130 additions and 0 deletions

6
components/uast/BUILD Normal file
View file

@ -0,0 +1,6 @@
# Build target for the vendored `uast` component. It depends on the sibling
# utokenize component, which the package imports when parsing.
py_project(
    name = "uast",
    lib_deps = [
        "//components/utokenize",
    ]
)

View file

@ -0,0 +1,5 @@
# pycopy-ast
A small parallel implementation of Python's `ast` module, vendored from `pycopy-ast==2.9.1`.
[pycopy-ast](https://pypi.org/project/pycopy-ast/) is released under the MIT license, copyright © Paul Sokolovsky 2021.

View file

@ -0,0 +1,126 @@
# (c) 2019 Paul Sokolovsky. MIT license.
from .types import *
def dump_to_stream(t, file):
    """Write a textual dump of ``t`` to ``file``.

    AST nodes are rendered as ``ClassName(field=value, ...)``, lists as
    ``[item, ...]`` and everything else through ``repr``. Fields whose name
    starts with an underscore are omitted, and a field that is not set on the
    node is rendered as ``None``.

    Args:
        t: an AST node, a list, or any other value.
        file: object with a ``write(str)`` method.
    """
    if isinstance(t, AST):
        file.write(type(t).__name__)
        file.write("(")
        public_names = [name for name in t._fields if not name.startswith("_")]
        for position, name in enumerate(public_names):
            # Every field but the first is preceded by a separator.
            label = (", " if position else "") + name + "="
            file.write(label)
            dump_to_stream(getattr(t, name, None), file)
        file.write(")")
        return

    if isinstance(t, list):
        file.write("[")
        for position, item in enumerate(t):
            if position:
                file.write(", ")
            dump_to_stream(item, file)
        file.write("]")
        return

    file.write(repr(t))
def dump(t):
    """Return the ``dump_to_stream`` rendering of ``t`` as a string."""
    from io import StringIO

    out = StringIO()
    dump_to_stream(t, out)
    return out.getvalue()
def iter_fields(t):
    """Yield ``(name, value)`` for each public field listed in ``t._fields``.

    Names starting with an underscore are skipped; a field that is not set on
    the node is reported with the value ``None``.
    """
    public_names = (name for name in t._fields if not name.startswith("_"))
    for name in public_names:
        yield name, getattr(t, name, None)
def copy_location(new_node, old_node):
    """Return ``new_node`` unchanged.

    This is a stub: nothing is read from ``old_node`` and nothing is copied
    onto ``new_node``.
    """
    return new_node
def parse_tokens(token_stream, filename="<unknown>", mode="exec"):
    """Parse a stream of tokens into a syntax tree.

    Args:
        token_stream: token source handed to ``parser.Parser``; its first
            token is matched as ``ENCODING``.
        filename: accepted for API symmetry; not used here.
        mode: ``"exec"`` returns the result of ``match_mod()``, ``"eval"``
            wraps an expression in ``Expression`` and ``"single"`` wraps a
            statement in ``Interactive``.

    Raises:
        ValueError: if ``mode`` is none of the three supported values.
    """
    import utokenize as tokenize

    from . import parser

    p = parser.Parser(token_stream)
    p.match(tokenize.ENCODING)

    if mode == "exec":
        return p.match_mod()
    if mode == "eval":
        return Expression(body=p.require_expr())
    if mode == "single":
        return Interactive(body=p.match_stmt())
    raise ValueError
def parse_stream(stream, filename="<unknown>", mode="exec"):
    """Tokenize ``stream`` and parse the tokens into a syntax tree.

    Args:
        stream: object with a ``readline`` method returning source lines.
        filename: forwarded to ``parse_tokens``.
        mode: forwarded to ``parse_tokens`` (``"exec"``, ``"eval"`` or
            ``"single"``).

    Returns:
        Whatever ``parse_tokens`` returns for the requested mode.
    """
    import utokenize as tokenize

    tstream = tokenize.tokenize(stream.readline)
    # filename and mode have to be passed on explicitly; without them every
    # call would be parsed with parse_tokens' defaults (i.e. as "exec").
    return parse_tokens(tstream, filename, mode)
def parse(source, filename="<unknown>", mode="exec"):
    """Parse the source string ``source`` by wrapping it in a text stream.

    ``filename`` and ``mode`` are handed to ``parse_stream`` as given.
    """
    from io import StringIO

    return parse_stream(StringIO(source), filename, mode)
class NodeVisitor:
    """Walk a syntax tree, dispatching each node to a ``visit_<ClassName>`` method.

    Subclasses define ``visit_<ClassName>`` methods for the node classes they
    care about; nodes without a dedicated method go to ``generic_visit``.
    """

    def visit(self, node):
        """Call the handler for ``node``'s class and return its result."""
        handler = getattr(self, "visit_" + node.__class__.__name__, None)
        if not handler:
            return self.generic_visit(node)
        return handler(node)

    def generic_visit(self, node):
        """Visit every AST child found in the fields of ``node``.

        A field holding a list has each AST element visited; a field holding
        a single AST node has that node visited. Other values are ignored.
        """
        for field_name in node._fields:
            value = getattr(node, field_name)
            children = value if isinstance(value, list) else [value]
            for child in children:
                if isinstance(child, AST):
                    self.visit(child)
class NodeTransformer(NodeVisitor):
    """A ``NodeVisitor`` whose visit results replace the visited nodes."""

    def generic_visit(self, node):
        """Rewrite the children of ``node`` in place and return ``node``.

        For list fields, each AST element is replaced by the result of
        visiting it: ``None`` drops the element, a list is spliced in, any
        other value takes the element's place. Non-AST elements are kept
        as they are. A field holding a single AST node is set to whatever
        visiting that node returns.
        """
        for field_name in node._fields:
            current = getattr(node, field_name)
            if isinstance(current, list):
                rebuilt = []
                for item in current:
                    if not isinstance(item, AST):
                        rebuilt.append(item)
                        continue
                    replacement = self.visit(item)
                    if isinstance(replacement, list):
                        rebuilt.extend(replacement)
                    elif replacement is not None:
                        rebuilt.append(replacement)
                setattr(node, field_name, rebuilt)
            elif isinstance(current, AST):
                setattr(node, field_name, self.visit(current))
        return node

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,467 @@
# (c) 2019 Paul Sokolovsky. MIT license.
class AST:
    """Base class of all syntax-tree nodes.

    Keyword arguments passed to the constructor are stored as attributes of
    the same name; no check is made against ``_fields``.
    """

    # Names of the node's child fields. Concrete node classes override this;
    # the empty default keeps code that iterates ``_fields`` working on the
    # base classes that declare no fields of their own.
    _fields = ()

    def __init__(self, **fields):
        for k, v in fields.items():
            setattr(self, k, v)
class mod(AST):
    """Common base class of the top-level (module-like) nodes."""


# Each top-level node carries a single ``body`` field.
class Module(mod):
    _fields = ("body",)


class Interactive(mod):
    _fields = ("body",)


class Expression(mod):
    _fields = ("body",)


class Suite(mod):
    _fields = ("body",)
class stmt(AST):
    """Common base class of the statement nodes."""


# --- definitions ---
class FunctionDef(stmt):
    _fields = ("name", "args", "body", "decorator_list", "returns")


class AsyncFunctionDef(stmt):
    _fields = ("name", "args", "body", "decorator_list", "returns")


class ClassDef(stmt):
    _fields = ("name", "bases", "keywords", "body", "decorator_list")


# --- simple statements carrying values or targets ---
class Return(stmt):
    _fields = ("value",)


class Delete(stmt):
    _fields = ("targets",)


class Assign(stmt):
    _fields = ("targets", "value")


class AugAssign(stmt):
    _fields = ("target", "op", "value")


class AnnAssign(stmt):
    _fields = ("target", "annotation", "value", "simple")


# --- compound statements ---
class For(stmt):
    _fields = ("target", "iter", "body", "orelse")


class AsyncFor(stmt):
    _fields = ("target", "iter", "body", "orelse")


class While(stmt):
    _fields = ("test", "body", "orelse")


class If(stmt):
    _fields = ("test", "body", "orelse")


class With(stmt):
    _fields = ("items", "body")


class AsyncWith(stmt):
    _fields = ("items", "body")


class Raise(stmt):
    _fields = ("exc", "cause")


class Try(stmt):
    _fields = ("body", "handlers", "orelse", "finalbody")


class Assert(stmt):
    _fields = ("test", "msg")


# --- imports and scope declarations ---
class Import(stmt):
    _fields = ("names",)


class ImportFrom(stmt):
    _fields = ("module", "names", "level")


class Global(stmt):
    _fields = ("names",)


class Nonlocal(stmt):
    _fields = ("names",)


class Expr(stmt):
    _fields = ("value",)


# --- statements without fields ---
class Pass(stmt):
    _fields = ()


class Break(stmt):
    _fields = ()


class Continue(stmt):
    _fields = ()
class expr(AST):
    """Common base class of the expression nodes."""


# --- operators and conditionals ---
class BoolOp(expr):
    _fields = ("op", "values")


class BinOp(expr):
    _fields = ("left", "op", "right")


class UnaryOp(expr):
    _fields = ("op", "operand")


class Lambda(expr):
    _fields = ("args", "body")


class IfExp(expr):
    _fields = ("test", "body", "orelse")


# --- displays and comprehensions ---
class Dict(expr):
    _fields = ("keys", "values")


class Set(expr):
    _fields = ("elts",)


class ListComp(expr):
    _fields = ("elt", "generators")


class SetComp(expr):
    _fields = ("elt", "generators")


class DictComp(expr):
    _fields = ("key", "value", "generators")


class GeneratorExp(expr):
    _fields = ("elt", "generators")


# --- await / yield ---
class Await(expr):
    _fields = ("value",)


class Yield(expr):
    _fields = ("value",)


class YieldFrom(expr):
    _fields = ("value",)


class Compare(expr):
    _fields = ("left", "ops", "comparators")


class Call(expr):
    _fields = ("func", "args", "keywords")


# --- literals and constants ---
class Num(expr):
    _fields = ("n",)


class Str(expr):
    _fields = ("s",)


class FormattedValue(expr):
    _fields = ("value", "conversion", "format_spec")


class JoinedStr(expr):
    _fields = ("values",)


class Bytes(expr):
    _fields = ("s",)


class NameConstant(expr):
    _fields = ("value",)


# NOTE: within this module the name shadows the builtin ``Ellipsis``.
class Ellipsis(expr):
    _fields = ()


class Constant(expr):
    _fields = ("value",)


# --- expressions that carry a ``ctx`` field ---
class Attribute(expr):
    _fields = ("value", "attr", "ctx")


class Subscript(expr):
    _fields = ("value", "slice", "ctx")


class Starred(expr):
    _fields = ("value", "ctx")


class Name(expr):
    _fields = ("id", "ctx")


class List(expr):
    _fields = ("elts", "ctx")


class Tuple(expr):
    _fields = ("elts", "ctx")
class expr_context(AST):
    """Common base class of the expression-context marker nodes."""


# The markers below are field-less.
class Load(expr_context):
    _fields = ()


class Store(expr_context):
    _fields = ()


class StoreConst(expr_context):
    _fields = ()


class Del(expr_context):
    _fields = ()


class AugLoad(expr_context):
    _fields = ()


class AugStore(expr_context):
    _fields = ()


class Param(expr_context):
    _fields = ()
# NOTE: within this module the name shadows the builtin ``slice``.
class slice(AST):
    """Common base class of the subscript slice nodes."""


class Slice(slice):
    _fields = ("lower", "upper", "step")


class ExtSlice(slice):
    _fields = ("dims",)


class Index(slice):
    _fields = ("value",)
class boolop(AST):
    """Common base class of the boolean operator markers."""


class And(boolop):
    _fields = ()


class Or(boolop):
    _fields = ()
class operator(AST):
    """Common base class of the binary operator markers."""


# All binary operator markers are field-less.
class Add(operator):
    _fields = ()


class Sub(operator):
    _fields = ()


class Mult(operator):
    _fields = ()


class MatMult(operator):
    _fields = ()


class Div(operator):
    _fields = ()


class Mod(operator):
    _fields = ()


class Pow(operator):
    _fields = ()


class LShift(operator):
    _fields = ()


class RShift(operator):
    _fields = ()


class BitOr(operator):
    _fields = ()


class BitXor(operator):
    _fields = ()


class BitAnd(operator):
    _fields = ()


class FloorDiv(operator):
    _fields = ()
class unaryop(AST):
    """Common base class of the unary operator markers."""


class Invert(unaryop):
    _fields = ()


class Not(unaryop):
    _fields = ()


class UAdd(unaryop):
    _fields = ()


class USub(unaryop):
    _fields = ()
class cmpop(AST):
    """Common base class of the comparison operator markers."""


# All comparison operator markers are field-less.
class Eq(cmpop):
    _fields = ()


class NotEq(cmpop):
    _fields = ()


class Lt(cmpop):
    _fields = ()


class LtE(cmpop):
    _fields = ()


class Gt(cmpop):
    _fields = ()


class GtE(cmpop):
    _fields = ()


class Is(cmpop):
    _fields = ()


class IsNot(cmpop):
    _fields = ()


class In(cmpop):
    _fields = ()


class NotIn(cmpop):
    _fields = ()
# Helper nodes that derive from AST directly rather than from stmt or expr.
class comprehension(AST):
    _fields = ("target", "iter", "ifs", "is_async")


class excepthandler(AST):
    """Common base class of exception handler nodes."""


class ExceptHandler(excepthandler):
    _fields = ("type", "name", "body")


class arguments(AST):
    _fields = ("args", "vararg", "kwonlyargs", "kw_defaults", "kwarg", "defaults")


class arg(AST):
    _fields = ("arg", "annotation")


class keyword(AST):
    _fields = ("arg", "value")


class alias(AST):
    _fields = ("name", "asname")


class withitem(AST):
    _fields = ("context_expr", "optional_vars")

View file

@ -0,0 +1,3 @@
# Build target for the vendored `utokenize` component; no lib_deps are declared.
py_project(
    name = "utokenize"
)

View file

@ -0,0 +1,5 @@
# pycopy-utokenize
A small tokenizer for Python source code, vendored from `pycopy-utokenize==2.0`.
[pycopy-utokenize](https://pypi.org/project/pycopy-utokenize/) is released under the MIT license, copyright © Paul Sokolovsky 2021.

View file

@ -0,0 +1,240 @@
# (c) 2019 Paul Sokolovsky, MIT license
from token import *
from collections import namedtuple
import io
# Three extra token types, numbered consecutively from N_TOKENS onwards and
# registered in tok_name so they can be printed by name.
COMMENT, NL, ENCODING = (N_TOKENS + offset for offset in range(3))
tok_name.update({COMMENT: "COMMENT", NL: "NL", ENCODING: "ENCODING"})
class TokenInfo(namedtuple("TokenInfo", ("type", "string", "start", "end", "line"))):
    """A token record with the fields ``type``, ``string``, ``start``, ``end`` and ``line``."""

    def __str__(self):
        """Render the token with its numeric type and the type's name.

        ``start`` is formatted with ``%d``; ``end`` is not part of the output.
        """
        shown = (
            self.type,
            tok_name[self.type],
            self.string,
            self.start,
            self.line,
        )
        return "TokenInfo(type=%d (%s), string=%r, startl=%d, line=%r)" % shown
def get_indent(l):
    """Split the leading indentation off a line.

    Args:
        l: the line to examine.

    Returns:
        A tuple ``(width, rest)`` where ``width`` is the number of leading
        space and tab characters (a tab counts as one) and ``rest`` is the
        line from its first other character onwards. A line made up only of
        spaces and tabs, or an empty string, yields ``(len(l), "")``.
    """
    rest = l.lstrip(" \t")
    return len(l) - len(rest), rest
def get_str(l, readline):
    """Read one string literal from the start of ``l``.

    Args:
        l: text starting at the opening quote (any prefix such as ``b`` or
            ``r`` must already have been removed by the caller).
        readline: callable returning the next source line; it is used when
            the literal continues past the end of ``l``.

    Returns:
        A tuple ``(literal, rest, lines_read)``: the literal including its
        quotes, the text following it on its last line, and the number of
        extra lines fetched through ``readline``.

    Raises:
        AssertionError: if the input ends inside a triple-quoted literal.
    """
    lineno = 0
    # Pieces are collected in a list and joined at the end; io.StringIO has
    # no "+=" operator on CPython, so it cannot be used as an accumulator.
    parts = []

    if l.startswith(('"""', "'''")):
        sep = l[0:3]
        parts.append(sep)
        l = l[3:]
        pos = 0
        while True:
            i = l.find(sep, pos)
            if i >= 0:
                if i > 0 and l[i - 1] == "\\":
                    # Delimiter preceded by a backslash: keep searching.
                    pos = i + 1
                    continue
                break
            # No terminator on this line: take all of it and fetch the next.
            parts.append(l)
            l = readline()
            pos = 0
            if not l:
                # Raised explicitly so the check survives ``python -O``.
                raise AssertionError("EOF inside triple-quoted string")
            lineno += 1
        end = i + 3
        parts.append(l[:end])
        return "".join(parts), l[end:], lineno

    lbuf = io.StringIO(l)
    sep = lbuf.read(1)
    parts.append(sep)
    while True:
        c = lbuf.read(1)
        if not c:
            # Ran out of input on this line without a closing quote.
            break
        parts.append(c)
        if c == "\\":
            # The character after a backslash is taken verbatim, so an
            # escaped quote does not end the literal.
            c = lbuf.read(1)
            parts.append(c)
            if c == "\n":
                # Backslash-newline: the literal continues on the next line.
                lbuf = io.StringIO(readline())
                lineno += 1
        elif c == sep:
            break
    return "".join(parts), lbuf.read(), lineno
# Operators longer than one character, in matching order: longer spellings
# come before their prefixes (e.g. "**=" before "**"). Anything not listed
# here is emitted as a single-character OP token.
_MULTI_CHAR_OPS = (
    "**=",
    "//=",
    ">>=",
    "<<=",
    "+=",
    "-=",
    "*=",
    "/=",
    "%=",
    "@=",
    "&=",
    "|=",
    "^=",
    "**",
    "//",
    "<<",
    ">>",
    "==",
    "!=",
    ">=",
    "<=",
    "...",
    "->",
)


def generate_tokens(readline):
    """Tokenize Python source read line by line.

    Args:
        readline: callable returning the next source line as a string, or an
            empty string at the end of the input.

    Yields:
        ``TokenInfo`` records. ``start`` holds the line number (the first
        line is 1), ``end`` is always 0 and ``line`` is the source line as it
        was read (empty for the trailing DEDENT and ENDMARKER tokens).

    Raises:
        IndentationError: if a line dedents to a column that matches none of
            the enclosing indentation levels.
    """
    indent_stack = [0]
    lineno = 0
    paren_level = 0
    no_newline = False

    # generate_tokens() doesn't yield an ENCODING token, only tokenize() does.

    while True:
        l = readline()
        lineno += 1
        org_l = l
        if not l:
            break
        if not l.endswith("\n"):
            # Normalise a line lacking its newline; the NEWLINE/NL token
            # emitted for it then carries an empty string.
            l += "\n"
            no_newline = True
        i, l = get_indent(l)

        # Blank lines (optionally holding just a form feed) and comment-only
        # lines produce NL and never affect indentation.
        if l == "\n":
            yield TokenInfo(NL, l, lineno, 0, org_l)
            continue
        elif l == "\x0c\n":
            yield TokenInfo(NL, "\n", lineno, 0, org_l)
            continue

        if l.startswith("#"):
            yield TokenInfo(COMMENT, l.rstrip("\n"), lineno, 0, org_l)
            yield TokenInfo(NL, "\n", lineno, 0, org_l)
            continue

        # Indentation is only significant outside of brackets.
        if paren_level == 0:
            if i > indent_stack[-1]:
                yield TokenInfo(INDENT, org_l[:i], lineno, 0, org_l)
                indent_stack.append(i)
            elif i < indent_stack[-1]:
                while i < indent_stack[-1]:
                    yield TokenInfo(DEDENT, "", lineno, 0, org_l)
                    indent_stack.pop()
                if i != indent_stack[-1]:
                    # Without this check the stack would be popped empty and
                    # the next lookup would fail with an IndexError.
                    raise IndentationError(
                        "unindent does not match any outer indentation level "
                        "(line %d)" % lineno
                    )

        while l:
            if l[0].isdigit() or (l.startswith(".") and len(l) > 1 and l[1].isdigit()):
                # Numeric literal: optional radix prefix, digits with at most
                # one dot, optional exponent, optional "j" suffix.
                seen_dot = False
                t = ""
                if l.startswith(("0x", "0X")):
                    t = "0x"
                    l = l[2:]
                elif l.startswith(("0o", "0O")):
                    t = "0o"
                    l = l[2:]
                elif l.startswith(("0b", "0B")):
                    t = "0b"
                    l = l[2:]
                while l and (
                    l[0].isdigit()
                    or l[0] == "."
                    or l[0] == "_"
                    or (t.startswith("0x") and l[0] in "ABCDEFabcdef")
                ):
                    if l[0] == ".":
                        if seen_dot:
                            break
                        seen_dot = True
                    t += l[0]
                    l = l[1:]
                if l.startswith(("e", "E")):
                    t += l[0]
                    l = l[1:]
                    if l[0] in ("+", "-"):
                        t += l[0]
                        l = l[1:]
                    while l and (l[0].isdigit() or l[0] == "_"):
                        t += l[0]
                        l = l[1:]
                if l.startswith("j"):
                    t += l[0]
                    l = l[1:]
                yield TokenInfo(NUMBER, t, lineno, 0, org_l)
            elif l[0].isalpha() or l.startswith("_") or ord(l[0]) >= 0xAA:
                # Identifier, keyword, or the prefix of a string literal.
                name = ""
                while l and (
                    l[0].isalpha()
                    or l[0].isdigit()
                    or l.startswith("_")
                    or ord(l[0]) >= 0xAA
                ):
                    name += l[0]
                    l = l[1:]
                if l.startswith(('"', "'")) and name in (
                    "b",
                    "r",
                    "rb",
                    "br",
                    "u",
                    "f",
                ):
                    s, l, lineno_delta = get_str(l, readline)
                    yield TokenInfo(STRING, name + s, lineno, 0, org_l)
                    lineno += lineno_delta
                else:
                    yield TokenInfo(NAME, name, lineno, 0, org_l)
            elif l == "\\\n":
                # Backslash continuation: carry on with the next physical line.
                l = readline()
                lineno += 1
            elif l[0] == "\n":
                nl = "" if no_newline else "\n"
                if paren_level > 0:
                    yield TokenInfo(NL, nl, lineno, 0, org_l)
                else:
                    yield TokenInfo(NEWLINE, nl, lineno, 0, org_l)
                break
            elif l[0].isspace():
                l = l[1:]
            elif l.startswith(('"', "'")):
                s, l, lineno_delta = get_str(l, readline)
                yield TokenInfo(STRING, s, lineno, 0, org_l)
                lineno += lineno_delta
            elif l.startswith("#"):
                yield TokenInfo(COMMENT, l.rstrip("\n"), lineno, 0, org_l)
                # Leave just the newline so the line is terminated normally.
                l = "\n"
            else:
                for op in _MULTI_CHAR_OPS:
                    if l.startswith(op):
                        yield TokenInfo(OP, op, lineno, 0, org_l)
                        l = l[len(op) :]
                        break
                else:
                    yield TokenInfo(OP, l[0], lineno, 0, org_l)
                    # Track bracket nesting: inside brackets line ends are NL
                    # and indentation is ignored.
                    if l[0] in ("(", "[", "{"):
                        paren_level += 1
                    elif l[0] in (")", "]", "}"):
                        paren_level -= 1
                    l = l[1:]

    # Close any indentation levels still open at the end of the input.
    while indent_stack[-1] > 0:
        yield TokenInfo(DEDENT, "", lineno, 0, "")
        indent_stack.pop()

    yield TokenInfo(ENDMARKER, "", lineno, 0, "")