rules_zapp/zapp/compiler/__main__.py

367 lines
10 KiB
Python
Raw Normal View History

2021-08-09 15:26:06 +00:00
"""
2021-09-25 07:06:41 +00:00
The Zapp! compiler.
2021-08-09 15:26:06 +00:00
"""
import argparse
import json
import os
import pathlib
import stat
import sys
import zipfile
from email.parser import Parser
2021-09-25 07:07:19 +00:00
from itertools import chain
from pathlib import Path
from pprint import pprint
from tempfile import TemporaryDirectory
2021-08-29 22:05:00 +00:00
from zapp.support.pep425 import compress_tags, decompress_tag
2021-09-25 07:07:19 +00:00
from zapp.support.unpack import cache_wheel_path
2021-08-09 15:26:06 +00:00
parser = argparse.ArgumentParser(description="The (bootstrap) Zapp compiler")
parser.add_argument("-o", "--out", dest="output", help="Output target file")
parser.add_argument("-d", "--debug", dest="debug", action="store_true", default=False)
parser.add_argument("manifest", help="The (JSON) manifest")
MAIN_TEMPLATE = """\
# -*- coding: utf-8 -*-
2021-09-25 07:06:41 +00:00
\"\"\"Zapp!-generated __main__\""\"
2021-08-09 15:26:06 +00:00
from importlib import import_module
import os
import sys
from runpy import _run_module_as_main
for script in {scripts!r}:
mod, sep, fn = script.partition(':')
mod_ok = all(part.isidentifier() for part in mod.split('.'))
fn_ok = all(part.isidentifier() for part in fn.split('.'))
if not mod_ok:
raise RuntimeError("Invalid module reference {{!r}}".format(mod))
if fn and not fn_ok:
raise RuntimeError("Invalid function reference {{!r}}".format(fn))
if mod and fn:
mod = import_module(mod)
getattr(mod, fn)()
else:
_run_module_as_main(mod)
"""
2022-11-21 08:39:42 +00:00
def dsub(d1: dict, d2: dict) -> dict:
"""Dictionary subtraction. Remove k/vs from d1 if they occur in d2."""
2022-11-21 08:39:42 +00:00
return {k: v for k, v in d1.items() if k in d2 and v == d2[k]}
2021-08-09 15:26:06 +00:00
def make_dunder_main(manifest):
"""Generate a __main__.py file for the given manifest."""
prelude = manifest.get("prelude_points", [])
main = manifest.get("entry_point")
scripts = prelude + [main]
return MAIN_TEMPLATE.format(**locals())
2021-08-09 15:26:06 +00:00
def dir_walk_prefixes(path):
"""Helper. Walk all slices of a path."""
segments = []
yield ""
for segment in path.split("/"):
segments.append(segment)
yield os.path.join(*segments)
def load_wheel(opts, manifest, path):
"""Load a single wheel, returning ..."""
def _parse_email(msg):
2021-08-29 22:05:00 +00:00
msg = Parser().parsestr(msg)
2021-09-25 07:07:19 +00:00
2021-08-29 22:05:00 +00:00
def _get(k):
v = msg.get_all(k)
if len(v) == 1:
return v[0]
else:
return v
2021-09-25 07:07:19 +00:00
2021-08-29 22:05:00 +00:00
return {k: _get(k) for k in msg.keys()}
with open(os.path.join(path, "METADATA")) as metaf:
meta = _parse_email(metaf.read())
with open(os.path.join(path, "WHEEL")) as wheelf:
wheel = _parse_email(wheelf.read())
prefix = os.path.dirname(path)
# Naive glob of sources; note that bazel may hvae inserted empty __init__.py trash
2021-09-25 07:07:19 +00:00
sources = [
(
dest,
spec,
)
2022-11-21 08:39:42 +00:00
for dest, spec in manifest["sources"].items()
2021-09-25 07:07:19 +00:00
if spec["source"].startswith(prefix)
]
# Retain only manifest-listed sources (dealing with __init__.py trash, but maybe not all conflicts)
with open(os.path.join(path, "RECORD")) as recordf:
known_srcs = set()
for line in recordf:
srcname, *_ = line.split(",")
known_srcs.add(srcname)
2022-11-21 08:39:42 +00:00
sources = {
dest: spec
for dest, spec in sources
if dest in known_srcs or not dest.endswith("__init__.py")
2022-11-21 08:39:42 +00:00
}
# FIXME: Check hashes & sizes of manifest-listed sources and abort on error/conflict.
# FIXME: Check for .so files or other compiled artifacts, adjust tags accordingly.
return {
# "record": record,
"meta": meta,
"wheel": wheel,
"sources": sources,
}
def wheel_name(wheel):
"""Construct the "canonical" filename of the wheel."""
2021-08-29 22:05:00 +00:00
# https://www.python.org/dev/peps/pep-0425/
tags = wheel["wheel"].get("Tag")
if isinstance(tags, list):
2021-08-29 22:05:00 +00:00
tags = "-" + compress_tags(chain(*[decompress_tag(t) for t in tags]))
elif isinstance(tags, str):
2021-08-29 22:05:00 +00:00
tags = "-" + tags
else:
tags = ""
return "".join(
[
wheel["meta"]["Name"],
"-",
wheel["meta"]["Version"],
tags,
".whl",
]
)
def zip_wheel(tmpdir, wheel):
"""Build a 'tempfile' containing the proper contents of the wheel."""
wn = wheel_name(wheel)
cached_path = cache_wheel_path(wn)
wheel_file = os.path.join(tmpdir, wn)
with zipfile.ZipFile(wheel_file, "w") as whl:
2022-11-21 08:39:42 +00:00
for dest, src in wheel["sources"].items():
2021-08-30 00:44:43 +00:00
whl.write(src["source"], dest)
try:
# Attempt to enter the (re)built wheel into the cache. This could fail
# due to coss-device rename problems, or due to something else having
# concurrently built the same wheel and won the race.
#
# FIXME: This probably needs some guardrails to ensure that we only put
# architecture-independent wheels into the cache this way to avoid the
# plethora of "missbehaved wheels" problems that pip deals with.
Path(wheel_file).rename(cached_path)
return str(cached_path)
except OSError:
return wheel_file
def rezip_wheels(opts, manifest):
"""Extract unzipped wheels from the manifest's inputs, simplifying the manifest.
Wheels which are unzipped should be re-zipped into the cache, if not present in the cache.
Files sourced from unzipped wheels should be removed, and a single wheel reference inserted."""
wheels = [
2021-08-30 00:44:43 +00:00
load_wheel(opts, manifest, os.path.dirname(s["source"]))
2022-11-21 08:39:42 +00:00
for _, s in manifest["sources"].items()
2021-08-30 00:44:43 +00:00
if s["source"].endswith("/WHEEL")
]
manifest["requirements"] = {}
# Zip up the wheels and insert wheel records to the manifest
for w in wheels:
# Try to cheat and hit in the local cache first rather than building wheels every time
2021-08-29 22:05:00 +00:00
wn = wheel_name(w)
2021-08-30 00:44:43 +00:00
# Expunge sources available in the wheel
manifest["sources"] = dsub(manifest["sources"], w["sources"])
2021-08-29 22:05:00 +00:00
# We may have a double-path dependency.
# If we DON'T, we have to zip
if wn not in manifest["wheels"]:
wf = cache_wheel_path(wn)
if wf.exists():
try:
wf.touch()
except OSError:
pass
wf = str(wf)
else:
if opts.debug:
print("---")
pprint({"$type": "whl", **w})
2021-08-29 22:05:00 +00:00
wf = zip_wheel(opts.tmpdir, w)
# Insert a new wheel source
manifest["wheels"][wn] = {"hashes": [], "source": wf, "manifest": w}
# Insert the requirement
manifest["requirements"][w["meta"]["Name"]] = w["meta"]["Version"]
return manifest
def ensure_srcs_map(opts, manifest):
manifest["sources"] = dict(manifest["sources"])
return manifest
2021-08-30 00:44:43 +00:00
def generate_dunder_inits(opts, manifest):
2021-08-09 15:26:06 +00:00
"""Hack the manifest to insert __init__ files as needed."""
sources = manifest["sources"]
for input_file in list(sources.keys()):
for path in dir_walk_prefixes(os.path.dirname(input_file)):
init_file = os.path.join(path, "__init__.py")
if init_file not in sources:
sources[init_file] = None
return manifest
def insert_manifest_json(opts, manifest):
"""Insert the manifest.json file."""
2021-08-30 00:44:43 +00:00
tempf = os.path.join(opts.tmpdir, "manifest.json")
# Note ordering to enable somewhat self-referential manifest
manifest["sources"]["zapp/manifest.json"] = {"source": tempf, "hashes": []}
with open(tempf, "w") as fp:
fp.write(json.dumps(manifest))
2021-08-09 15:26:06 +00:00
return manifest
2021-08-30 00:44:43 +00:00
def generate_dunder_main(opts, manifest):
"""Insert the __main__.py to the manifest."""
if "__main__.py" in manifest["sources"]:
print("Error: __main__.py conflict.", file=sys.stderr)
exit(1)
tempf = os.path.join(opts.tmpdir, "__main__.py")
# Note ordering to enable somewhat self-referential manifest
manifest["sources"]["__main__.py"] = {"source": tempf, "hashes": []}
with open(tempf, "w") as fp:
fp.write(make_dunder_main(manifest))
return manifest
def enable_unzipping(opts, manifest):
2021-08-09 15:26:06 +00:00
"""Inject unzipping behavior as needed."""
if manifest["wheels"]:
2021-09-25 07:07:19 +00:00
manifest["prelude_points"].extend(
[
"zapp.support.unpack:unpack_deps",
"zapp.support.unpack:install_deps",
]
)
2021-08-09 15:26:06 +00:00
2021-08-30 00:44:43 +00:00
if not manifest["zip_safe"]:
2021-09-25 07:07:19 +00:00
manifest["prelude_points"].extend(
[
"zapp.support.unpack:unpack_zapp",
]
)
2021-08-09 15:26:06 +00:00
return manifest
2022-11-21 08:39:42 +00:00
def fix_sources(opts, manifest):
manifest["sources"] = {f: m for f, m in manifest["sources"]}
return manifest
2021-08-09 15:26:06 +00:00
def main():
opts, args = parser.parse_known_args()
with open(opts.manifest) as fp:
manifest = json.load(fp)
with TemporaryDirectory() as d:
setattr(opts, "tmpdir", d)
2022-11-21 08:39:42 +00:00
manifest = fix_sources(opts, manifest)
manifest = rezip_wheels(opts, manifest)
manifest = ensure_srcs_map(opts, manifest)
2021-08-30 00:44:43 +00:00
manifest = enable_unzipping(opts, manifest)
# Patch the manifest to insert needed __init__ files
2021-08-30 00:44:43 +00:00
manifest = generate_dunder_inits(opts, manifest)
manifest = generate_dunder_main(opts, manifest)
# Generate and insert the manifest
# NOTE: This has to be the LAST thing we do
2021-08-30 00:44:43 +00:00
manifest = insert_manifest_json(opts, manifest)
if opts.debug:
print("---")
pprint(
{
"$type": "zapp",
"opts": {
k: getattr(opts, k) for k in dir(opts) if not k.startswith("_")
},
"manifest": manifest,
}
)
with open(opts.output, "w") as zapp:
shebang = "#!" + manifest["shebang"] + "\n"
zapp.write(shebang)
# Now we're gonna build the zapp from the manifest
with zipfile.ZipFile(opts.output, "a") as zapp:
# Append user-specified sources
for dest, src in sorted(manifest["sources"].items(), key=lambda x: x[0]):
if src is None:
zapp.writestr(dest, "")
else:
2021-08-30 00:44:43 +00:00
zapp.write(src["source"], dest)
# Append user-specified libraries
for whl, config in manifest["wheels"].items():
zapp.write(config["source"], ".deps/" + whl)
zapp = pathlib.Path(opts.output)
zapp.chmod(zapp.stat().st_mode | stat.S_IEXEC)
2021-08-09 15:26:06 +00:00
if __name__ == "__main__" or 1:
main()