From 5b0062468f9feca2dcbb0af38d6744fadcd95e92 Mon Sep 17 00:00:00 2001 From: Reid 'arrdem' McKenzie Date: Sun, 29 Aug 2021 15:07:56 -0600 Subject: [PATCH] Implement re-zipping unzipped wheels This patch teaches Zapp! to introspect the `sources` of a manifest, and look for the well-known `WHEEL` file(s) indicative of an unzipped/installed wheel in the input sources. A wheel can be (somewhat*) correctly reassembled by zipping its unzipped state, so in the presence of unzipped wheels Zapp! will re-zip them and enter them into the manifest appropriately for inclusion. This fixes #6 the nasty way, as there's no good way to make `rules_python` provide wheel dependencies or to translate unrolled wheels back to wheels during rule execution as this would violate Bazel's file dependency model. --- example/BUILD | 18 ++++ example/WORKSPACE | 13 ++- example/hello.py | 1 + zapp/BUILD | 31 +++++-- zapp/__main__.py | 21 +++-- zapp/compiler/__main__.py | 186 +++++++++++++++++++++++++++++++------- zapp/support/manifest.py | 31 ++++++- zapp/support/unpack.py | 9 +- zapp/zapp.bzl | 2 - 9 files changed, 242 insertions(+), 70 deletions(-) diff --git a/example/BUILD b/example/BUILD index 1b829b9..4e11154 100644 --- a/example/BUILD +++ b/example/BUILD @@ -2,6 +2,8 @@ load("@rules_zapp//zapp:zapp.bzl", "zapp_binary", ) +load("@rules_python//python:defs.bzl", "py_library") + load("@my_deps//:requirements.bzl", py_requirement="requirement", ) @@ -20,3 +22,19 @@ zapp_binary( py_requirement("pyyaml"), ] ) + +py_library( + name = "lib_hello", + srcs = [], + deps = [ + py_requirement("pyyaml"), + ] +) + +zapp_binary( + name = "hello_lib_deps", + main = "hello.py", + deps = [ + ":lib_hello", + ], +) diff --git a/example/WORKSPACE b/example/WORKSPACE index d6a68ea..29465d3 100644 --- a/example/WORKSPACE +++ b/example/WORKSPACE @@ -26,11 +26,16 @@ git_repository( tag = "0.3.0", ) -git_repository( +# git_repository( +# name = "rules_zapp", +# remote = "https://github.com/arrdem/rules_zapp.git", +# tag = "0.1.1", +# # branch = "trunk", +# ) + +local_repository( name = "rules_zapp", - remote = "https://github.com/arrdem/rules_zapp.git", - tag = "0.1.1", - # branch = "trunk", + path = "../", ) load("@rules_python//python:pip.bzl", "pip_install") diff --git a/example/hello.py b/example/hello.py index 6f8db37..88f8b23 100644 --- a/example/hello.py +++ b/example/hello.py @@ -9,6 +9,7 @@ def main(): try: import yaml + print("I have YAML! and nothing to do with it.", yaml.__file__) except ImportError: print("Don't have YAML.") diff --git a/zapp/BUILD b/zapp/BUILD index e793a7e..cc321aa 100644 --- a/zapp/BUILD +++ b/zapp/BUILD @@ -2,24 +2,35 @@ package(default_visibility = ["//visibility:public"]) load("zapp.bzl", "zapp_binary") -# Bootstrapping Zapp using py_binary -py_binary( - name = "zappc", - main = "compiler/__main__.py", - srcs = ["compiler/__main__.py"], -) - # Zapp plugins used as a runtime library by rules_zapp py_library( name = "zapp_support", srcs = glob(["support/**/*.py"]), imports = [ - "..", + "..", ] ) +# Bootstrapping Zapp using py_binary +py_binary( + name = "zappc", + main = "compiler/__main__.py", + srcs = glob(["support/**/*.py"]) + [ + "compiler/__main__.py" + ], + imports = [ + "..", + ], +) + # For testing of zappc zapp_binary( - name = "zappzappc", - main = "compiler/__main__.py", + name = "zappzappc", + main = "compiler/__main__.py", + srcs = glob(["support/**/*.py"]) + [ + "compiler/__main__.py" + ], + imports = [ + "..", + ], ) diff --git a/zapp/__main__.py b/zapp/__main__.py index 7114c14..db6db1b 100644 --- a/zapp/__main__.py +++ b/zapp/__main__.py @@ -6,10 +6,10 @@ import argparse import io import json import os -import sys -import zipfile import pathlib import stat +import sys +import zipfile parser = argparse.ArgumentParser(description="The (bootstrap) Zapp compiler") parser.add_argument("-o", "--out", dest="output", help="Output target file") @@ -100,17 +100,22 @@ def main(): if opts.debug: from pprint import pprint - pprint({ - "opts": {k: getattr(opts, k) for k in dir(opts) if not k.startswith("_")}, - "manifest": manifest - }) - with open(opts.output, 'w') as zapp: + pprint( + { + "opts": { + k: getattr(opts, k) for k in dir(opts) if not k.startswith("_") + }, + "manifest": manifest, + } + ) + + with open(opts.output, "w") as zapp: shebang = "#!" + manifest["shebang"] + "\n" zapp.write(shebang) # Now we're gonna build the zapp from the manifest - with zipfile.ZipFile(opts.output, 'a') as zapp: + with zipfile.ZipFile(opts.output, "a") as zapp: # Append the __main__.py generated record zapp.writestr("__main__.py", make_dunder_main(manifest)) diff --git a/zapp/compiler/__main__.py b/zapp/compiler/__main__.py index 7e31000..db16393 100644 --- a/zapp/compiler/__main__.py +++ b/zapp/compiler/__main__.py @@ -6,10 +6,15 @@ import argparse import io import json import os -import sys -import zipfile import pathlib import stat +import sys +import zipfile +from email.parser import Parser +from shutil import move +from tempfile import TemporaryDirectory + +from zapp.support.unpack import cache_wheel_path parser = argparse.ArgumentParser(description="The (bootstrap) Zapp compiler") parser.add_argument("-o", "--out", dest="output", help="Output target file") @@ -47,6 +52,12 @@ for script in {scripts!r}: """ +def dsub(d1, d2): + """Dictionary subtraction. Remove k/vs from d1 if they occur in d2.""" + + return {k: v for k, v in d1.items() if k not in d2 or d2[k] != v} + + def make_dunder_main(manifest): """Generate a __main__.py file for the given manifest.""" @@ -55,6 +66,7 @@ def make_dunder_main(manifest): scripts = prelude + [main] return MAIN_TEMPLATE.format(**locals()) + def dir_walk_prefixes(path): """Helper. Walk all slices of a path.""" @@ -65,6 +77,102 @@ def dir_walk_prefixes(path): yield os.path.join(*segments) +def load_wheel(opts, manifest, path): + """Load a single wheel, returning ...""" + + def _parse_email(msg): + return dict(Parser().parsestr(msg).items()) + + # RECORD seems to just record file reference checksums for validation + # with open(os.path.join(path, "RECORD")) as recordf: + # record = recordf.read() + + with open(os.path.join(path, "METADATA")) as metaf: + meta = _parse_email(metaf.read()) + + with open(os.path.join(path, "WHEEL")) as wheelf: + wheel = _parse_email(wheelf.read()) + + prefix = os.path.dirname(path) + + sources = {k: v for k, v in manifest["sources"].items() if v.startswith(prefix)} + + return { + # "record": record, + "meta": meta, + "wheel": wheel, + "sources": sources, + } + + +def wheel_name(wheel): + """Construct the "canonical" filename of the wheel.""" + + tags = wheel["wheel"].get("Tag") + if isinstance(tags, list): + tags = "-" + ".".join(sorted(wheel["wheel"]["Tag"])) + elif isinstance(tags, str): + tags = "-" + wheel["wheel"]["Tag"] + else: + tags = "" + + return "".join( + [ + wheel["meta"]["Name"], + "-", + wheel["meta"]["Version"], + tags, + ".whl", + ] + ) + + +def zip_wheel(tmpdir, wheel): + """Build a 'tempfile' containing the proper contents of the wheel.""" + + wheel_file = os.path.join(tmpdir, wheel_name(wheel)) + + with zipfile.ZipFile(wheel_file, "w") as whl: + for dest, src in wheel["sources"].items(): + whl.write(src, dest) + + return wheel_file + + +def rezip_wheels(opts, manifest): + """Extract unzipped wheels from the manifest's inputs, simplifying the manifest. + + Wheels which are unzipped should be re-zipped into the cache, if not present in the cache. + + Files sourced from unzipped wheels should be removed, and a single wheel reference inserted.""" + + wheels = [ + load_wheel(opts, manifest, os.path.dirname(p)) + for p in manifest["sources"].values() + if p.endswith("/WHEEL") + ] + + # Zip up the wheels and insert wheel records to the manifest + for w in wheels: + # Try to cheat and hit in the local cache first rather than building wheels every time + wf = cache_wheel_path(wheel_name(w)) + if wf.exists(): + try: + wf.touch() + except OSError: + pass + else: + wf = zip_wheel(opts.tmpdir, w) + + # Insert a new wheel source + manifest["wheels"][wheel_name(w)] = {"hashes": [], "source": wf} + + # Expunge sources available in the wheel + manifest["sources"] = dsub(manifest["sources"], w["sources"]) + + return manifest + + def generate_dunder_inits(manifest): """Hack the manifest to insert __init__ files as needed.""" @@ -106,47 +214,55 @@ def main(): with open(opts.manifest) as fp: manifest = json.load(fp) - manifest = insert_manifest_json(opts, manifest) - manifest = enable_unzipping(manifest) - # Patch the manifest to insert needed __init__ files - # NOTE: This has to be the LAST thing we do - manifest = generate_dunder_inits(manifest) + with TemporaryDirectory() as d: + setattr(opts, "tmpdir", d) - if opts.debug: - from pprint import pprint - pprint({ - "opts": {k: getattr(opts, k) for k in dir(opts) if not k.startswith("_")}, - "manifest": manifest - }) + manifest = rezip_wheels(opts, manifest) + manifest = insert_manifest_json(opts, manifest) + manifest = enable_unzipping(manifest) + # Patch the manifest to insert needed __init__ files + # NOTE: This has to be the LAST thing we do + manifest = generate_dunder_inits(manifest) - with open(opts.output, 'w') as zapp: - shebang = "#!" + manifest["shebang"] + "\n" - zapp.write(shebang) + if opts.debug: + from pprint import pprint - if "__main__.py" in manifest["sources"]: - print("Error: __main__.py conflict.", file=sys.stderr) - exit(1) + pprint( + { + "opts": { + k: getattr(opts, k) for k in dir(opts) if not k.startswith("_") + }, + "manifest": manifest, + } + ) - # Now we're gonna build the zapp from the manifest - with zipfile.ZipFile(opts.output, 'a') as zapp: + with open(opts.output, "w") as zapp: + shebang = "#!" + manifest["shebang"] + "\n" + zapp.write(shebang) - # Append the __main__.py generated record - zapp.writestr("__main__.py", make_dunder_main(manifest)) + if "__main__.py" in manifest["sources"]: + print("Error: __main__.py conflict.", file=sys.stderr) + exit(1) - # Append user-specified sources - for dest, src in sorted(manifest["sources"].items(), - key=lambda x: x[0]): - if src is None: - zapp.writestr(dest, "") - else: - zapp.write(src, dest) + # Now we're gonna build the zapp from the manifest + with zipfile.ZipFile(opts.output, "a") as zapp: - # Append user-specified libraries - for whl, config in manifest["wheels"].items(): - zapp.write(config["source"], ".deps/" + whl) + # Append the __main__.py generated record + zapp.writestr("__main__.py", make_dunder_main(manifest)) - zapp = pathlib.Path(opts.output) - zapp.chmod(zapp.stat().st_mode | stat.S_IEXEC) + # Append user-specified sources + for dest, src in sorted(manifest["sources"].items(), key=lambda x: x[0]): + if src is None: + zapp.writestr(dest, "") + else: + zapp.write(src, dest) + + # Append user-specified libraries + for whl, config in manifest["wheels"].items(): + zapp.write(config["source"], ".deps/" + whl) + + zapp = pathlib.Path(opts.output) + zapp.chmod(zapp.stat().st_mode | stat.S_IEXEC) if __name__ == "__main__" or 1: diff --git a/zapp/support/manifest.py b/zapp/support/manifest.py index 0de064f..e3bd590 100644 --- a/zapp/support/manifest.py +++ b/zapp/support/manifest.py @@ -1,17 +1,38 @@ """The Zapp runtime manifest API.""" +import json from copy import deepcopy from importlib.resources import open_text -import json - -with open_text("zapp", "manifest.json") as fp: - _MANIFEST = json.load(fp) +def once(f): + singleton = object() + state = singleton + + def helper(*args, **kwargs): + nonlocal state + if state is singleton: + state = f(*args, **kwargs) + return state + + return helper + + +def copied(f): + def helper(*args, **kwargs): + val = f(*args, **kwargs) + return deepcopy(val) + + return helper + + +@copied +@once def manifest(): """Return (a copy) of the runtime manifest.""" - return deepcopy(_MANIFEST) + with open_text("zapp", "manifest.json") as fp: + return json.load(fp) __all__ = ["manifest"] diff --git a/zapp/support/unpack.py b/zapp/support/unpack.py index 41d94d2..2a434ed 100644 --- a/zapp/support/unpack.py +++ b/zapp/support/unpack.py @@ -1,14 +1,11 @@ """Conditionally unpack a zapp (and its deps).""" -import sys import os +import sys from pathlib import Path from zipfile import ZipFile -from .manifest import manifest - - -MANIFEST = manifest() +from zapp.support.manifest import manifest def cache_root() -> Path: @@ -39,7 +36,7 @@ def unpack_deps(): # For each wheel, touch the existing cached wheel or unpack this one. with ZipFile(sys.argv[0], "r") as zf: - for whl, config in MANIFEST["wheels"].items(): + for whl, config in manifest()["wheels"].items(): cached_whl = cache_wheel_path(whl) if cached_whl.exists(): cached_whl.touch() diff --git a/zapp/zapp.bzl b/zapp/zapp.bzl index 5ef38aa..ddffb4b 100644 --- a/zapp/zapp.bzl +++ b/zapp/zapp.bzl @@ -67,8 +67,6 @@ def _zapp_impl(ctx): # TODO: also handle ctx.attr.src.data_runfiles.symlinks srcs = [ f for f in ctx.attr.src.default_runfiles.files.to_list() - # Strip out transitive sources from PyPi archives - if f.path.find("/pypi__") == -1 ] # Find the list of directories to add to sys