Implement re-zipping unzipped wheels

This patch teaches Zapp! to introspect the `sources` of a manifest, and
look for the well-known `WHEEL` file(s) indicative of an
unzipped/installed wheel in the input sources. A wheel can be (somewhat*)
correctly reassembled by zipping its unzipped state, so in the presence
of unzipped wheels Zapp! will re-zip them and enter them into the
manifest appropriately for inclusion.

This fixes #6 the nasty way, because there is no good way either to make
`rules_python` provide wheel dependencies or to translate unrolled
wheels back to wheels during rule execution; doing so would violate
Bazel's file dependency model.
This commit is contained in:
Reid 'arrdem' McKenzie 2021-08-29 15:07:56 -06:00
parent e74b71c369
commit 5b0062468f
9 changed files with 242 additions and 70 deletions

View file

@ -2,6 +2,8 @@ load("@rules_zapp//zapp:zapp.bzl",
"zapp_binary",
)
load("@rules_python//python:defs.bzl", "py_library")
load("@my_deps//:requirements.bzl",
py_requirement="requirement",
)
@ -20,3 +22,19 @@ zapp_binary(
py_requirement("pyyaml"),
]
)
# A py_library with no sources of its own; its only content is the pyyaml
# requirement, which dependents therefore receive transitively.
py_library(
name = "lib_hello",
srcs = [],
deps = [
py_requirement("pyyaml"),
]
)
# A zapp built from hello.py that declares no direct requirement: its only
# dep is :lib_hello, so pyyaml is reached through that library.
zapp_binary(
name = "hello_lib_deps",
main = "hello.py",
deps = [
":lib_hello",
],
)

View file

@ -26,11 +26,16 @@ git_repository(
tag = "0.3.0",
)
git_repository(
# git_repository(
# name = "rules_zapp",
# remote = "https://github.com/arrdem/rules_zapp.git",
# tag = "0.1.1",
# # branch = "trunk",
# )
local_repository(
name = "rules_zapp",
remote = "https://github.com/arrdem/rules_zapp.git",
tag = "0.1.1",
# branch = "trunk",
path = "../",
)
load("@rules_python//python:pip.bzl", "pip_install")

View file

@ -9,6 +9,7 @@ def main():
try:
import yaml
print("I have YAML! and nothing to do with it.", yaml.__file__)
except ImportError:
print("Don't have YAML.")

View file

@ -2,13 +2,6 @@ package(default_visibility = ["//visibility:public"])
load("zapp.bzl", "zapp_binary")
# Bootstrapping Zapp using py_binary
py_binary(
name = "zappc",
main = "compiler/__main__.py",
srcs = ["compiler/__main__.py"],
)
# Zapp plugins used as a runtime library by rules_zapp
py_library(
name = "zapp_support",
@ -18,8 +11,26 @@ py_library(
]
)
# Bootstrapping Zapp using py_binary
#
# The compiler entry point is built together with every Python file under
# support/. NOTE(review): the ".." import root is presumably what lets the
# compiler import those files as `zapp.support...` - confirm.
py_binary(
name = "zappc",
main = "compiler/__main__.py",
srcs = glob(["support/**/*.py"]) + [
"compiler/__main__.py"
],
imports = [
"..",
],
)
# For testing of zappc
#
# Same main, srcs and imports as the zappc target, but packaged with
# zapp_binary instead of py_binary.
zapp_binary(
name = "zappzappc",
main = "compiler/__main__.py",
srcs = glob(["support/**/*.py"]) + [
"compiler/__main__.py"
],
imports = [
"..",
],
)

View file

@ -6,10 +6,10 @@ import argparse
import io
import json
import os
import sys
import zipfile
import pathlib
import stat
import sys
import zipfile
parser = argparse.ArgumentParser(description="The (bootstrap) Zapp compiler")
parser.add_argument("-o", "--out", dest="output", help="Output target file")
@ -100,17 +100,22 @@ def main():
if opts.debug:
from pprint import pprint
pprint({
"opts": {k: getattr(opts, k) for k in dir(opts) if not k.startswith("_")},
"manifest": manifest
})
with open(opts.output, 'w') as zapp:
pprint(
{
"opts": {
k: getattr(opts, k) for k in dir(opts) if not k.startswith("_")
},
"manifest": manifest,
}
)
with open(opts.output, "w") as zapp:
shebang = "#!" + manifest["shebang"] + "\n"
zapp.write(shebang)
# Now we're gonna build the zapp from the manifest
with zipfile.ZipFile(opts.output, 'a') as zapp:
with zipfile.ZipFile(opts.output, "a") as zapp:
# Append the __main__.py generated record
zapp.writestr("__main__.py", make_dunder_main(manifest))

View file

@ -6,10 +6,15 @@ import argparse
import io
import json
import os
import sys
import zipfile
import pathlib
import stat
import sys
import zipfile
from email.parser import Parser
from shutil import move
from tempfile import TemporaryDirectory
from zapp.support.unpack import cache_wheel_path
parser = argparse.ArgumentParser(description="The (bootstrap) Zapp compiler")
parser.add_argument("-o", "--out", dest="output", help="Output target file")
@ -47,6 +52,12 @@ for script in {scripts!r}:
"""
def dsub(d1, d2):
    """Subtract one dictionary from another.

    Builds a new dict holding every item of d1 except those which d2 also
    holds under the same key with an equal value. A key present in both but
    mapped to different values is kept. Neither argument is modified.
    """
    remainder = {}
    for key, value in d1.items():
        # Only an exact key/value match in d2 cancels the entry.
        if key in d2 and not d2[key] != value:
            continue
        remainder[key] = value
    return remainder
def make_dunder_main(manifest):
"""Generate a __main__.py file for the given manifest."""
@ -55,6 +66,7 @@ def make_dunder_main(manifest):
scripts = prelude + [main]
return MAIN_TEMPLATE.format(**locals())
def dir_walk_prefixes(path):
"""Helper. Walk all slices of a path."""
@ -65,6 +77,102 @@ def dir_walk_prefixes(path):
yield os.path.join(*segments)
def load_wheel(opts, manifest, path):
    """Load the description of a single unzipped wheel.

    Args:
        opts: Parsed compiler options. Not used by this function.
        manifest: The build manifest; only its "sources" mapping (archive
            destination -> source file path) is read.
        path: The wheel's metadata directory, i.e. the directory which holds
            its METADATA and WHEEL files.

    Returns:
        A dict with three keys:
          "meta": the headers of the METADATA file,
          "wheel": the headers of the WHEEL file,
          "sources": the entries of manifest["sources"] whose source file
              lives under the parent directory of `path`.
        In "meta" and "wheel" a header which occurs once maps to a string,
        and a header which is repeated (e.g. several `Tag:` lines) maps to
        the list of its values in file order.

    Raises:
        OSError: if METADATA or WHEEL cannot be opened.
    """

    def _parse_email(msg):
        # Both files use RFC 822 style headers. Repeated headers are
        # collected into a list; building a plain dict from the items would
        # silently keep only the last occurrence.
        headers = {}
        for key, value in Parser().parsestr(msg).items():
            if key not in headers:
                headers[key] = value
            elif isinstance(headers[key], list):
                headers[key].append(value)
            else:
                headers[key] = [headers[key], value]
        return headers

    # RECORD is deliberately not loaded: it seems to just record file
    # reference checksums for validation.

    with open(os.path.join(path, "METADATA"), encoding="utf-8") as metaf:
        meta = _parse_email(metaf.read())

    with open(os.path.join(path, "WHEEL"), encoding="utf-8") as wheelf:
        wheel = _parse_email(wheelf.read())

    prefix = os.path.dirname(path)
    # Match on whole path components so that e.g. `pypi__foo` does not also
    # claim the files of `pypi__foo_bar`. An empty prefix (or one which
    # already ends in a separator) is used as-is.
    if prefix and not prefix.endswith(("/", os.sep)):
        anchors = (prefix + "/", prefix + os.sep)
    else:
        anchors = (prefix,)

    sources = {
        dest: src
        for dest, src in manifest["sources"].items()
        # Entries without a backing file (src is None) belong to no wheel.
        if isinstance(src, str) and src.startswith(anchors)
    }

    return {
        "meta": meta,
        "wheel": wheel,
        "sources": sources,
    }
def wheel_name(wheel):
    """Construct the "canonical" filename of the wheel.

    The result is `<Name>-<Version>` taken from the wheel's metadata, then
    the `Tag` value of its WHEEL headers when there is one, then `.whl`.
    A string tag is appended as-is after a "-"; a list of tags is sorted and
    joined with "." first. Without a tag the name is `<Name>-<Version>.whl`.
    """
    tag_field = wheel["wheel"].get("Tag")

    if isinstance(tag_field, list):
        suffix = "-" + ".".join(sorted(tag_field))
    elif isinstance(tag_field, str):
        suffix = "-" + tag_field
    else:
        suffix = ""

    metadata = wheel["meta"]
    return metadata["Name"] + "-" + metadata["Version"] + suffix + ".whl"
def zip_wheel(tmpdir, wheel):
    """Write the wheel's files into a new archive inside tmpdir.

    The archive is named by wheel_name(wheel). Every entry of
    wheel["sources"] contributes one member: the key is the member name and
    the value is the file on disk which supplies its content.

    Returns the path of the archive which was written.
    """
    archive_path = os.path.join(tmpdir, wheel_name(wheel))
    members = wheel["sources"]

    with zipfile.ZipFile(archive_path, "w") as archive:
        for arcname in members:
            archive.write(members[arcname], arcname)

    return archive_path
def rezip_wheels(opts, manifest):
    """Extract unzipped wheels from the manifest's inputs, simplifying the manifest.

    Every source whose path ends in "/WHEEL" marks an unzipped wheel. For
    each such wheel:

    - if a wheel of the same name is already in the local cache it is reused
      (and touched); otherwise the wheel is re-zipped into opts.tmpdir,
    - a record for it is inserted into manifest["wheels"], keyed by the
      wheel's filename,
    - the sources which belong to the wheel are removed from
      manifest["sources"].

    Args:
        opts: Parsed compiler options; `opts.tmpdir` must name a writable
            directory for freshly zipped wheels.
        manifest: The build manifest. It is updated in place.

    Returns:
        The same manifest object, for chaining.
    """

    wheels = [
        load_wheel(opts, manifest, os.path.dirname(src))
        for src in manifest["sources"].values()
        # A source of None has no backing file and so cannot be a WHEEL.
        if isinstance(src, str) and src.endswith("/WHEEL")
    ]

    # Tolerate manifests which carry no "wheels" section yet.
    wheel_records = manifest.setdefault("wheels", {})

    # Zip up the wheels and insert wheel records to the manifest
    for w in wheels:
        name = wheel_name(w)

        # Try to cheat and hit in the local cache first rather than building
        # wheels every time
        source = cache_wheel_path(name)
        if source.exists():
            try:
                source.touch()
            except OSError:
                # Touching is best-effort; failing to update the cached
                # file's timestamp must not fail the build.
                pass
        else:
            source = zip_wheel(opts.tmpdir, w)

        # Insert a new wheel source. The path is always stored as a string so
        # that the record has one type whether it came from the cache (a path
        # object) or from zip_wheel (a str).
        wheel_records[name] = {"hashes": [], "source": str(source)}

        # Expunge sources available in the wheel
        manifest["sources"] = dsub(manifest["sources"], w["sources"])

    return manifest
def generate_dunder_inits(manifest):
"""Hack the manifest to insert __init__ files as needed."""
@ -106,6 +214,10 @@ def main():
with open(opts.manifest) as fp:
manifest = json.load(fp)
with TemporaryDirectory() as d:
setattr(opts, "tmpdir", d)
manifest = rezip_wheels(opts, manifest)
manifest = insert_manifest_json(opts, manifest)
manifest = enable_unzipping(manifest)
# Patch the manifest to insert needed __init__ files
@ -114,12 +226,17 @@ def main():
if opts.debug:
from pprint import pprint
pprint({
"opts": {k: getattr(opts, k) for k in dir(opts) if not k.startswith("_")},
"manifest": manifest
})
with open(opts.output, 'w') as zapp:
pprint(
{
"opts": {
k: getattr(opts, k) for k in dir(opts) if not k.startswith("_")
},
"manifest": manifest,
}
)
with open(opts.output, "w") as zapp:
shebang = "#!" + manifest["shebang"] + "\n"
zapp.write(shebang)
@ -128,14 +245,13 @@ def main():
exit(1)
# Now we're gonna build the zapp from the manifest
with zipfile.ZipFile(opts.output, 'a') as zapp:
with zipfile.ZipFile(opts.output, "a") as zapp:
# Append the __main__.py generated record
zapp.writestr("__main__.py", make_dunder_main(manifest))
# Append user-specified sources
for dest, src in sorted(manifest["sources"].items(),
key=lambda x: x[0]):
for dest, src in sorted(manifest["sources"].items(), key=lambda x: x[0]):
if src is None:
zapp.writestr(dest, "")
else:

View file

@ -1,17 +1,38 @@
"""The Zapp runtime manifest API."""
import json
from copy import deepcopy
from importlib.resources import open_text
import json
with open_text("zapp", "manifest.json") as fp:
_MANIFEST = json.load(fp)
def once(f):
    """Decorator: call f at most once and replay its result afterwards.

    The first call forwards its arguments to f and stores the result. Every
    later call ignores its arguments and returns that stored object (the
    very same object, not a copy). If f raises, nothing is stored and the
    next call tries again.

    The wrapper keeps f's name and docstring.
    """
    from functools import wraps

    # A private sentinel rather than None, so that a None result is also
    # remembered.
    unset = object()
    state = unset

    @wraps(f)
    def helper(*args, **kwargs):
        nonlocal state
        if state is unset:
            state = f(*args, **kwargs)
        return state

    return helper
def copied(f):
    """Decorator: return a deep copy of whatever f returns.

    Callers get their own copy of the result on every call, so they may
    mutate it without affecting the object f handed back.

    The wrapper keeps f's name and docstring.
    """
    from functools import wraps

    @wraps(f)
    def helper(*args, **kwargs):
        return deepcopy(f(*args, **kwargs))

    return helper
@copied
@once
def manifest():
"""Return (a copy) of the runtime manifest."""
return deepcopy(_MANIFEST)
with open_text("zapp", "manifest.json") as fp:
return json.load(fp)
__all__ = ["manifest"]

View file

@ -1,14 +1,11 @@
"""Conditionally unpack a zapp (and its deps)."""
import sys
import os
import sys
from pathlib import Path
from zipfile import ZipFile
from .manifest import manifest
MANIFEST = manifest()
from zapp.support.manifest import manifest
def cache_root() -> Path:
@ -39,7 +36,7 @@ def unpack_deps():
# For each wheel, touch the existing cached wheel or unpack this one.
with ZipFile(sys.argv[0], "r") as zf:
for whl, config in MANIFEST["wheels"].items():
for whl, config in manifest()["wheels"].items():
cached_whl = cache_wheel_path(whl)
if cached_whl.exists():
cached_whl.touch()

View file

@ -67,8 +67,6 @@ def _zapp_impl(ctx):
# TODO: also handle ctx.attr.src.data_runfiles.symlinks
srcs = [
f for f in ctx.attr.src.default_runfiles.files.to_list()
# Strip out transitive sources from PyPi archives
if f.path.find("/pypi__") == -1
]
# Find the list of directories to add to sys