source/tools/python/test_licenses.py

145 lines
4.2 KiB
Python
Raw Normal View History

2021-04-08 06:37:51 +00:00
"""
Validate 3rdparty library licenses as approved.
"""
import re
2021-09-20 00:05:22 +00:00
from pkg_resources import (
DistInfoDistribution,
working_set,
)
2021-04-08 06:37:51 +00:00
import pytest
2021-05-31 18:28:46 +00:00
2021-04-08 06:37:51 +00:00
# Trove classifiers for the licenses we approve of: ones treated here as non-copyleft and as
# not precluding commercial usage. Classifiers follow a fixed schema, so this part is easy.
MIT = "License :: OSI Approved :: MIT License"
APACHE = "License :: OSI Approved :: Apache Software License"
BSD = "License :: OSI Approved :: BSD License"
MPL10 = "License :: OSI Approved :: Mozilla Public License 1.0 (MPL)"
MPL11 = "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)"
MPL20 = "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)"
PSFL = "License :: OSI Approved :: Python Software Foundation License"
LGPL = "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)"
LGPL3 = "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)"
ISCL = "License :: OSI Approved :: ISC License (ISCL)"

APPROVED_LICENSES = [
    MIT,
    APACHE,
    BSD,
    MPL10,
    MPL11,
    MPL20,
    PSFL,
    LGPL,
    LGPL3,
    ISCL,
]

# GPL-family classifiers: named so they can be recognized, but deliberately not approved.
GPL1 = "License :: OSI Approved :: GNU General Public License"
GPL2 = "License :: OSI Approved :: GNU General Public License v2 (GPLv2)"
GPL3 = "License :: OSI Approved :: GNU General Public License v3 (GPLv3)"

UNAPPROVED_LICENSES = [
    GPL1,
    GPL2,
    GPL3,
]
# Lowercase free-form license spellings mapped to the classifier each one stands for.
# Unlike the classifiers themselves, this data follows no schema and is collected by hand.
LICENSES_BY_LOWERNAME = {
    "apache 2.0": APACHE,
    "apache": APACHE,
    "http://www.apache.org/licenses/license-2.0": APACHE,
    "bsd 3": BSD,
    "bsd": BSD,
    "gpl": GPL1,
    "gpl2": GPL2,
    "gpl3": GPL3,
    "lgpl": LGPL,
    "lgpl3": LGPL3,
    "isc": ISCL,
    "mit": MIT,
    "mpl": MPL10,
    "mpl 2.0": MPL20,
    "psf": PSFL,
    # Also key every approved classifier by its lowercased final segment, e.g. "mit license".
    **{
        classifier.split(" :: ")[-1].lower(): classifier
        for classifier in APPROVED_LICENSES
    },
}
# Hand-verified packages, allowed by name as a workaround for projects whose metadata on
# PyPI is missing or incorrect.
APPROVED_PACKAGES = [
    "yamllint",  # WARNING: yamllint is GPL3'd.
    "Flask_Log_Request_ID",  # MIT; currently depended on as a git dependency.
    "anosql",  # BSD
]
2021-05-31 18:28:46 +00:00
# Noise removed from free-form license names before lookup: brackets and commas, the words
# " version" / " license", a "v" directly in front of a version digit, " clause" / "-clause",
# and "or later". Compiled once rather than on every normalization pass.
_LICENSE_NOISE = re.compile(
    r"[(),]|( version)|( license)|( ?v(?=\d))|([ -]clause)|(or later)"
)


def bash_license(ln):
    """Bash a free-form license name into its classifier, where one is known.

    The name is lowercased and looked up in ``LICENSES_BY_LOWERNAME``; when the verbatim
    spelling is not a key, noise tokens are stripped repeatedly until nothing more can be
    removed and the lookup is retried, e.g. ``"GPL v3"`` -> ``"gpl3"`` -> ``GPL3``.

    :param ln: license name as found in package metadata (a ``str``).
    :return: the matching classifier string, or the normalized lowercase name when the
        table has no entry for it.
    """
    # Collapse whitespace runs and trim the ends so spacing differences cannot cause misses.
    ln = " ".join(ln.lower().split())

    # The classifier-derived keys in LICENSES_BY_LOWERNAME (e.g. "apache software license")
    # contain words the noise pattern strips, so they can only ever match the verbatim name.
    if ln in LICENSES_BY_LOWERNAME:
        return LICENSES_BY_LOWERNAME[ln]

    # One removal can expose another ("v(3)" -> "v3" -> "3"), so iterate to a fixpoint.
    while True:
        ln, removed = _LICENSE_NOISE.subn("", ln)
        if not removed:
            break

    # Stripping tokens such as "or later" leaves stray spaces ("lgpl3 "); tidy up again.
    ln = " ".join(ln.split())
    return LICENSES_BY_LOWERNAME.get(ln, ln)
2021-04-08 06:37:51 +00:00
2021-09-03 04:10:35 +00:00
@pytest.mark.parametrize(
    ("a", "b"),
    [
        *((spelling, MIT) for spelling in ("MIT", "mit")),
        *((spelling, BSD) for spelling in ("BSD", "BSD 3-clause", "BSD 3 clause")),
        *((spelling, GPL3) for spelling in ("GPL3", "GPL v3", "GPLv3")),
    ],
)
def test_bash_license(a, b):
    """Check that the free-form spelling ``a`` is bashed into the classifier ``b``."""
    bashed = bash_license(a)
    assert bashed == b
2021-04-08 06:37:51 +00:00
def licenses(dist: DistInfoDistribution):
    """Collect the license identifiers declared in a distribution's PKG_INFO metadata.

    The metadata text is obtained via ``dist.get_metadata(dist.PKG_INFO)`` and scanned line
    by line. If it carries any ``Classifier: License :: ...`` lines, those classifiers are
    returned. Otherwise the result is a one-element list holding the value of the first
    ``License:`` line run through ``bash_license``, with ``UNKNOWN`` standing in when no
    such line exists.
    """
    print(dist.project_name, dist.version, type(dist))
    pkg_info_lines = dist.get_metadata(dist.PKG_INFO).split("\n")

    classifier_prefix = "Classifier: "
    license_classifiers = [
        classifier
        for classifier in (
            line[len(classifier_prefix):]
            for line in pkg_info_lines
            if line.startswith(classifier_prefix)
        )
        if classifier.startswith("License ::")
    ]
    if license_classifiers:
        return license_classifiers

    # No license classifiers were declared: fall back to the free-form License header.
    license_header = next(
        (line for line in pkg_info_lines if line.startswith("License:")),
        "License: UNKNOWN",
    )
    return [bash_license(license_header.replace("License: ", "", 1))]
2021-04-08 06:37:51 +00:00
2021-09-25 04:37:38 +00:00
@pytest.mark.parametrize(
    "dist",
    [w for w in working_set if "arrdem_source_pypi" in w.location],
    ids=lambda dist: dist.project_name,
)
def test_approved_license(dist: DistInfoDistribution):
    """Ensure that a given package is either allowed by name or uses an approved license."""
    _licenses = licenses(dist)
    print(dist.location)

    # Packages vetted by hand pass regardless of what their metadata says.
    if dist.project_name in APPROVED_PACKAGES:
        return

    has_approved_license = any(lic in APPROVED_LICENSES for lic in _licenses)
    assert (
        has_approved_license
    ), f"{dist.project_name} ({dist.location}) was not approved and its license(s) were unknown {_licenses!r}"