source/tools/python/test_licenses.py

156 lines
4.7 KiB
Python
Raw Normal View History

2021-04-08 06:37:51 +00:00
"""
Validate 3rdparty library licenses as approved.
"""
import re
import pytest
import requests
2021-05-31 18:28:46 +00:00
import requirements
from requirements.requirement import Requirement
2021-04-08 06:37:51 +00:00
# Licenses approved as representing non-copyleft and not precluding commercial usage.
# This is all easy, there's a good schema here.
APPROVED_LICENSES = [
2021-05-31 18:28:46 +00:00
MIT := "License :: OSI Approved :: MIT License",
APACHE := "License :: OSI Approved :: Apache Software License",
BSD := "License :: OSI Approved :: BSD License",
MPL10 := "License :: OSI Approved :: Mozilla Public License 1.0 (MPL)",
MPL11 := "License :: OSI Approved :: Mozilla Public License 1.1 (MPL 1.1)",
MPL20 := "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
PSFL := "License :: OSI Approved :: Python Software Foundation License",
LGPL := "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)",
2021-08-14 15:25:47 +00:00
LGPL3 := "License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
2021-05-31 18:28:46 +00:00
ISCL := "License :: OSI Approved :: ISC License (ISCL)",
]
UNAPPROVED_LICENSES = [
GPL1 := "License :: OSI Approved :: GNU General Public License",
GPL2 := "License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
GPL3 := "License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
2021-04-08 06:37:51 +00:00
]
# This data is GARBO.
LICENSES_BY_LOWERNAME = {
2021-05-31 18:28:46 +00:00
"apache 2.0": APACHE,
"apache": APACHE,
"http://www.apache.org/licenses/license-2.0": APACHE,
"bsd 3": BSD,
"bsd": BSD,
"gpl": GPL1,
"gpl2": GPL2,
"gpl3": GPL3,
2021-08-14 15:25:47 +00:00
"lgpl": LGPL,
"lgpl3": LGPL3,
2021-05-31 18:28:46 +00:00
"isc": ISCL,
"mit": MIT,
"mpl": MPL10,
"mpl 2.0": MPL20,
"psf": PSFL,
2021-04-08 06:37:51 +00:00
}
# Mash in some cases.
LICENSES_BY_LOWERNAME.update(
{l.split(" :: ")[-1].lower(): l for l in APPROVED_LICENSES}
)
# As a workaround for packages which don"t have correct meadata on PyPi, hand-verified packages
APPROVED_PACKAGES = [
"yamllint", # WARNING: YAMLLINT IS GLP3"d.
"Flask_Log_Request_ID", # MIT, currently depended on as a git dep.
2021-05-15 00:47:16 +00:00
"anosql", # BSD
2021-04-08 06:37:51 +00:00
]
2021-05-31 18:28:46 +00:00
with open("tools/python/requirements.txt") as fd:
PACKAGES = list(requirements.parse(fd))
2021-04-08 06:37:51 +00:00
2021-05-31 18:28:46 +00:00
def bash_license(ln):
while True:
lnn = re.sub(r"[(),]|( version)|( license)|( ?v(?=\d))|([ -]clause)", "", ln.lower())
if ln != lnn:
ln = lnn
else:
break
2021-04-08 06:37:51 +00:00
2021-05-31 18:28:46 +00:00
ln = LICENSES_BY_LOWERNAME.get(ln, ln)
return ln
2021-04-08 06:37:51 +00:00
2021-05-31 18:28:46 +00:00
@pytest.mark.parametrize("a,b", [
("MIT", MIT),
("mit", MIT),
("BSD", BSD),
("BSD 3-clause", BSD),
("BSD 3 clause", BSD),
("GPL3", GPL3),
("GPL v3", GPL3),
("GPLv3", GPL3),
])
def test_bash_license(a, b):
assert bash_license(a) == b
2021-04-08 06:37:51 +00:00
2021-05-31 18:28:46 +00:00
def licenses(package: Requirement):
2021-04-08 06:37:51 +00:00
"""Get package metadata (the licenses list) from PyPi.
pip and other tools use the local package metadata to introspect licenses which requires that
packages be installed. Going to PyPi isn't strictly reproducible both because the PyPi database
could be updated and we could see network failures but there really isn't a good way to solve
this problem.
"""
l = []
2021-05-31 18:28:46 +00:00
version = next((v for op, v in package.specs if op == "=="), None)
print(package.name, version)
2021-04-08 06:37:51 +00:00
# If we don't have a version (eg. forked git dep) assume we've got the same license constraints
# as the latest upstream release. After all we can't re-license stuff.
if not version:
2021-05-31 18:28:46 +00:00
blob = requests.get(
f"https://pypi.org/pypi/{package.name}/json",
headers={"Accept": "application/json"}
).json()
2021-04-08 06:37:51 +00:00
if ln := bash_license(blob.get("license")):
l.append(ln)
else:
try:
version = list(blob.get("releases", {}).keys())[-1]
except IndexError:
pass
# If we have a version, try to pull that release's metadata since it may have more/better.
if version:
blob = requests.get(
2021-05-31 18:28:46 +00:00
f"https://pypi.org/pypi/{package.name}/{version}/json",
headers={"Accept": "application/json"}
2021-04-08 06:37:51 +00:00
).json()
l = [
c
for c in blob.get("info", {}).get("classifiers", [])
if c.startswith("License")
]
ln = blob.get("info", {}).get("license")
if ln and not l:
l.append(bash_license(ln))
return l
2021-05-31 18:28:46 +00:00
@pytest.mark.parametrize("package", PACKAGES)
def test_approved_license(package):
2021-04-08 06:37:51 +00:00
"""Ensure that a given package is either allowed by name or uses an approved license."""
2021-05-31 18:28:46 +00:00
_licenses = licenses(package)
assert package.name in APPROVED_PACKAGES or any(
2021-04-08 06:37:51 +00:00
l in APPROVED_LICENSES for l in _licenses
), f"{package} was not approved and its license(s) were unknown {_licenses!r}"