Make yamlschema publishable

This commit is contained in:
Reid 'arrdem' McKenzie 2021-05-17 14:38:13 -06:00
parent 65879a3e7b
commit f194018616
5 changed files with 97 additions and 24 deletions

View file

@ -1,10 +1,8 @@
py_library( py_library(
name="yamlschema", name="yamlschema",
srcs=[ srcs=glob(["src/python/**/*.py"]),
"yamlschema.py",
],
imports=[ imports=[
".", "src/python",
], ],
deps=[ deps=[
py_requirement("PyYAML"), py_requirement("PyYAML"),
@ -13,7 +11,7 @@ py_library(
py_pytest( py_pytest(
name="test_yamlschema", name="test_yamlschema",
srcs=glob(["test_*.py"]), srcs=glob(["test/python/test_*.py"]),
data=glob(["*.json", "*.yaml"]), data=glob(["*.json", "*.yaml"]),
deps=[ deps=[
":yamlschema", ":yamlschema",

View file

@ -1,3 +1,44 @@
# YAML Schema # YAML Schema
A pocket library that implements some amount of jsonschema validation against YAML documents. A library that implements some amount of jsonschema validation against YAML document ASTs.
Unlike other JSON-schema validation tools, which give document-path-relative errors, this approach allows for line & file errors more appropriate to user-facing tools.
## API Overview
### `yamlschema.LintRecord(level, node, schema, message)`
LintRecords are what linting produces.
Each LintRecord contains the YAML AST node which failed validation, the schema it failed to validate against, and some metadata.
`level` is a `LintLevel` which attempts to explain what "kind" of error this piece of lint represents.
For instance `LintLevel.MISSING` encodes missing `properties`.
`LintLevel.MISSMATCH` encodes type mismatches.
`LintLevel.UNEXPECTED` encodes unexpected/disallowed keys and other errors.
### `yamlschema.YamlLinter(schema)`
The linter itself is implemented as a class with a variety of instance methods, allowing the linter to be hacked by users in much the same way that JSON encoders and decoders can be hacked.
The linter "interface" consists of `__init__(schema: dict)`, which takes a loaded JSON schema as a dict tree, and `lint_document(node, schema=None) -> Iterable[LintRecord]`, which initiates the recursive linting.
The reference implementation of the linter recursively calls `lint_document` on every sub-structure in the document.
### `yamlschema.lint_file(schema, path, cls=YamlLinter)`
As conveniences, yamlschema gives you a couple of entrypoints that handle constructing the linter class, using `yaml.compose()` to get an AST, and starting linting for you.
`lint_file` and `lint_buffer` allow the user to bring a file path or a string of YAML, respectively.
## Example
``` python-console
>>> from yamlschema import lint_buffer
>>> list(lint_buffer({"type": "integer"}, "---\n1.0"))
[
LintRecord(
level=<LintLevel.MISSMATCH: 2>,
node=ScalarNode(tag='tag:yaml.org,2002:float', value='1.0'),
schema={'type': 'integer'},
message="Expected an integer, got a tag:yaml.org,2002:float"
)
]
```

View file

@ -0,0 +1,29 @@
from setuptools import setup
# Read the long description through a context manager so the file handle is
# closed deterministically, and decode it as UTF-8 explicitly instead of
# relying on whatever default encoding the build machine's locale provides.
# NOTE(review): the path is relative to the current working directory, so the
# build is expected to be invoked from the directory containing README.md.
with open("README.md", encoding="utf-8") as readme:
    long_description = readme.read()

setup(
    name="arrdem.yamlschema",
    # Package metadata
    version="0.1.0",
    license="MIT",
    description="Detailed JSON schema validation for YAML",
    long_description=long_description,
    long_description_content_type="text/markdown",
    author="Reid 'arrdem' McKenzie",
    author_email="me@arrdem.com",
    url="https://github.com/arrdem/source",
    classifiers=[
        "License :: OSI Approved :: MIT License",
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
    ],
    # Package setup
    # The library source uses assignment expressions (`:=`), which require
    # Python 3.8+; declare that so installers refuse older interpreters
    # instead of failing with a SyntaxError at import time.
    python_requires=">=3.8",
    package_dir={"": "src/python"},
    packages=[
        "yamlschema",
    ],
    install_requires=[
        "PyYAML~=5.4.1",
    ],
)

View file

@ -49,16 +49,22 @@ class YamlLinter(object):
return schema return schema
elif ref := schema.get("$ref"): elif ref := schema.get("$ref"):
assert ref.startswith("#/") document_url, path = ref.split("#")
path = ref.lstrip("#/").split("/") # FIXME (arrdem 2021-05-17):
# Build support for loading and caching schemas from elsewhere.
assert not document_url
assert path.startswith("/")
path = path[1:].split("/")
schema = self._schema schema = self._schema
for e in path: for e in path:
if not e:
raise ValueError(f"Unable to dereference {ref}; contains empty segment!")
if not (schema := schema.get(e)): if not (schema := schema.get(e)):
raise ValueError(f"Unable to dereference {ref}") raise ValueError(f"Unable to dereference {ref}; references missing sub-document!")
return schema return schema
def lint_mapping(self, schema, node: Node) -> t.Iterable[str]: def lint_mapping(self, schema, node: Node) -> t.Iterable[LintRecord]:
"""FIXME.""" """FIXME."""
if schema["type"] != "object" or not isinstance(node, MappingNode): if schema["type"] != "object" or not isinstance(node, MappingNode):
@ -71,7 +77,7 @@ class YamlLinter(object):
additional_type: t.Union[dict, bool] = schema.get("additionalProperties", True) additional_type: t.Union[dict, bool] = schema.get("additionalProperties", True)
properties: dict = schema.get("properties", {}) properties: dict = schema.get("properties", {})
required: t.Iterable[str] = schema.get("required", []) required: t.Iterable[LintRecord] = schema.get("required", [])
for k in required: for k in required:
if k not in [_k.value for _k, _v in node.value]: if k not in [_k.value for _k, _v in node.value]:
@ -97,7 +103,7 @@ class YamlLinter(object):
f"Key {k.value!r} is not allowed by schema {str(node.start_mark).lstrip()}", f"Key {k.value!r} is not allowed by schema {str(node.start_mark).lstrip()}",
) )
def lint_sequence(self, schema, node: Node) -> t.Iterable[str]: def lint_sequence(self, schema, node: Node) -> t.Iterable[LintRecord]:
"""FIXME. """FIXME.
There aren't sequences we need to lint in the current schema design, punting. There aren't sequences we need to lint in the current schema design, punting.
@ -117,7 +123,7 @@ class YamlLinter(object):
for item in node.value: for item in node.value:
yield from self.lint_document(item, subschema) yield from self.lint_document(item, subschema)
def lint_scalar(self, schema, node: Node) -> t.Iterable[str]: def lint_scalar(self, schema, node: Node) -> t.Iterable[LintRecord]:
"""FIXME. """FIXME.
The only terminal we care about linting in the current schema is {"type": "string"}. The only terminal we care about linting in the current schema is {"type": "string"}.
@ -133,7 +139,7 @@ class YamlLinter(object):
else: else:
raise NotImplementedError(f"Scalar type {schema['type']} is not supported") raise NotImplementedError(f"Scalar type {schema['type']} is not supported")
def lint_string(self, schema, node: Node) -> t.Iterable[str]: def lint_string(self, schema, node: Node) -> t.Iterable[LintRecord]:
"""FIXME.""" """FIXME."""
if node.tag != "tag:yaml.org,2002:str": if node.tag != "tag:yaml.org,2002:str":
@ -162,27 +168,27 @@ class YamlLinter(object):
f"Expected a string matching the pattern", f"Expected a string matching the pattern",
) )
def lint_integer(self, schema, node: Node) -> t.Iterable[str]: def lint_integer(self, schema, node: Node) -> t.Iterable[LintRecord]:
if node.tag == "tag:yaml.org,2002:int": if node.tag == "tag:yaml.org,2002:int":
value = int(node.value) value = int(node.value)
yield from self._lint_num_range(schema, node, value) yield from self._lint_num_range(schema, node, value)
else: else:
yield LintRecord( yield LintRecord(
LintLevel.MISSMATCH, node, schema, f"Expected an integer, got a {node}" LintLevel.MISSMATCH, node, schema, f"Expected an integer, got a {node.tag}"
) )
def lint_number(self, schema, node: Node) -> t.Iterable[str]: def lint_number(self, schema, node: Node) -> t.Iterable[LintRecord]:
if node.tag == "tag:yaml.org,2002:float": if node.tag == "tag:yaml.org,2002:float":
value = float(node.value) value = float(node.value)
yield from self._lint_num_range(schema, node, value) yield from self._lint_num_range(schema, node, value)
else: else:
yield LintRecord( yield LintRecord(
LintLevel.MISSMATCH, node, schema, f"Expected an integer, got a {node}" LintLevel.MISSMATCH, node, schema, f"Expected an integer, got a {node.tag}"
) )
def _lint_num_range(self, schema, node: Node, value) -> t.Iterable[str]: def _lint_num_range(self, schema, node: Node, value) -> t.Iterable[LintRecord]:
""""FIXME.""" """"FIXME."""
if (base := schema.get("multipleOf")) is not None: if (base := schema.get("multipleOf")) is not None:
@ -230,7 +236,7 @@ class YamlLinter(object):
f"Expected a value greater than or equal to {min}, got {value}", f"Expected a value greater than or equal to {min}, got {value}",
) )
def lint_document(self, node, schema=None) -> t.Iterable[str]: def lint_document(self, node, schema=None) -> t.Iterable[LintRecord]:
"""Lint a document. """Lint a document.
Given a Node within a document (or the root of a document!), return a Given a Node within a document (or the root of a document!), return a
@ -266,15 +272,14 @@ class YamlLinter(object):
raise RuntimeError(f"Unsupported PyYAML node {type(node)}") raise RuntimeError(f"Unsupported PyYAML node {type(node)}")
def lint_node(schema, node, cls=YamlLinter): def lint_node(schema, node, cls=YamlLinter) -> t.Iterable[LintRecord]:
"""Lint a composed PyYAML AST node using a schema and linter.""" """Lint a composed PyYAML AST node using a schema and linter."""
print(repr(node))
linter = cls(schema) linter = cls(schema)
yield from linter.lint_document(node) yield from linter.lint_document(node)
def lint_buffer(schema, buff: str, cls=YamlLinter): def lint_buffer(schema, buff: str, cls=YamlLinter) -> t.Iterable[LintRecord]:
"""Lint a buffer (string).""" """Lint a buffer (string)."""
with StringIO(buff) as f: with StringIO(buff) as f:
@ -282,7 +287,7 @@ def lint_buffer(schema, buff: str, cls=YamlLinter):
yield from lint_node(schema, node, cls=cls) yield from lint_node(schema, node, cls=cls)
def lint_file(schema, path, cls=YamlLinter): def lint_file(schema, path, cls=YamlLinter) -> t.Iterable[LintRecord]:
"""Lint a file.""" """Lint a file."""
with open(path) as f: with open(path) as f: