diff --git a/projects/yamlschema/BUILD b/projects/yamlschema/BUILD index 5ad0b9a..ce0e500 100644 --- a/projects/yamlschema/BUILD +++ b/projects/yamlschema/BUILD @@ -1,10 +1,8 @@ py_library( name="yamlschema", - srcs=[ - "yamlschema.py", - ], + srcs=glob(["src/python/**/*.py"]), imports=[ - ".", + "src/python", ], deps=[ py_requirement("PyYAML"), @@ -13,7 +11,7 @@ py_library( py_pytest( name="test_yamlschema", - srcs=glob(["test_*.py"]), + srcs=glob(["test/python/test_*.py"]), data=glob(["*.json", "*.yaml"]), deps=[ ":yamlschema", diff --git a/projects/yamlschema/README.md b/projects/yamlschema/README.md index 22cf4b2..d8e6977 100644 --- a/projects/yamlschema/README.md +++ b/projects/yamlschema/README.md @@ -1,3 +1,44 @@ # YAML Schema -A pocket library that implements some amount of jsonschema validation against YAML documents. +A library that implements some amount of jsonschema validation against YAML document ASTs. +Unlike other JSON-schema validation tools which give document path relative errors, this approach allows for line & file errors more appropriate to user-facing tools. + +## API Overview + +### `yamlschema.LintRecord(level, node, schema, message)` + +LintRecords are what linting produces. +Each LintRecord contains the YAML AST node which failed validation, the schema it failed to validate against, and some metadata. + +`level` is a `LintLevel` which attempts to explain what "kind" of error this piece of lint represents. +For instance `LintLevel.MISSING` encodes missing `properties`. +`LintLevel.MISSMATCH` encodes type mismatches. +`LintLevel.UNEXPECTED` encodes unexpected/disallowed keys and other errors. + +### `yamlschema.YamlLinter(schema)` + +The linter itself is implemented as a class with a variety of instance methods; allowing the linter to be hacked by users much in the same way that JSON encoders and decoders can be hacked. 
+ +The linter "interface" consists of `__init__(schema: dict)`, which takes a loaded JSON schema as a dict tree, and `lint_document(node, schema=None) -> Iterable[LintRecord]`, which initiates the recursive linting. + +The reference implementation of the linter recursively calls `lint_document` on every sub-structure in the document. + +### `yamlschema.lint_file(schema, path, cls=YamlLinter)` + +As conveniences, yamlschema gives you a couple of entrypoints that handle constructing the linter class, using `yaml.compose()` to get an AST and starting linting for you. +`lint_file` and `lint_buffer` allow the user to bring either a file path or a string of YAML, respectively. + +## Example + +``` python-console +>>> from yamlschema import lint_buffer +>>> list(lint_buffer({"type": "integer"}, "---\n1.0")) +[ + LintRecord( + level=LintLevel.MISSMATCH, + node=ScalarNode(tag='tag:yaml.org,2002:float', value='1.0'), + schema={'type': 'integer'}, + message="Expected an integer, got a 'tag:yaml.org,2002:float'" + ) +] +``` diff --git a/projects/yamlschema/setup.py b/projects/yamlschema/setup.py new file mode 100644 index 0000000..4e7791d --- /dev/null +++ b/projects/yamlschema/setup.py @@ -0,0 +1,29 @@ +from setuptools import setup + +setup( + name="arrdem.yamlschema", + # Package metadata + version="0.1.0", + license="MIT", + description="Detailed JSON schema validation for YAML", + long_description=open("README.md").read(), + long_description_content_type="text/markdown", + author="Reid 'arrdem' McKenzie", + author_email="me@arrdem.com", + url="https://github.com/arrdem/source", + classifiers=[ + "License :: OSI Approved :: MIT License", + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.8", + ], + # Package setup + package_dir={"": "src/python"}, + packages=[ + "yamlschema", + ], + install_requires=[ + "PyYAML~=5.4.1", + ], +) diff --git a/projects/yamlschema/yamlschema.py 
b/projects/yamlschema/src/python/yamlschema/__init__.py similarity index 86% rename from projects/yamlschema/yamlschema.py rename to projects/yamlschema/src/python/yamlschema/__init__.py index 1f023b5..1fe7850 100644 --- a/projects/yamlschema/yamlschema.py +++ b/projects/yamlschema/src/python/yamlschema/__init__.py @@ -49,16 +49,22 @@ class YamlLinter(object): return schema elif ref := schema.get("$ref"): - assert ref.startswith("#/") - path = ref.lstrip("#/").split("/") + document_url, path = ref.split("#") + # FIXME (arrdem 2021-05-17): + # Build support for loading and caching schemas from elsewhere. + assert not document_url + assert path.startswith("/") + path = path[1:].split("/") schema = self._schema for e in path: + if not e: + raise ValueError(f"Unable to dereference {ref}; contains empty segment!") if not (schema := schema.get(e)): - raise ValueError(f"Unable to dereference {ref}") + raise ValueError(f"Unable to dereference {ref}; references missing sub-document!") return schema - def lint_mapping(self, schema, node: Node) -> t.Iterable[str]: + def lint_mapping(self, schema, node: Node) -> t.Iterable[LintRecord]: """FIXME.""" if schema["type"] != "object" or not isinstance(node, MappingNode): @@ -71,7 +77,7 @@ class YamlLinter(object): additional_type: t.Union[dict, bool] = schema.get("additionalProperties", True) properties: dict = schema.get("properties", {}) - required: t.Iterable[str] = schema.get("required", []) + required: t.Iterable[LintRecord] = schema.get("required", []) for k in required: if k not in [_k.value for _k, _v in node.value]: @@ -97,7 +103,7 @@ class YamlLinter(object): f"Key {k.value!r} is not allowed by schema {str(node.start_mark).lstrip()}", ) - def lint_sequence(self, schema, node: Node) -> t.Iterable[str]: + def lint_sequence(self, schema, node: Node) -> t.Iterable[LintRecord]: """FIXME. There aren't sequences we need to lint in the current schema design, punting. 
@@ -117,7 +123,7 @@ class YamlLinter(object): for item in node.value: yield from self.lint_document(item, subschema) - def lint_scalar(self, schema, node: Node) -> t.Iterable[str]: + def lint_scalar(self, schema, node: Node) -> t.Iterable[LintRecord]: """FIXME. The only terminal we care about linting in the current schema is {"type": "string"}. @@ -133,7 +139,7 @@ class YamlLinter(object): else: raise NotImplementedError(f"Scalar type {schema['type']} is not supported") - def lint_string(self, schema, node: Node) -> t.Iterable[str]: + def lint_string(self, schema, node: Node) -> t.Iterable[LintRecord]: """FIXME.""" if node.tag != "tag:yaml.org,2002:str": @@ -162,27 +168,27 @@ class YamlLinter(object): f"Expected a string matching the pattern", ) - def lint_integer(self, schema, node: Node) -> t.Iterable[str]: + def lint_integer(self, schema, node: Node) -> t.Iterable[LintRecord]: if node.tag == "tag:yaml.org,2002:int": value = int(node.value) yield from self._lint_num_range(schema, node, value) else: yield LintRecord( - LintLevel.MISSMATCH, node, schema, f"Expected an integer, got a {node}" + LintLevel.MISSMATCH, node, schema, f"Expected an integer, got a {node.tag}" ) - def lint_number(self, schema, node: Node) -> t.Iterable[str]: + def lint_number(self, schema, node: Node) -> t.Iterable[LintRecord]: if node.tag == "tag:yaml.org,2002:float": value = float(node.value) yield from self._lint_num_range(schema, node, value) else: yield LintRecord( - LintLevel.MISSMATCH, node, schema, f"Expected an integer, got a {node}" + LintLevel.MISSMATCH, node, schema, f"Expected an integer, got a {node.tag}" ) - def _lint_num_range(self, schema, node: Node, value) -> t.Iterable[str]: + def _lint_num_range(self, schema, node: Node, value) -> t.Iterable[LintRecord]: """"FIXME.""" if (base := schema.get("multipleOf")) is not None: @@ -230,7 +236,7 @@ class YamlLinter(object): f"Expected a value greater than or equal to {min}, got {value}", ) - def lint_document(self, node, 
schema=None) -> t.Iterable[str]: + def lint_document(self, node, schema=None) -> t.Iterable[LintRecord]: """Lint a document. Given a Node within a document (or the root of a document!), return a @@ -266,15 +272,14 @@ class YamlLinter(object): raise RuntimeError(f"Unsupported PyYAML node {type(node)}") -def lint_node(schema, node, cls=YamlLinter): +def lint_node(schema, node, cls=YamlLinter) -> t.Iterable[LintRecord]: """Lint a composed PyYAML AST node using a schema and linter.""" - print(repr(node)) linter = cls(schema) yield from linter.lint_document(node) -def lint_buffer(schema, buff: str, cls=YamlLinter): +def lint_buffer(schema, buff: str, cls=YamlLinter) -> t.Iterable[LintRecord]: """Lint a buffer (string).""" with StringIO(buff) as f: @@ -282,7 +287,7 @@ def lint_buffer(schema, buff: str, cls=YamlLinter): yield from lint_node(schema, node, cls=cls) -def lint_file(schema, path, cls=YamlLinter): +def lint_file(schema, path, cls=YamlLinter) -> t.Iterable[LintRecord]: """Lint a file.""" with open(path) as f: diff --git a/projects/yamlschema/test_yamlschema.py b/projects/yamlschema/test/python/test_yamlschema.py similarity index 100% rename from projects/yamlschema/test_yamlschema.py rename to projects/yamlschema/test/python/test_yamlschema.py