From 46e7ff7ddfa67fe25498bc91f02de0fe11d5d65c Mon Sep 17 00:00:00 2001 From: Dmitry Dygalo Date: Sun, 17 May 2020 17:35:11 +0200 Subject: [PATCH] feat: FFI-based Python bindings --- .github/workflows/build.yml | 50 +++++++ .github/workflows/python-release.yml | 47 +++++++ .gitignore | 7 +- .pre-commit-config.yaml | 37 +++++ .yamllint | 4 + CHANGELOG.md | 2 +- Cargo.toml | 2 +- LICENSE | 2 +- README.md | 2 +- benches/canada_schema.json | 2 +- benches/small_invalid.json | 2 +- benches/small_schema.json | 2 +- benches/small_valid.json | 2 +- python/Cargo.toml | 41 ++++++ python/MANIFEST.in | 6 + python/README.rst | 98 +++++++++++++ python/benches/bench.py | 67 +++++++++ python/benches/conftest.py | 2 + python/build-sdist.sh | 15 ++ python/build-wheels.sh | 18 +++ python/pyproject.toml | 15 ++ python/pysrc/jsonschema_rs/__init__.py | 3 + python/rust-toolchain | 1 + python/setup.py | 36 +++++ python/src/lib.rs | 154 +++++++++++++++++++++ python/src/ser.rs | 184 +++++++++++++++++++++++++ python/src/string.rs | 51 +++++++ python/src/types.rs | 30 ++++ python/tests-py/test_jsonschema.py | 79 +++++++++++ python/tests-py/test_suite.py | 48 +++++++ python/tox.ini | 14 ++ src/error.rs | 2 +- src/lib.rs | 2 +- 33 files changed, 1016 insertions(+), 11 deletions(-) create mode 100644 .github/workflows/python-release.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .yamllint create mode 100644 python/Cargo.toml create mode 100644 python/MANIFEST.in create mode 100644 python/README.rst create mode 100644 python/benches/bench.py create mode 100644 python/benches/conftest.py create mode 100755 python/build-sdist.sh create mode 100755 python/build-wheels.sh create mode 100644 python/pyproject.toml create mode 100644 python/pysrc/jsonschema_rs/__init__.py create mode 100644 python/rust-toolchain create mode 100644 python/setup.py create mode 100644 python/src/lib.rs create mode 100644 python/src/ser.rs create mode 100644 python/src/string.rs create mode 100644 
python/src/types.rs create mode 100644 python/tests-py/test_jsonschema.py create mode 100644 python/tests-py/test_suite.py create mode 100644 python/tox.ini diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 848d0d6..4d9b496 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -17,6 +17,22 @@ jobs: - name: Run commitsar uses: docker://commitsar/commitsar + pre-commit: + name: Generic pre-commit checks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + with: + fetch-depth: 1 + + - uses: actions/setup-python@v1 + with: + python-version: 3.7 + + - run: pip install pre-commit + - run: pre-commit run --all-files + working-directory: ./python + check: name: Check runs-on: ubuntu-latest @@ -91,6 +107,40 @@ jobs: token: ${{ secrets.CODECOV_TOKEN }} file: ${{ steps.coverage.outputs.report }} + test-python: + strategy: + matrix: + os: [ubuntu-latest] + python: [3.5, 3.6, 3.7, 3.8] + + name: Python ${{ matrix.python }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python }} + - name: Start background server # User for integration tests + run: | + # This assumes that python3 is installed on the system + /usr/bin/env python3 -m pip install flask + # Starts the server in background + /usr/bin/env python3 ./tests/suite/bin/jsonschema_suite serve & + + - run: /usr/bin/env python3 -m pip install setuptools_rust tox + working-directory: ./python + + - run: ./build-sdist.sh + working-directory: ./python + + - name: Run ${{ matrix.python }} tox job + run: tox -e py${TOX_JOB//.} # Strip dot from python version to match tox job + working-directory: ./python + env: + TOX_JOB: ${{ matrix.python }} + fmt: name: Rustfmt runs-on: ubuntu-latest diff --git a/.github/workflows/python-release.yml b/.github/workflows/python-release.yml new file mode 100644 index 0000000..95c695b --- /dev/null 
+++ b/.github/workflows/python-release.yml @@ -0,0 +1,47 @@ +name: Python Release + +on: + push: + tags: + - python-v* + +jobs: + + create_wheels_manylinux: + runs-on: ubuntu-latest + name: Create wheels for manylinux + container: quay.io/pypa/manylinux2010_x86_64 + steps: + - uses: actions/checkout@v1 + + - run: /usr/bin/env python3 -m pip install setuptools_rust twine + + - name: Build and audit wheels + working-directory: ./python + run: sh build-wheels.sh + + - name: Upload package + working-directory: ./python + run: twine upload ./dist/* --username=${PYPI_USERNAME} --password=${PYPI_PASSWORD} + env: + PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} + PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + + create_source_dist: + name: Create sdist package + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v1 + + - run: /usr/bin/env python3 -m pip install setuptools_rust twine + + - name: Build sdist + working-directory: ./python + run: sh build-sdist.sh + + - name: Upload package + working-directory: ./python + run: twine upload ./dist/* --username=${PYPI_USERNAME} --password=${PYPI_PASSWORD} + env: + PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }} + PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} diff --git a/.gitignore b/.gitignore index 408b8a5..105f7fb 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ /target Cargo.lock -.idea \ No newline at end of file +.idea +*.so +.hypothesis +.tox +*.tar.gz +*.egg-info diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..09e5755 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,37 @@ +default_language_version: + python: python3.7 + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.5.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + exclude: ^.*\.(md|rst)$ + - id: debug-statements + - id: mixed-line-ending + args: [--fix=lf] + - id: check-merge-conflict + + - repo: https://github.com/jorisroovers/gitlint + rev: 
v0.13.1 + hooks: + - id: gitlint + + - repo: https://github.com/adrienverge/yamllint + rev: v1.21.0 + hooks: + - id: yamllint + + - repo: https://github.com/ambv/black + rev: stable + hooks: + - id: black + types: [python] + + - repo: https://github.com/pre-commit/mirrors-isort + rev: v4.3.21 + hooks: + - id: isort + additional_dependencies: ["isort[pyproject]"] diff --git a/.yamllint b/.yamllint new file mode 100644 index 0000000..524ca96 --- /dev/null +++ b/.yamllint @@ -0,0 +1,4 @@ +extends: relaxed +rules: + line-length: + max: 120 diff --git a/CHANGELOG.md b/CHANGELOG.md index 808c1c2..0ac2da3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -63,4 +63,4 @@ [Unreleased]: https://github.com/Stranger6667/jsonschema-rs/compare/v0.3.0...HEAD [0.3.0]: https://github.com/Stranger6667/jsonschema-rs/compare/v0.2.0...v0.3.0 -[0.2.0]: https://github.com/Stranger6667/jsonschema-rs/compare/v0.1.0...v0.2.0 \ No newline at end of file +[0.2.0]: https://github.com/Stranger6667/jsonschema-rs/compare/v0.1.0...v0.2.0 diff --git a/Cargo.toml b/Cargo.toml index 60748a6..945c510 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,7 @@ readme = "README.md" description = "A crate for performing JSON schema validation" repository = "https://github.com/Stranger6667/jsonschema-rs" keywords = ["jsonschema", "validation"] -exclude = ["tests"] +exclude = ["tests", "python"] categories = ["web-programming"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/LICENSE b/LICENSE index 5281683..8269910 100644 --- a/LICENSE +++ b/LICENSE @@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. \ No newline at end of file +SOFTWARE. 
diff --git a/README.md b/README.md index 9b37464..440ed73 100644 --- a/README.md +++ b/README.md @@ -93,4 +93,4 @@ a validator is compiled every time. You can find benchmark code in `benches/jsonschema.rs`, Rust version is `1.44` -**NOTE**. This library is in early development. \ No newline at end of file +**NOTE**. This library is in early development. diff --git a/benches/canada_schema.json b/benches/canada_schema.json index 0d17765..953b201 100644 --- a/benches/canada_schema.json +++ b/benches/canada_schema.json @@ -91,4 +91,4 @@ "type" ], "type": "object" -} \ No newline at end of file +} diff --git a/benches/small_invalid.json b/benches/small_invalid.json index 583f89c..357d6b1 100644 --- a/benches/small_invalid.json +++ b/benches/small_invalid.json @@ -1 +1 @@ -[10, "world", [1, "a", true], {"a": "a", "b": "b", "c": "xy"}, "str", 5] \ No newline at end of file +[10, "world", [1, "a", true], {"a": "a", "b": "b", "c": "xy"}, "str", 5] diff --git a/benches/small_schema.json b/benches/small_schema.json index 2ff5750..9ef686b 100644 --- a/benches/small_schema.json +++ b/benches/small_schema.json @@ -37,4 +37,4 @@ {"type": "number", "multipleOf": 5} ]} ] -} \ No newline at end of file +} diff --git a/benches/small_valid.json b/benches/small_valid.json index 661b8ad..9b99003 100644 --- a/benches/small_valid.json +++ b/benches/small_valid.json @@ -1 +1 @@ -[9, "hello", [1, "a", true], {"a": "a", "b": "b", "d": "d"}, 42, 3] \ No newline at end of file +[9, "hello", [1, "a", true], {"a": "a", "b": "b", "d": "d"}, 42, 3] diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 0000000..ca0daba --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "jsonschema-python" +version = "0.1.0" +authors = ["Dmitry Dygalo "] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "jsonschema_rs" +crate-type = ["cdylib"] + +[dependencies] +serde_json = "1" +serde = 
"1" + +[dependencies.jsonschema] +path = "../" + +[dependencies.pyo3] +version = ">= 0.10" +features = ["extension-module"] + +[package.metadata.maturin] +requires-python = ">=3.5" +project-url = ["https://github.com/Stranger6667/jsonschema-rs"] +maintainer = "Dmitry Dygalo" +maintainer-email = "Dmitry Dygalo " +classifier = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Rust", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: Implementation :: CPython", +] diff --git a/python/MANIFEST.in b/python/MANIFEST.in new file mode 100644 index 0000000..6fce3f9 --- /dev/null +++ b/python/MANIFEST.in @@ -0,0 +1,6 @@ +include Cargo.toml +include pyproject.toml +include rust-toolchain +recursive-include src * +recursive-include jsonschema/src/ * +include jsonschema/Cargo.toml diff --git a/python/README.rst b/python/README.rst new file mode 100644 index 0000000..fa49d8b --- /dev/null +++ b/python/README.rst @@ -0,0 +1,98 @@ +jsonschema-rs +============= + +|Build| |Version| |Python versions| |License| + +Fast JSON Schema validation for Python implemented in Rust. + +Supported drafts: + +- Draft 7 +- Draft 6 +- Draft 4 + +There are some notable restrictions at the moment: +- The underlying crate doesn't support arbitrary precision integers yet, which may lead to ``SystemError`` when such value is used; +- ``multipleOf`` keyword validation may produce false-negative results on some input. See `#84 `_ for more details + +Installation +------------ + +To install ``jsonschema-rs`` via ``pip`` run the following command: + +.. 
code:: bash + + pip install jsonschema-rs + +Usage +----- + +To check if the input document is valid: + +.. code:: python + + import jsonschema_rs + + validator = jsonschema_rs.JSONSchema({"minimum": 42}) + validator.is_valid(45) # True + +**NOTE**. This library is in early development and does not yet provide a way to show validation errors (even though it is implemented in the underlying Rust crate). + +Performance +----------- + +According to our benchmarks, ``jsonschema-rs`` is usually faster than existing alternatives in real-life scenarios. + +However, for single-keyword or boolean schemas it might be slower than ``fastjsonschema``. + +Compiled validators (when the input schema is compiled once and reused later) + ++----------------+------------------------+----------------------+----------------------+------------------------+ +| library | ``false`` | ``{"minimum": 10}`` | small | big | ++================+========================+======================+======================+========================+ +| jsonschema-rs | 320.3 ns | 329.32 ns | 1.15 us | 5.8 ms | ++----------------+------------------------+----------------------+----------------------+------------------------+ +| fastjsonschema | 52.29 ns (**x0.16**) | 134.43 ns (**x0.4**) | 6.01 us (**x5.22**) | 587.5 ms (**x101.29**) | ++----------------+------------------------+----------------------+----------------------+------------------------+ +| jsonschema | 289.97 ns (**x0.9**) | 2.52 us (**x7.65**) | 74.98 us (**x65.2**) | 2.02 s (**x348.27**) | ++----------------+------------------------+----------------------+----------------------+------------------------+ + +Validators are not compiled (``jsonschema``) or compiled on every validation: + ++----------------+------------------------+-------------------------+-----------------------+-------------------------+ +| library | ``false`` | ``{"minimum": 10}`` | small | big | 
++================+========================+=========================+=======================+=========================+ +| jsonschema-rs | 402.35 ns | 908.06 ns | 9.54 us | 5.9 ms | ++----------------+------------------------+-------------------------+-----------------------+-------------------------+ +| fastjsonschema | 64.08 us (**x159.26**) | 119.57 us (**x131.67**) | 1.43 ms (**x149.89**) | 599.84 ms (**x101.66**) | ++----------------+------------------------+-------------------------+-----------------------+-------------------------+ +| jsonschema | 67.74 us (**x168.36**) | 76.62 us (**x84.37**) | 1.02 ms (**x106.91**) | 2.11 s (**x357.62**) | ++----------------+------------------------+-------------------------+-----------------------+-------------------------+ + +The bigger the input, the bigger the performance win. + +In the tables above, ``big`` and ``small`` schemas refer to more realistic schemas and input instances. +You can take a look at benchmarks in ``benches/bench.py``. Ratios are given against ``jsonschema-rs``. + +Python support +-------------- + +``jsonschema-rs`` supports Python 3.5, 3.6, 3.7 and 3.8. + +License +------- + +The code in this project is licensed under `MIT license`_. +By contributing to ``jsonschema-rs``, you agree that your contributions +will be licensed under its MIT license. + +.. |Build| image:: https://github.com/Stranger6667/jsonschema-rs/workflows/build/badge.svg + :target: https://github.com/Stranger6667/jsonschema-rs/actions +.. |Version| image:: https://img.shields.io/pypi/v/jsonschema-rs.svg + :target: https://pypi.org/project/jsonschema-rs/ +.. |Python versions| image:: https://img.shields.io/pypi/pyversions/jsonschema-rs.svg + :target: https://pypi.org/project/jsonschema-rs/ +.. |License| image:: https://img.shields.io/pypi/l/jsonschema-rs.svg + :target: https://opensource.org/licenses/MIT + +.. 
_MIT license: https://opensource.org/licenses/MIT diff --git a/python/benches/bench.py b/python/benches/bench.py new file mode 100644 index 0000000..0c728c3 --- /dev/null +++ b/python/benches/bench.py @@ -0,0 +1,67 @@ +import json + +import fastjsonschema +import jsonschema +import pytest + +import jsonschema_rs + + +def load_json(filename): + with open(filename) as fd: + return json.load(fd) + + +BIG_SCHEMA = load_json("../benches/canada_schema.json") +BIG_INSTANCE = load_json("../benches/canada.json") +SMALL_SCHEMA = load_json("../benches/small_schema.json") +SMALL_INSTANCE_VALID = [9, "hello", [1, "a", True], {"a": "a", "b": "b", "d": "d"}, 42, 3] + + +@pytest.fixture(params=[True, False]) +def is_compiled(request): + return request.param + + +@pytest.fixture(params=["rust", "python", "python-fast"]) +def args(request, is_compiled): + schema, instance = request.node.get_closest_marker("data").args + if request.param == "rust": + if is_compiled: + return jsonschema_rs.JSONSchema(schema).is_valid, instance + else: + return jsonschema_rs.is_valid, schema, instance + if request.param == "python": + if is_compiled: + return jsonschema.validators.validator_for(schema)(schema).is_valid, instance + else: + return jsonschema.validate, instance, schema + if request.param == "python-fast": + if is_compiled: + return fastjsonschema.compile(schema), instance + else: + return fastjsonschema.validate, schema, instance + + +@pytest.mark.data(True, True) +@pytest.mark.benchmark(group="boolean") +def test_boolean(benchmark, args): + benchmark(*args) + + +@pytest.mark.data({"minimum": 10}, 10) +@pytest.mark.benchmark(group="minimum") +def test_minimum(benchmark, args): + benchmark(*args) + + +@pytest.mark.data(SMALL_SCHEMA, SMALL_INSTANCE_VALID) +@pytest.mark.benchmark(group="small") +def test_small_schema(benchmark, args): + benchmark(*args) + + +@pytest.mark.data(BIG_SCHEMA, BIG_INSTANCE) +@pytest.mark.benchmark(group="big") +def test_big_schema(benchmark, args): + 
benchmark(*args) diff --git a/python/benches/conftest.py b/python/benches/conftest.py new file mode 100644 index 0000000..3ab1ae9 --- /dev/null +++ b/python/benches/conftest.py @@ -0,0 +1,2 @@ +def pytest_configure(config): + config.addinivalue_line("markers", "data(schema, instance): add data for benchmarking") diff --git a/python/build-sdist.sh b/python/build-sdist.sh new file mode 100755 index 0000000..f829eca --- /dev/null +++ b/python/build-sdist.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# `setuptools_rust` and `maturin` don't support some local dependencies as `jsonschema` is (it is in the parent directory) +# As a workaround we create a modified distribution of this library that has `jsonschema` crate as a dependency in +# the same directory, then the sources are copied as declared in MANIFEST.in and the resulting package can be +# installed properly +set -ex + +ln -sf ../ jsonschema +# Modify cargo.toml to include this symlink +sed -i 's/\.\.\//jsonschema/' Cargo.toml +# Build the source distribution +python setup.py sdist +# Rollback local changes after a source distribution is ready +rm jsonschema +sed -i 's/"jsonschema"/"\.\.\/"/' Cargo.toml diff --git a/python/build-wheels.sh b/python/build-wheels.sh new file mode 100755 index 0000000..0203724 --- /dev/null +++ b/python/build-wheels.sh @@ -0,0 +1,18 @@ +#!/bin/bash +set -ex + +yum install openssl-devel -y + +curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly -y +export PATH="$HOME/.cargo/bin:$PATH" + +for PYBIN in /opt/python/{cp35-cp35m,cp36-cp36m,cp37-cp37m,cp38-cp38}/bin; do + export PYTHON_SYS_EXECUTABLE="$PYBIN/python" + + "${PYBIN}/pip" install -U setuptools wheel setuptools-rust + "${PYBIN}/python" setup.py bdist_wheel +done + +for whl in dist/*.whl; do + auditwheel repair "$whl" -w dist/ +done diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 0000000..d211912 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,15 @@ +[build-system] +requires = 
["setuptools", "wheel", "setuptools-rust"] + +[tool.black] +line-length = 120 +target_version = ["py37"] + +[tool.isort] +# config compatible with Black +line_length = 120 +multi_line_output = 3 +default_section = "THIRDPARTY" +include_trailing_comma = true +known_first_party = "jsonschema_rs" +known_third_party = [] diff --git a/python/pysrc/jsonschema_rs/__init__.py b/python/pysrc/jsonschema_rs/__init__.py new file mode 100644 index 0000000..8d827ea --- /dev/null +++ b/python/pysrc/jsonschema_rs/__init__.py @@ -0,0 +1,3 @@ +from .jsonschema_rs import * + +del jsonschema_rs diff --git a/python/rust-toolchain b/python/rust-toolchain new file mode 100644 index 0000000..bf867e0 --- /dev/null +++ b/python/rust-toolchain @@ -0,0 +1 @@ +nightly diff --git a/python/setup.py b/python/setup.py new file mode 100644 index 0000000..f87f668 --- /dev/null +++ b/python/setup.py @@ -0,0 +1,36 @@ +from setuptools import find_packages, setup +from setuptools_rust import Binding, RustExtension + +setup( + name="jsonschema_rs", + version="0.1.0", + description="Fast JSON Schema validation for Python implemented in Rust", + long_description=open("README.rst", encoding="utf-8").read(), + long_description_content_type="text/x-rst", + keywords="jsonschema validation rust", + author="Dmitry Dygalo", + author_email="dadygalo@gmail.com", + maintainer="Dmitry Dygalo", + maintainer_email="dadygalo@gmail.com", + python_requires=">=3.5", + url="https://github.com/Stranger6667/jsonschema-rs/python", + license="MIT", + rust_extensions=[RustExtension("jsonschema_rs.jsonschema_rs", binding=Binding.PyO3)], + extras={"tests": ["pytest", "hypothesis"], "bench": ["pytest-benchmark"]}, + classifiers=[ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: 
Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Rust", + ], + packages=find_packages(where="pysrc"), + package_dir={"": "pysrc"}, + zip_safe=False, +) diff --git a/python/src/lib.rs b/python/src/lib.rs new file mode 100644 index 0000000..6835440 --- /dev/null +++ b/python/src/lib.rs @@ -0,0 +1,154 @@ +#![feature(core_intrinsics)] +#![warn( + clippy::doc_markdown, + clippy::redundant_closure, + clippy::explicit_iter_loop, + clippy::match_same_arms, + clippy::needless_borrow, + clippy::print_stdout, + clippy::integer_arithmetic, + clippy::cast_possible_truncation, + clippy::result_unwrap_used, + clippy::result_map_unwrap_or_else, + clippy::option_unwrap_used, + clippy::option_map_unwrap_or_else, + clippy::option_map_unwrap_or +)] +use jsonschema::Draft; +use pyo3::prelude::*; +use pyo3::types::PyAny; +use pyo3::{exceptions, wrap_pyfunction, PyObjectProtocol}; +use serde_json::Value; + +mod ser; +mod string; +mod types; + +const MODULE_DOCSTRING: &str = "JSON Schema validation for Python written in Rust."; +const DRAFT7: u8 = 7; +const DRAFT6: u8 = 6; +const DRAFT4: u8 = 4; + +#[derive(Debug)] +enum JSONSchemaError { + Compilation(jsonschema::CompilationError), +} + +impl From for PyErr { + fn from(error: JSONSchemaError) -> PyErr { + exceptions::ValueError::py_err(match error { + JSONSchemaError::Compilation(_) => "Invalid schema", + }) + } +} + +fn get_draft(draft: Option) -> PyResult { + if let Some(value) = draft { + match value { + DRAFT4 => Ok(jsonschema::Draft::Draft4), + DRAFT6 => Ok(jsonschema::Draft::Draft6), + DRAFT7 => Ok(jsonschema::Draft::Draft7), + _ => Err(exceptions::ValueError::py_err(format!( + "Unknown draft: {}", + value + ))), + } + } else { + Ok(jsonschema::Draft::default()) + } +} + +/// A shortcut for validating the input instance against the schema. 
+/// +/// >>> is_valid({"minimum": 5}, 3) +/// False +/// +/// If your workflow implies validating against the same schema, consider using `JSONSchema.is_valid` +/// instead. +#[pyfunction] +#[text_signature = "(schema, instance, draft=None)"] +fn is_valid(schema: &PyAny, instance: &PyAny, draft: Option) -> PyResult { + let draft = get_draft(draft).map(Some)?; + let schema = ser::to_value(schema)?; + let instance = ser::to_value(instance)?; + let compiled = + jsonschema::JSONSchema::compile(&schema, draft).map_err(JSONSchemaError::Compilation)?; + Ok(compiled.is_valid(&instance)) +} + +/// JSON Schema compiled into a validation tree. +/// +/// >>> compiled = JSONSchema({"minimum": 5}) +/// >>> compiled.is_valid(3) +/// False +/// +/// By default Draft 7 will be used for compilation. +#[pyclass] +#[text_signature = "(schema, draft=None)"] +struct JSONSchema { + schema: jsonschema::JSONSchema<'static>, + raw_schema: &'static Value, +} + +#[pymethods] +impl JSONSchema { + #[new] + fn new(schema: &PyAny, draft: Option) -> PyResult { + let draft = get_draft(draft).map(Some)?; + let raw_schema = ser::to_value(schema)?; + // Currently, it is the simplest way to pass a reference to `JSONSchema` + // It is cleaned up in the `Drop` implementation + let schema: &'static Value = Box::leak(Box::new(raw_schema)); + Ok(JSONSchema { + schema: jsonschema::JSONSchema::compile(schema, draft) + .map_err(JSONSchemaError::Compilation)?, + raw_schema: schema, + }) + } + + /// Perform fast validation against the compiled schema. + /// + /// >>> compiled = JSONSchema({"minimum": 5}) + /// >>> compiled.is_valid(3) + /// False + /// + /// The output is a boolean value, that indicates whether the instance is valid or not. 
+ #[text_signature = "(instance)"] + fn is_valid(&self, instance: &PyAny) -> bool { + let instance = ser::to_value(instance).unwrap(); + self.schema.is_valid(&instance) + } +} + +const SCHEMA_LENGTH_LIMIT: usize = 32; + +#[pyproto] +impl<'p> PyObjectProtocol<'p> for JSONSchema { + fn __repr__(&self) -> PyResult { + let mut schema = self.raw_schema.to_string(); + if schema.len() > SCHEMA_LENGTH_LIMIT { + schema.truncate(SCHEMA_LENGTH_LIMIT); + schema = format!("{}...}}", schema); + } + Ok(format!("", schema)) + } +} + +impl Drop for JSONSchema { + fn drop(&mut self) { + // Since `self.raw_schema` is not used anywhere else, there should be no double-free + unsafe { Box::from_raw(self.raw_schema as *const _ as *mut Value) }; + } +} + +#[pymodule] +fn jsonschema_rs(_py: Python, module: &PyModule) -> PyResult<()> { + types::init(); + module.add_wrapped(wrap_pyfunction!(is_valid))?; + module.add_class::()?; + module.add("Draft4", DRAFT4)?; + module.add("Draft6", DRAFT6)?; + module.add("Draft7", DRAFT7)?; + module.add("__doc__", MODULE_DOCSTRING)?; + Ok(()) +} diff --git a/python/src/ser.rs b/python/src/ser.rs new file mode 100644 index 0000000..3c0423b --- /dev/null +++ b/python/src/ser.rs @@ -0,0 +1,184 @@ +use pyo3::exceptions; +use pyo3::ffi::*; +use pyo3::prelude::*; +use pyo3::types::PyAny; +use pyo3::AsPyPointer; +use serde::ser::{self, Serialize, SerializeMap, SerializeSeq}; +use serde::Serializer; + +use crate::{string, types}; +use std::ffi::CStr; + +pub const RECURSION_LIMIT: u8 = 255; + +#[derive(Clone)] +pub enum ObjectType { + Str, + Int, + Bool, + None, + Float, + List, + Dict, + Unknown(String), +} + +pub(crate) struct SerializePyObject { + object: *mut pyo3::ffi::PyObject, + object_type: ObjectType, + recursion_depth: u8, +} + +impl SerializePyObject { + #[inline] + pub fn new(object: *mut pyo3::ffi::PyObject, recursion_depth: u8) -> Self { + SerializePyObject { + object, + object_type: get_object_type_from_object(object), + recursion_depth, + } + } + + 
#[inline] + pub fn with_obtype( + object: *mut pyo3::ffi::PyObject, + object_type: ObjectType, + recursion_depth: u8, + ) -> Self { + SerializePyObject { + object, + object_type, + recursion_depth, + } + } +} + +fn get_object_type_from_object(object: *mut pyo3::ffi::PyObject) -> ObjectType { + unsafe { + let object_type = Py_TYPE(object); + get_object_type(object_type) + } +} + +#[inline] +pub fn get_object_type(object_type: *mut pyo3::ffi::PyTypeObject) -> ObjectType { + if object_type == unsafe { types::STR_TYPE } { + ObjectType::Str + } else if object_type == unsafe { types::FLOAT_TYPE } { + ObjectType::Float + } else if object_type == unsafe { types::BOOL_TYPE } { + ObjectType::Bool + } else if object_type == unsafe { types::INT_TYPE } { + ObjectType::Int + } else if object_type == unsafe { types::NONE_TYPE } { + ObjectType::None + } else if object_type == unsafe { types::LIST_TYPE } { + ObjectType::List + } else if object_type == unsafe { types::DICT_TYPE } { + ObjectType::Dict + } else { + let type_name = unsafe { CStr::from_ptr((*object_type).tp_name).to_string_lossy() }; + ObjectType::Unknown(type_name.to_string()) + } +} + +/// Convert a Python value to `serde_json::Value` +impl Serialize for SerializePyObject { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self.object_type { + ObjectType::Str => { + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let uni = unsafe { string::read_utf8_from_str(self.object, &mut str_size) }; + let slice = unsafe { + std::str::from_utf8_unchecked(std::slice::from_raw_parts( + uni, + str_size as usize, + )) + }; + serializer.serialize_str(slice) + } + ObjectType::Int => serializer.serialize_i64(unsafe { PyLong_AsLongLong(self.object) }), + ObjectType::Float => { + serializer.serialize_f64(unsafe { PyFloat_AS_DOUBLE(self.object) }) + } + ObjectType::Bool => serializer.serialize_bool(self.object == unsafe { types::TRUE }), + ObjectType::None => serializer.serialize_unit(), + ObjectType::Dict 
=> { + if std::intrinsics::unlikely(self.recursion_depth == RECURSION_LIMIT) { + return Err(ser::Error::custom("Recursion limit reached")); + } + let length = unsafe { (*self.object.cast::()).ma_used } as usize; + if std::intrinsics::unlikely(length == 0) { + serializer.serialize_map(Some(0))?.end() + } else { + let mut map = serializer.serialize_map(Some(length))?; + let mut pos = 0isize; + let mut str_size: pyo3::ffi::Py_ssize_t = 0; + let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut(); + for _ in 0..length { + unsafe { + pyo3::ffi::PyDict_Next(self.object, &mut pos, &mut key, &mut value); + } + let uni = unsafe { string::read_utf8_from_str(key, &mut str_size) }; + let slice = unsafe { + std::str::from_utf8_unchecked(std::slice::from_raw_parts( + uni, + str_size as usize, + )) + }; + #[allow(clippy::integer_arithmetic)] + map.serialize_entry( + slice, + &SerializePyObject::new(value, self.recursion_depth + 1), + )?; + } + map.end() + } + } + ObjectType::List => { + if std::intrinsics::unlikely(self.recursion_depth == RECURSION_LIMIT) { + return Err(ser::Error::custom("Recursion limit reached")); + } + let length = unsafe { PyList_GET_SIZE(self.object) } as usize; + if std::intrinsics::unlikely(length == 0) { + serializer.serialize_seq(Some(0))?.end() + } else { + let mut type_ptr = std::ptr::null_mut(); + let mut ob_type = ObjectType::Str; + let mut sequence = serializer.serialize_seq(Some(length))?; + for i in 0..length { + let elem = unsafe { PyList_GET_ITEM(self.object, i as isize) }; + let current_ob_type = unsafe { Py_TYPE(elem) }; + if current_ob_type != type_ptr { + type_ptr = current_ob_type; + ob_type = get_object_type(current_ob_type) + } + #[allow(clippy::integer_arithmetic)] + sequence.serialize_element(&SerializePyObject::with_obtype( + elem, + ob_type.clone(), + self.recursion_depth + 1, + ))? 
+ } + sequence.end() + } + } + ObjectType::Unknown(ref type_name) => Err(ser::Error::custom(format!( + "Unsupported type: '{}'", + type_name + ))), + } + } +} + +#[inline] +pub(crate) fn to_value(object: &PyAny) -> PyResult { + Ok( + serde_json::to_value(SerializePyObject::new(object.as_ptr(), 0)) + .map_err(|err| exceptions::ValueError::py_err(err.to_string()))?, + ) +} diff --git a/python/src/string.rs b/python/src/string.rs new file mode 100644 index 0000000..c5cda56 --- /dev/null +++ b/python/src/string.rs @@ -0,0 +1,51 @@ +use pyo3::ffi::{PyTypeObject, PyUnicode_AsUTF8AndSize, Py_UNICODE, Py_hash_t, Py_ssize_t}; +use std::os::raw::c_char; + +#[repr(C)] +struct PyASCIIObject { + pub ob_refcnt: Py_ssize_t, + pub ob_type: *mut PyTypeObject, + pub length: Py_ssize_t, + pub hash: Py_hash_t, + pub state: u32, + pub wstr: *mut c_char, +} + +#[repr(C)] +struct PyCompactUnicodeObject { + pub ob_refcnt: Py_ssize_t, + pub ob_type: *mut PyTypeObject, + pub length: Py_ssize_t, + pub hash: Py_hash_t, + pub state: u32, + pub wstr: *mut Py_UNICODE, + pub utf8_length: Py_ssize_t, + pub utf8: *mut c_char, + pub wstr_length: Py_ssize_t, +} + +const STATE_ASCII: u32 = 0b00000000000000000000000001000000; +const STATE_COMPACT: u32 = 0b00000000000000000000000000100000; + +/// Read a UTF-8 string from a pointer and change the given size if needed. 
+pub unsafe fn read_utf8_from_str(
+    object_pointer: *mut pyo3::ffi::PyObject,
+    size: &mut Py_ssize_t,
+) -> *const u8 {
+    if std::intrinsics::likely(
+        (*object_pointer.cast::<PyASCIIObject>()).state & (STATE_ASCII | STATE_COMPACT) == (STATE_ASCII | STATE_COMPACT),
+    ) { // both bits are required: an ASCII string that is not compact keeps its characters out of line
+        *size = (*object_pointer.cast::<PyASCIIObject>()).length; // ASCII: byte length equals character count
+        object_pointer.cast::<PyASCIIObject>().offset(1) as *const u8 // data starts right after the header struct
+    } else if std::intrinsics::likely(
+        (*object_pointer.cast::<PyASCIIObject>()).state & STATE_COMPACT == STATE_COMPACT,
+    ) && !(*object_pointer.cast::<PyCompactUnicodeObject>())
+        .utf8
+        .is_null()
+    {
+        *size = (*object_pointer.cast::<PyCompactUnicodeObject>()).utf8_length; // reuse the already cached UTF-8 form
+        (*object_pointer.cast::<PyCompactUnicodeObject>()).utf8 as *const u8
+    } else {
+        PyUnicode_AsUTF8AndSize(object_pointer, size) as *const u8 // general path; NOTE(review): result is not NULL-checked here
+    }
+}
diff --git a/python/src/types.rs b/python/src/types.rs
new file mode 100644
index 0000000..33ac839
--- /dev/null
+++ b/python/src/types.rs
@@ -0,0 +1,30 @@
+use pyo3::ffi::*;
+use std::sync::Once;
+
+pub static mut TRUE: *mut pyo3::ffi::PyObject = 0 as *mut pyo3::ffi::PyObject;
+
+pub static mut STR_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
+pub static mut INT_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
+pub static mut BOOL_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
+pub static mut NONE_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
+pub static mut FLOAT_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
+pub static mut LIST_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
+pub static mut DICT_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
+
+static INIT: Once = Once::new();
+
+/// Set empty type object pointers with their actual values.
+/// We need these Python-side type objects for direct comparison during conversion to serde types
+/// NOTE.
This function should be called before any serialization logic
+pub fn init() {
+    INIT.call_once(|| unsafe {
+        TRUE = Py_True(); // borrowed singleton, nothing to release
+        STR_TYPE = &mut PyUnicode_Type as *mut PyTypeObject; // static type objects: no temporaries to leak or NULL-check
+        DICT_TYPE = &mut PyDict_Type as *mut PyTypeObject;
+        LIST_TYPE = &mut PyList_Type as *mut PyTypeObject;
+        NONE_TYPE = Py_TYPE(Py_None()); // read from the singleton itself
+        BOOL_TYPE = &mut PyBool_Type as *mut PyTypeObject;
+        INT_TYPE = &mut PyLong_Type as *mut PyTypeObject;
+        FLOAT_TYPE = &mut PyFloat_Type as *mut PyTypeObject;
+    });
+}
diff --git a/python/tests-py/test_jsonschema.py b/python/tests-py/test_jsonschema.py
new file mode 100644
index 0000000..6ea297b
--- /dev/null
+++ b/python/tests-py/test_jsonschema.py
@@ -0,0 +1,79 @@
+from contextlib import suppress
+
+import pytest
+from hypothesis import given
+from hypothesis import strategies as st
+
+from jsonschema_rs import JSONSchema, is_valid
+
+json = st.recursive(
+    st.none() | st.booleans() | st.floats() | st.integers() | st.text(),
+    lambda children: st.lists(children, min_size=1) | st.dictionaries(st.text(), children, min_size=1),
+)
+
+
+@given(instance=json)
+def test_instance_processing(instance):
+    with suppress(Exception):
+        is_valid(True, instance)
+
+
+@given(instance=json)
+def test_schema_processing(instance):
+    with suppress(Exception):
+        is_valid(instance, True)
+
+
+def test_invalid_schema():
+    with pytest.raises(ValueError):
+        is_valid(2 ** 64, True)
+
+
+def test_invalid_type():
+    with pytest.raises(ValueError, match="Unsupported type: 'set'"):
+        is_valid(set(), True)
+
+
+def test_repr():
+    assert repr(JSONSchema({"minimum": 5})) == ''
+
+
+def test_recursive_dict():
+    instance = {}
+    instance["foo"] = instance
+    with pytest.raises(ValueError):
+        is_valid(True, instance)
+
+
+def test_recursive_list():
+    instance = []
+    instance.append(instance)
+    with pytest.raises(ValueError):
+        is_valid(True, instance)
+
+
+@pytest.mark.parametrize("schema, draft, error", (([], None, "Invalid schema"), ({}, 5, "Unknown draft: 5"),))
+def test_initialization_errors(schema, draft, error):
+    with
pytest.raises(ValueError, match=error):
+        JSONSchema(schema, draft)
+
+
+@given(minimum=st.integers().map(abs))
+def test_minimum(minimum):
+    with suppress(SystemError):
+        assert is_valid({"minimum": minimum}, minimum)
+        assert is_valid({"minimum": minimum}, minimum - 1) is False
+
+
+@given(maximum=st.integers().map(abs))
+def test_maximum(maximum):
+    with suppress(SystemError):
+        assert is_valid({"maximum": maximum}, maximum)
+        assert is_valid({"maximum": maximum}, maximum + 1) is False
+
+
+@pytest.mark.xfail(reason="The underlying Rust crate has not enough precision.")
+@given(multiple_of=(st.integers() | st.floats(allow_infinity=False, allow_nan=False)).filter(lambda x: x > 0))
+def test_multiple_of(multiple_of):
+    with suppress(SystemError):
+        assert is_valid({"multipleOf": multiple_of}, multiple_of * 3)
diff --git a/python/tests-py/test_suite.py b/python/tests-py/test_suite.py
new file mode 100644
index 0000000..f110d39
--- /dev/null
+++ b/python/tests-py/test_suite.py
@@ -0,0 +1,48 @@
+import json
+import os
+
+import pytest
+
+import jsonschema_rs
+
+SUPPORTED_DRAFTS = (4, 6, 7)
+NOT_SUPPORTED_CASES = {4: ("bignum.json",), 6: ("bignum.json",), 7: ("bignum.json",)}
+
+
+def load_file(path):
+    with open(path, encoding="utf-8") as fd:  # JSON is UTF-8; do not depend on the locale's default encoding
+        for block in json.load(fd):  # yield each top-level entry of the parsed JSON document
+            yield block
+
+
+def maybe_optional(draft, schema, instance, expected, description, filename):
+    output = (draft, schema, instance, expected, description)
+    if filename in NOT_SUPPORTED_CASES.get(draft, ()):
+        output = pytest.param(
+            *output, marks=pytest.mark.skip(reason="{filename} is not supported".format(filename=filename))
+        )
+    return output
+
+
+def pytest_generate_tests(metafunc):
+    cases = [
+        maybe_optional(draft, block["schema"], test["data"], test["valid"], test["description"], filename)
+        for draft in SUPPORTED_DRAFTS
+        for root, dirs, files in os.walk("../tests/suite/tests/draft{draft}/".format(draft=draft))
+        for filename in files
+        for block in load_file(os.path.join(root, filename))
+        for test in
block["tests"] + ] + metafunc.parametrize("draft, schema, instance, expected, description", cases) + + +def test_draft(draft, schema, instance, expected, description): + try: + result = jsonschema_rs.is_valid(schema, instance, int(draft)) + assert result is expected, "{description}: {schema} | {instance}".format( + description=description, schema=schema, instance=instance + ) + except ValueError: + pytest.fail( + "{description}: {schema} | {instance}".format(description=description, schema=schema, instance=instance) + ) diff --git a/python/tox.ini b/python/tox.ini new file mode 100644 index 0000000..406a9bf --- /dev/null +++ b/python/tox.ini @@ -0,0 +1,14 @@ +[tox] +# This source package is built via `build-sdist.sh` as a workaround for the fact that `setuptools_rust` doesn't support +# local dependencies (`jsonschema` is in the parent directory) +sdistsrc=dist/*.tar.gz +envlist = + py{35,36,37,38} + +[testenv] +deps = + pytest + pytest-benchmark + hypothesis +commands = + python -m pytest tests-py {posargs:} diff --git a/src/error.rs b/src/error.rs index f2aac6a..792b73e 100644 --- a/src/error.rs +++ b/src/error.rs @@ -65,7 +65,7 @@ pub struct ValidationError<'a> { /// if let Err(errors) = result { /// for error in errors { /// println!("Validation error: {}", error) -/// } +/// } /// } /// } /// ``` diff --git a/src/lib.rs b/src/lib.rs index fd75778..63e2b25 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,7 +21,7 @@ //! if let Err(errors) = result { //! for error in errors { //! println!("Validation error: {}", error) -//! } +//! } //! } //! Ok(()) //! }