feat: FFI-based Python bindings

This commit is contained in:
Dmitry Dygalo 2020-05-17 17:35:11 +02:00 committed by Dmitry Dygalo
parent abcfc2ad2a
commit 46e7ff7ddf
33 changed files with 1016 additions and 11 deletions

View File

@ -17,6 +17,22 @@ jobs:
- name: Run commitsar
uses: docker://commitsar/commitsar
pre-commit:
name: Generic pre-commit checks
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
with:
fetch-depth: 1
- uses: actions/setup-python@v1
with:
python-version: 3.7
- run: pip install pre-commit
- run: pre-commit run --all-files
working-directory: ./python
check:
name: Check
runs-on: ubuntu-latest
@ -91,6 +107,40 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
file: ${{ steps.coverage.outputs.report }}
# Builds the Python source distribution and runs the matching tox environment
# for every supported interpreter.
test-python:
  strategy:
    matrix:
      os: [ubuntu-latest]
      # Quoted so YAML keeps the versions as strings
      # (an unquoted 3.10 would be loaded as the float 3.1)
      python: ["3.5", "3.6", "3.7", "3.8"]
  name: Python ${{ matrix.python }} on ${{ matrix.os }}
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v2
      with:
        submodules: true
    - uses: actions/setup-python@v1
      with:
        python-version: ${{ matrix.python }}
    - name: Start background server  # Used for integration tests
      run: |
        # This assumes that python3 is installed on the system
        /usr/bin/env python3 -m pip install flask
        # Starts the server in background
        /usr/bin/env python3 ./tests/suite/bin/jsonschema_suite serve &
    - run: /usr/bin/env python3 -m pip install setuptools_rust tox
      working-directory: ./python
    - run: ./build-sdist.sh
      working-directory: ./python
    - name: Run ${{ matrix.python }} tox job
      run: tox -e py${TOX_JOB//.}  # Strip dot from python version to match tox job
      working-directory: ./python
      env:
        TOX_JOB: ${{ matrix.python }}
fmt:
name: Rustfmt
runs-on: ubuntu-latest

47
.github/workflows/python-release.yml vendored Normal file
View File

@ -0,0 +1,47 @@
# Publishes the Python package to PyPI whenever a `python-v*` tag is pushed.
name: Python Release

on:
  push:
    tags:
      - python-v*

jobs:
  create_wheels_manylinux:
    runs-on: ubuntu-latest
    name: Create wheels for manylinux
    container: quay.io/pypa/manylinux2010_x86_64
    steps:
      - uses: actions/checkout@v1
      - run: /usr/bin/env python3 -m pip install setuptools_rust twine
      - name: Build and audit wheels
        working-directory: ./python
        run: sh build-wheels.sh
      - name: Upload package
        working-directory: ./python
        # Credentials are passed via the environment variables twine reads itself,
        # so they are never expanded (unquoted) onto the command line
        run: twine upload --non-interactive ./dist/*
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}

  create_source_dist:
    name: Create sdist package
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      - run: /usr/bin/env python3 -m pip install setuptools_rust twine
      - name: Build sdist
        working-directory: ./python
        run: sh build-sdist.sh
      - name: Upload package
        working-directory: ./python
        # Same as above: twine picks the credentials up from the environment
        run: twine upload --non-interactive ./dist/*
        env:
          TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}

7
.gitignore vendored
View File

@ -1,3 +1,8 @@
/target
Cargo.lock
.idea
.idea
*.so
.hypothesis
.tox
*.tar.gz
*.egg-info

37
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,37 @@
default_language_version:
python: python3.7
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
exclude: ^.*\.(md|rst)$
- id: debug-statements
- id: mixed-line-ending
args: [--fix=lf]
- id: check-merge-conflict
- repo: https://github.com/jorisroovers/gitlint
rev: v0.13.1
hooks:
- id: gitlint
- repo: https://github.com/adrienverge/yamllint
rev: v1.21.0
hooks:
- id: yamllint
# Pinned to an immutable tag: `stable` is a moving ref, which pre-commit caches
# on first use and does not update afterwards
- repo: https://github.com/psf/black
  rev: 19.10b0
  hooks:
    - id: black
      types: [python]
- repo: https://github.com/pre-commit/mirrors-isort
rev: v4.3.21
hooks:
- id: isort
additional_dependencies: ["isort[pyproject]"]

4
.yamllint Normal file
View File

@ -0,0 +1,4 @@
extends: relaxed
rules:
line-length:
max: 120

View File

@ -63,4 +63,4 @@
[Unreleased]: https://github.com/Stranger6667/jsonschema-rs/compare/v0.3.0...HEAD
[0.3.0]: https://github.com/Stranger6667/jsonschema-rs/compare/v0.2.0...v0.3.0
[0.2.0]: https://github.com/Stranger6667/jsonschema-rs/compare/v0.1.0...v0.2.0
[0.2.0]: https://github.com/Stranger6667/jsonschema-rs/compare/v0.1.0...v0.2.0

View File

@ -8,7 +8,7 @@ readme = "README.md"
description = "A crate for performing JSON schema validation"
repository = "https://github.com/Stranger6667/jsonschema-rs"
keywords = ["jsonschema", "validation"]
exclude = ["tests"]
exclude = ["tests", "python"]
categories = ["web-programming"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View File

@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
SOFTWARE.

View File

@ -93,4 +93,4 @@ a validator is compiled every time.
You can find benchmark code in `benches/jsonschema.rs`, Rust version is `1.44`
**NOTE**. This library is in early development.
**NOTE**. This library is in early development.

View File

@ -91,4 +91,4 @@
"type"
],
"type": "object"
}
}

View File

@ -1 +1 @@
[10, "world", [1, "a", true], {"a": "a", "b": "b", "c": "xy"}, "str", 5]
[10, "world", [1, "a", true], {"a": "a", "b": "b", "c": "xy"}, "str", 5]

View File

@ -37,4 +37,4 @@
{"type": "number", "multipleOf": 5}
]}
]
}
}

View File

@ -1 +1 @@
[9, "hello", [1, "a", true], {"a": "a", "b": "b", "d": "d"}, 42, 3]
[9, "hello", [1, "a", true], {"a": "a", "b": "b", "d": "d"}, 42, 3]

41
python/Cargo.toml Normal file
View File

@ -0,0 +1,41 @@
[package]
name = "jsonschema-python"
version = "0.1.0"
authors = ["Dmitry Dygalo <dadygalo@gmail.com>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[lib]
name = "jsonschema_rs"
crate-type = ["cdylib"]
[dependencies]
serde_json = "1"
serde = "1"
[dependencies.jsonschema]
path = "../"
[dependencies.pyo3]
# Stay within the 0.10 series: for 0.x crates Cargo treats a minor bump as breaking, and an
# open-ended ">= 0.10" would let it resolve to such a release
version = "0.10"
features = ["extension-module"]
[package.metadata.maturin]
requires-python = ">=3.5"
project-url = ["https://github.com/Stranger6667/jsonschema-rs"]
maintainer = "Dmitry Dygalo"
maintainer-email = "Dmitry Dygalo <dadygalo@gmail.com>"
classifier = [
"Development Status :: 3 - Alpha",
"Environment :: Console",
"Intended Audience :: Developers",
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
"Programming Language :: Rust",
"Programming Language :: Python :: 3 :: Only",
"Programming Language :: Python :: 3.5",
"Programming Language :: Python :: 3.6",
"Programming Language :: Python :: 3.7",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: Implementation :: CPython",
]

6
python/MANIFEST.in Normal file
View File

@ -0,0 +1,6 @@
include Cargo.toml
include pyproject.toml
include rust-toolchain
recursive-include src *
recursive-include jsonschema/src/ *
include jsonschema/Cargo.toml

98
python/README.rst Normal file
View File

@ -0,0 +1,98 @@
jsonschema-rs
=============
|Build| |Version| |Python versions| |License|
Fast JSON Schema validation for Python implemented in Rust.
Supported drafts:
- Draft 7
- Draft 6
- Draft 4
There are some notable restrictions at the moment:
- The underlying crate doesn't support arbitrary precision integers yet, which may lead to ``SystemError`` when such a value is used;
- ``multipleOf`` keyword validation may produce false-negative results on some input. See `#84 <https://github.com/Stranger6667/jsonschema-rs/issues/84>`_ for more details.
Installation
------------
To install ``jsonschema-rs`` via ``pip`` run the following command:
.. code:: bash
pip install jsonschema-rs
Usage
-----
To check if the input document is valid:
.. code:: python
import jsonschema_rs
validator = jsonschema_rs.JSONSchema({"minimum": 42})
validator.is_valid(45) # True
**NOTE**. This library is in early development and does not yet provide a way to show validation errors (even though this is implemented in the underlying Rust crate).
Performance
-----------
According to our benchmarks, ``jsonschema-rs`` is usually faster than existing alternatives in real-life scenarios.
However, for single-keyword or boolean schemas it might be slower than ``fastjsonschema``.
Compiled validators (when the input schema is compiled once and reused later)
+----------------+------------------------+----------------------+----------------------+------------------------+
| library | ``false`` | ``{"minimum": 10}`` | small | big |
+================+========================+======================+======================+========================+
| jsonschema-rs | 320.3 ns | 329.32 ns | 1.15 us | 5.8 ms |
+----------------+------------------------+----------------------+----------------------+------------------------+
| fastjsonschema | 52.29 ns (**x0.16**) | 134.43 ns (**x0.4**) | 6.01 us (**x5.22**) | 587.5 ms (**x101.29**) |
+----------------+------------------------+----------------------+----------------------+------------------------+
| jsonschema | 289.97 ns (**x0.9**) | 2.52 us (**x7.65**) | 74.98 us (**x65.2**) | 2.02 s (**x348.27**) |
+----------------+------------------------+----------------------+----------------------+------------------------+
Validators are not compiled (``jsonschema``) or compiled on every validation:
+----------------+------------------------+-------------------------+-----------------------+-------------------------+
| library | ``false`` | ``{"minimum": 10}`` | small | big |
+================+========================+=========================+=======================+=========================+
| jsonschema-rs | 402.35 ns | 908.06 ns | 9.54 us | 5.9 ms |
+----------------+------------------------+-------------------------+-----------------------+-------------------------+
| fastjsonschema | 64.08 us (**x159.26**) | 119.57 us (**x131.67**) | 1.43 ms (**x149.89**) | 599.84 ms (**x101.66**) |
+----------------+------------------------+-------------------------+-----------------------+-------------------------+
| jsonschema | 67.74 us (**x168.36**) | 76.62 us (**x84.37**) | 1.02 ms (**x106.91**) | 2.11 s (**x357.62**) |
+----------------+------------------------+-------------------------+-----------------------+-------------------------+
The bigger the input is, the bigger the performance win.
In the tables above, ``big`` and ``small`` schemas refer to more realistic schemas and input instances.
You can take a look at benchmarks in ``benches/bench.py``. Ratios are given against ``jsonschema-rs``.
Python support
--------------
``jsonschema-rs`` supports Python 3.5, 3.6, 3.7 and 3.8.
License
-------
The code in this project is licensed under `MIT license`_.
By contributing to ``jsonschema-rs``, you agree that your contributions
will be licensed under its MIT license.
.. |Build| image:: https://github.com/Stranger6667/jsonschema-rs/workflows/build/badge.svg
:target: https://github.com/Stranger6667/jsonschema-rs/actions
.. |Version| image:: https://img.shields.io/pypi/v/jsonschema-rs.svg
:target: https://pypi.org/project/jsonschema-rs/
.. |Python versions| image:: https://img.shields.io/pypi/pyversions/jsonschema-rs.svg
:target: https://pypi.org/project/jsonschema-rs/
.. |License| image:: https://img.shields.io/pypi/l/jsonschema-rs.svg
:target: https://opensource.org/licenses/MIT
.. _MIT license: https://opensource.org/licenses/MIT

67
python/benches/bench.py Normal file
View File

@ -0,0 +1,67 @@
import json
import fastjsonschema
import jsonschema
import pytest
import jsonschema_rs
def load_json(filename):
    """Open ``filename`` and return its content parsed as JSON."""
    with open(filename) as source:
        document = json.load(source)
    return document
BIG_SCHEMA = load_json("../benches/canada_schema.json")
BIG_INSTANCE = load_json("../benches/canada.json")
SMALL_SCHEMA = load_json("../benches/small_schema.json")
SMALL_INSTANCE_VALID = [9, "hello", [1, "a", True], {"a": "a", "b": "b", "d": "d"}, 42, 3]
@pytest.fixture(params=[True, False])
def is_compiled(request):
return request.param
@pytest.fixture(params=["rust", "python", "python-fast"])
def args(request, is_compiled):
    """Build the ``(callable, *arguments)`` tuple passed to ``benchmark``.

    The schema and the instance are taken from the test's ``data`` marker. Depending on
    ``is_compiled``, the callable is either a validator prepared upfront (called with the
    instance only) or a one-shot validation function (called with both the schema and the instance).
    """
    schema, instance = request.node.get_closest_marker("data").args
    library = request.param
    if library == "rust":
        if not is_compiled:
            return jsonschema_rs.is_valid, schema, instance
        return jsonschema_rs.JSONSchema(schema).is_valid, instance
    if library == "python":
        if not is_compiled:
            # `jsonschema.validate` takes the instance first
            return jsonschema.validate, instance, schema
        validator_class = jsonschema.validators.validator_for(schema)
        return validator_class(schema).is_valid, instance
    if library == "python-fast":
        if not is_compiled:
            return fastjsonschema.validate, schema, instance
        return fastjsonschema.compile(schema), instance
@pytest.mark.data(True, True)
@pytest.mark.benchmark(group="boolean")
def test_boolean(benchmark, args):
benchmark(*args)
@pytest.mark.data({"minimum": 10}, 10)
@pytest.mark.benchmark(group="minimum")
def test_minimum(benchmark, args):
benchmark(*args)
@pytest.mark.data(SMALL_SCHEMA, SMALL_INSTANCE_VALID)
@pytest.mark.benchmark(group="small")
def test_small_schema(benchmark, args):
benchmark(*args)
@pytest.mark.data(BIG_SCHEMA, BIG_INSTANCE)
@pytest.mark.benchmark(group="big")
def test_big_schema(benchmark, args):
benchmark(*args)

View File

@ -0,0 +1,2 @@
def pytest_configure(config):
config.addinivalue_line("markers", "data(schema, instance): add data for benchmarking")

15
python/build-sdist.sh Executable file
View File

@ -0,0 +1,15 @@
#!/bin/bash
# `setuptools_rust` and `maturin` don't support some local dependencies, such as `jsonschema` (it is in the
# parent directory). As a workaround we create a modified distribution of this library that has the `jsonschema`
# crate as a dependency in the same directory, then the sources are copied as declared in MANIFEST.in and the
# resulting package can be installed properly
set -ex

# Roll back the temporary changes made below. Registered as an EXIT trap, so it also runs when
# the build fails and `set -e` aborts the script - otherwise Cargo.toml would stay modified
cleanup() {
    rm -f jsonschema
    sed -i 's/"jsonschema"/"\.\.\/"/' Cargo.toml
}
trap cleanup EXIT

# `-n` replaces a stale `jsonschema` symlink instead of creating a new link inside its target
ln -sfn ../ jsonschema
# Modify Cargo.toml to include this symlink
sed -i 's/\.\.\//jsonschema/' Cargo.toml
# Build the source distribution
python setup.py sdist

18
python/build-wheels.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash
# Builds wheels for every supported CPython version and repairs them with `auditwheel`.
# Only the repaired wheels end up in `dist/`.
set -ex

# Unrepaired wheels are kept out of `dist/`, so that everything in `dist/` is uploadable
RAW_WHEELS_DIR="build/raw-wheels"

yum install openssl-devel -y
curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly -y
export PATH="$HOME/.cargo/bin:$PATH"

for PYBIN in /opt/python/{cp35-cp35m,cp36-cp36m,cp37-cp37m,cp38-cp38}/bin; do
    export PYTHON_SYS_EXECUTABLE="$PYBIN/python"
    "${PYBIN}/pip" install -U setuptools wheel setuptools-rust
    "${PYBIN}/python" setup.py bdist_wheel --dist-dir "$RAW_WHEELS_DIR"
done

# `auditwheel repair` writes the fixed wheels into `dist/`
for whl in "$RAW_WHEELS_DIR"/*.whl; do
    auditwheel repair "$whl" -w dist/
done

15
python/pyproject.toml Normal file
View File

@ -0,0 +1,15 @@
[build-system]
requires = ["setuptools", "wheel", "setuptools-rust"]
[tool.black]
line-length = 120
target_version = ["py37"]
[tool.isort]
# config compatible with Black
line_length = 120
multi_line_output = 3
default_section = "THIRDPARTY"
include_trailing_comma = true
known_first_party = "jsonschema_rs"
known_third_party = []

View File

@ -0,0 +1,3 @@
from .jsonschema_rs import *
del jsonschema_rs

1
python/rust-toolchain Normal file
View File

@ -0,0 +1 @@
nightly

36
python/setup.py Normal file
View File

@ -0,0 +1,36 @@
from setuptools import find_packages, setup
from setuptools_rust import Binding, RustExtension
# Read the long description via a context manager, so the file handle is closed right away
with open("README.rst", encoding="utf-8") as readme:
    LONG_DESCRIPTION = readme.read()

setup(
    name="jsonschema_rs",
    version="0.1.0",
    description="Fast JSON Schema validation for Python implemented in Rust",
    long_description=LONG_DESCRIPTION,
    long_description_content_type="text/x-rst",
    keywords="jsonschema validation rust",
    author="Dmitry Dygalo",
    author_email="dadygalo@gmail.com",
    maintainer="Dmitry Dygalo",
    maintainer_email="dadygalo@gmail.com",
    python_requires=">=3.5",
    # Points to the directory with the Python bindings inside the repository
    url="https://github.com/Stranger6667/jsonschema-rs/tree/master/python",
    license="MIT",
    rust_extensions=[RustExtension("jsonschema_rs.jsonschema_rs", binding=Binding.PyO3)],
    # `extras_require` is the keyword setuptools understands; a bare `extras` is ignored
    extras_require={"tests": ["pytest", "hypothesis"], "bench": ["pytest-benchmark"]},
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        # Must match the official trove classifier exactly
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Rust",
    ],
    packages=find_packages(where="pysrc"),
    package_dir={"": "pysrc"},
    zip_safe=False,
)

154
python/src/lib.rs Normal file
View File

@ -0,0 +1,154 @@
#![feature(core_intrinsics)]
#![warn(
clippy::doc_markdown,
clippy::redundant_closure,
clippy::explicit_iter_loop,
clippy::match_same_arms,
clippy::needless_borrow,
clippy::print_stdout,
clippy::integer_arithmetic,
clippy::cast_possible_truncation,
clippy::result_unwrap_used,
clippy::result_map_unwrap_or_else,
clippy::option_unwrap_used,
clippy::option_map_unwrap_or_else,
clippy::option_map_unwrap_or
)]
use jsonschema::Draft;
use pyo3::prelude::*;
use pyo3::types::PyAny;
use pyo3::{exceptions, wrap_pyfunction, PyObjectProtocol};
use serde_json::Value;
mod ser;
mod string;
mod types;
const MODULE_DOCSTRING: &str = "JSON Schema validation for Python written in Rust.";
const DRAFT7: u8 = 7;
const DRAFT6: u8 = 6;
const DRAFT4: u8 = 4;
#[derive(Debug)]
enum JSONSchemaError {
Compilation(jsonschema::CompilationError),
}
impl From<JSONSchemaError> for PyErr {
fn from(error: JSONSchemaError) -> PyErr {
exceptions::ValueError::py_err(match error {
JSONSchemaError::Compilation(_) => "Invalid schema",
})
}
}
/// Translate the optional numeric draft identifier into `jsonschema::Draft`.
///
/// `None` selects the crate's default draft; a number other than `DRAFT4`, `DRAFT6` or `DRAFT7`
/// is reported as a Python `ValueError`.
fn get_draft(draft: Option<u8>) -> PyResult<Draft> {
    match draft {
        None => Ok(jsonschema::Draft::default()),
        Some(DRAFT4) => Ok(jsonschema::Draft::Draft4),
        Some(DRAFT6) => Ok(jsonschema::Draft::Draft6),
        Some(DRAFT7) => Ok(jsonschema::Draft::Draft7),
        Some(unknown) => Err(exceptions::ValueError::py_err(format!(
            "Unknown draft: {}",
            unknown
        ))),
    }
}
/// A shortcut for validating the input instance against the schema.
///
/// >>> is_valid({"minimum": 5}, 3)
/// False
///
/// If your workflow implies validating against the same schema, consider using `JSONSchema.is_valid`
/// instead.
#[pyfunction]
#[text_signature = "(schema, instance, draft=None)"]
fn is_valid(schema: &PyAny, instance: &PyAny, draft: Option<u8>) -> PyResult<bool> {
let draft = get_draft(draft).map(Some)?;
let schema = ser::to_value(schema)?;
let instance = ser::to_value(instance)?;
let compiled =
jsonschema::JSONSchema::compile(&schema, draft).map_err(JSONSchemaError::Compilation)?;
Ok(compiled.is_valid(&instance))
}
/// JSON Schema compiled into a validation tree.
///
/// >>> compiled = JSONSchema({"minimum": 5})
/// >>> compiled.is_valid(3)
/// False
///
/// By default Draft 7 will be used for compilation.
#[pyclass]
#[text_signature = "(schema, draft=None)"]
struct JSONSchema {
schema: jsonschema::JSONSchema<'static>,
raw_schema: &'static Value,
}
#[pymethods]
impl JSONSchema {
#[new]
fn new(schema: &PyAny, draft: Option<u8>) -> PyResult<Self> {
let draft = get_draft(draft).map(Some)?;
let raw_schema = ser::to_value(schema)?;
// Currently, it is the simplest way to pass a reference to `JSONSchema`
// It is cleaned up in the `Drop` implementation
let schema: &'static Value = Box::leak(Box::new(raw_schema));
Ok(JSONSchema {
schema: jsonschema::JSONSchema::compile(schema, draft)
.map_err(JSONSchemaError::Compilation)?,
raw_schema: schema,
})
}
/// Perform fast validation against the compiled schema.
///
/// >>> compiled = JSONSchema({"minimum": 5})
/// >>> compiled.is_valid(3)
/// False
///
/// The output is a boolean value, that indicates whether the instance is valid or not.
#[text_signature = "(instance)"]
fn is_valid(&self, instance: &PyAny) -> bool {
let instance = ser::to_value(instance).unwrap();
self.schema.is_valid(&instance)
}
}
/// Maximum number of bytes of the serialized schema that are shown in `repr` output.
const SCHEMA_LENGTH_LIMIT: usize = 32;

#[pyproto]
impl<'p> PyObjectProtocol<'p> for JSONSchema {
    /// Build `<JSONSchema: ...>`, where the serialized schema is shortened if it is longer than
    /// `SCHEMA_LENGTH_LIMIT` bytes.
    fn __repr__(&self) -> PyResult<String> {
        let mut schema = self.raw_schema.to_string();
        if schema.len() > SCHEMA_LENGTH_LIMIT {
            // `String::truncate` panics when the new length is not on a char boundary, which
            // happens for non-ASCII schemas - cut at the closest boundary within the limit
            let cut = (0..=SCHEMA_LENGTH_LIMIT)
                .rev()
                .find(|&index| schema.is_char_boundary(index))
                .unwrap_or(0);
            schema.truncate(cut);
            schema = format!("{}...}}", schema);
        }
        Ok(format!("<JSONSchema: {}>", schema))
    }
}
// Frees the schema value that `JSONSchema::new` leaked with `Box::leak`.
// `Drop::drop` runs before the struct's fields are dropped, so `self.schema` (compiled from the
// same reference) is still alive at the moment the box is released.
// NOTE(review): this relies on dropping `jsonschema::JSONSchema` not reading the schema value - confirm.
impl Drop for JSONSchema {
fn drop(&mut self) {
// Since `self.raw_schema` is not used anywhere else, there should be no double-free
// The reference is turned back into the owning `Box`, which is dropped at the end of the statement
unsafe { Box::from_raw(self.raw_schema as *const _ as *mut Value) };
}
}
#[pymodule]
fn jsonschema_rs(_py: Python, module: &PyModule) -> PyResult<()> {
types::init();
module.add_wrapped(wrap_pyfunction!(is_valid))?;
module.add_class::<JSONSchema>()?;
module.add("Draft4", DRAFT4)?;
module.add("Draft6", DRAFT6)?;
module.add("Draft7", DRAFT7)?;
module.add("__doc__", MODULE_DOCSTRING)?;
Ok(())
}

184
python/src/ser.rs Normal file
View File

@ -0,0 +1,184 @@
use pyo3::exceptions;
use pyo3::ffi::*;
use pyo3::prelude::*;
use pyo3::types::PyAny;
use pyo3::AsPyPointer;
use serde::ser::{self, Serialize, SerializeMap, SerializeSeq};
use serde::Serializer;
use crate::{string, types};
use std::ffi::CStr;
pub const RECURSION_LIMIT: u8 = 255;
#[derive(Clone)]
pub enum ObjectType {
Str,
Int,
Bool,
None,
Float,
List,
Dict,
Unknown(String),
}
pub(crate) struct SerializePyObject {
object: *mut pyo3::ffi::PyObject,
object_type: ObjectType,
recursion_depth: u8,
}
impl SerializePyObject {
#[inline]
pub fn new(object: *mut pyo3::ffi::PyObject, recursion_depth: u8) -> Self {
SerializePyObject {
object,
object_type: get_object_type_from_object(object),
recursion_depth,
}
}
#[inline]
pub fn with_obtype(
object: *mut pyo3::ffi::PyObject,
object_type: ObjectType,
recursion_depth: u8,
) -> Self {
SerializePyObject {
object,
object_type,
recursion_depth,
}
}
}
fn get_object_type_from_object(object: *mut pyo3::ffi::PyObject) -> ObjectType {
unsafe {
let object_type = Py_TYPE(object);
get_object_type(object_type)
}
}
#[inline]
pub fn get_object_type(object_type: *mut pyo3::ffi::PyTypeObject) -> ObjectType {
if object_type == unsafe { types::STR_TYPE } {
ObjectType::Str
} else if object_type == unsafe { types::FLOAT_TYPE } {
ObjectType::Float
} else if object_type == unsafe { types::BOOL_TYPE } {
ObjectType::Bool
} else if object_type == unsafe { types::INT_TYPE } {
ObjectType::Int
} else if object_type == unsafe { types::NONE_TYPE } {
ObjectType::None
} else if object_type == unsafe { types::LIST_TYPE } {
ObjectType::List
} else if object_type == unsafe { types::DICT_TYPE } {
ObjectType::Dict
} else {
let type_name = unsafe { CStr::from_ptr((*object_type).tp_name).to_string_lossy() };
ObjectType::Unknown(type_name.to_string())
}
}
/// Convert a Python value to `serde_json::Value`
impl Serialize for SerializePyObject {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match self.object_type {
ObjectType::Str => {
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
let uni = unsafe { string::read_utf8_from_str(self.object, &mut str_size) };
let slice = unsafe {
std::str::from_utf8_unchecked(std::slice::from_raw_parts(
uni,
str_size as usize,
))
};
serializer.serialize_str(slice)
}
ObjectType::Int => serializer.serialize_i64(unsafe { PyLong_AsLongLong(self.object) }),
ObjectType::Float => {
serializer.serialize_f64(unsafe { PyFloat_AS_DOUBLE(self.object) })
}
ObjectType::Bool => serializer.serialize_bool(self.object == unsafe { types::TRUE }),
ObjectType::None => serializer.serialize_unit(),
ObjectType::Dict => {
if std::intrinsics::unlikely(self.recursion_depth == RECURSION_LIMIT) {
return Err(ser::Error::custom("Recursion limit reached"));
}
let length = unsafe { (*self.object.cast::<PyDictObject>()).ma_used } as usize;
if std::intrinsics::unlikely(length == 0) {
serializer.serialize_map(Some(0))?.end()
} else {
let mut map = serializer.serialize_map(Some(length))?;
let mut pos = 0isize;
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
for _ in 0..length {
unsafe {
pyo3::ffi::PyDict_Next(self.object, &mut pos, &mut key, &mut value);
}
let uni = unsafe { string::read_utf8_from_str(key, &mut str_size) };
let slice = unsafe {
std::str::from_utf8_unchecked(std::slice::from_raw_parts(
uni,
str_size as usize,
))
};
#[allow(clippy::integer_arithmetic)]
map.serialize_entry(
slice,
&SerializePyObject::new(value, self.recursion_depth + 1),
)?;
}
map.end()
}
}
ObjectType::List => {
if std::intrinsics::unlikely(self.recursion_depth == RECURSION_LIMIT) {
return Err(ser::Error::custom("Recursion limit reached"));
}
let length = unsafe { PyList_GET_SIZE(self.object) } as usize;
if std::intrinsics::unlikely(length == 0) {
serializer.serialize_seq(Some(0))?.end()
} else {
let mut type_ptr = std::ptr::null_mut();
let mut ob_type = ObjectType::Str;
let mut sequence = serializer.serialize_seq(Some(length))?;
for i in 0..length {
let elem = unsafe { PyList_GET_ITEM(self.object, i as isize) };
let current_ob_type = unsafe { Py_TYPE(elem) };
if current_ob_type != type_ptr {
type_ptr = current_ob_type;
ob_type = get_object_type(current_ob_type)
}
#[allow(clippy::integer_arithmetic)]
sequence.serialize_element(&SerializePyObject::with_obtype(
elem,
ob_type.clone(),
self.recursion_depth + 1,
))?
}
sequence.end()
}
}
ObjectType::Unknown(ref type_name) => Err(ser::Error::custom(format!(
"Unsupported type: '{}'",
type_name
))),
}
}
}
#[inline]
pub(crate) fn to_value(object: &PyAny) -> PyResult<serde_json::Value> {
Ok(
serde_json::to_value(SerializePyObject::new(object.as_ptr(), 0))
.map_err(|err| exceptions::ValueError::py_err(err.to_string()))?,
)
}

51
python/src/string.rs Normal file
View File

@ -0,0 +1,51 @@
use pyo3::ffi::{PyTypeObject, PyUnicode_AsUTF8AndSize, Py_UNICODE, Py_hash_t, Py_ssize_t};
use std::os::raw::c_char;
#[repr(C)]
struct PyASCIIObject {
pub ob_refcnt: Py_ssize_t,
pub ob_type: *mut PyTypeObject,
pub length: Py_ssize_t,
pub hash: Py_hash_t,
pub state: u32,
pub wstr: *mut c_char,
}
#[repr(C)]
struct PyCompactUnicodeObject {
pub ob_refcnt: Py_ssize_t,
pub ob_type: *mut PyTypeObject,
pub length: Py_ssize_t,
pub hash: Py_hash_t,
pub state: u32,
pub wstr: *mut Py_UNICODE,
pub utf8_length: Py_ssize_t,
pub utf8: *mut c_char,
pub wstr_length: Py_ssize_t,
}
const STATE_ASCII: u32 = 0b00000000000000000000000001000000;
const STATE_COMPACT: u32 = 0b00000000000000000000000000100000;
/// Read a UTF-8 string from a pointer and change the given size if needed.
///
/// Returns a pointer to the string's bytes and stores their length in `size`. Three cases are
/// distinguished by the `state` field of the object:
///
/// - the ASCII flag is set: the bytes directly following the `PyASCIIObject` header are used;
/// - the compact flag is set and a `utf8` buffer is already present: that buffer is used;
/// - otherwise the call is delegated to `PyUnicode_AsUTF8AndSize`.
///
/// # Safety
///
/// `object_pointer` is dereferenced as `PyASCIIObject` / `PyCompactUnicodeObject` without any
/// type check, so it has to point to a Python `str` whose memory layout matches the structs
/// declared in this module.
/// NOTE(review): the pointer returned by `PyUnicode_AsUTF8AndSize` is passed on as is and is
/// presumably null when that call fails - callers should verify.
pub unsafe fn read_utf8_from_str(
object_pointer: *mut pyo3::ffi::PyObject,
size: &mut Py_ssize_t,
) -> *const u8 {
if std::intrinsics::likely(
(*object_pointer.cast::<PyASCIIObject>()).state & STATE_ASCII == STATE_ASCII,
) {
// ASCII: `length` is used as the byte length as well
*size = (*object_pointer.cast::<PyASCIIObject>()).length;
// `offset(1)` skips exactly one `PyASCIIObject`, i.e. points right behind the header
object_pointer.cast::<PyASCIIObject>().offset(1) as *const u8
} else if std::intrinsics::likely(
(*object_pointer.cast::<PyASCIIObject>()).state & STATE_COMPACT == STATE_COMPACT,
) && !(*object_pointer.cast::<PyCompactUnicodeObject>())
.utf8
.is_null()
{
// Compact string with a non-null `utf8` buffer: reuse it together with its stored length
*size = (*object_pointer.cast::<PyCompactUnicodeObject>()).utf8_length;
(*object_pointer.cast::<PyCompactUnicodeObject>()).utf8 as *const u8
} else {
// Fallback: let CPython provide the UTF-8 representation and its size
PyUnicode_AsUTF8AndSize(object_pointer, size) as *const u8
}
}

30
python/src/types.rs Normal file
View File

@ -0,0 +1,30 @@
use pyo3::ffi::*;
use std::sync::Once;
pub static mut TRUE: *mut pyo3::ffi::PyObject = 0 as *mut pyo3::ffi::PyObject;
pub static mut STR_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
pub static mut INT_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
pub static mut BOOL_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
pub static mut NONE_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
pub static mut FLOAT_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
pub static mut LIST_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
pub static mut DICT_TYPE: *mut PyTypeObject = 0 as *mut PyTypeObject;
static INIT: Once = Once::new();
/// Set empty type object pointers with their actual values.
/// We need these Python-side type objects for direct comparison during conversion to serde types
/// NOTE. This function should be called before any serialization logic
///
/// The assignments are wrapped into `INIT.call_once`, so they are executed only once, no matter
/// how many times this function is called.
pub fn init() {
INIT.call_once(|| unsafe {
TRUE = Py_True();
// Each type pointer is taken from a sample object of that type via `Py_TYPE`
// NOTE(review): the sample objects created below are not released in this function
STR_TYPE = Py_TYPE(PyUnicode_New(0, 255));
DICT_TYPE = Py_TYPE(PyDict_New());
LIST_TYPE = Py_TYPE(PyList_New(0 as Py_ssize_t));
NONE_TYPE = Py_TYPE(Py_None());
// `TRUE` is already assigned above, hence it can serve as the sample `bool` object
BOOL_TYPE = Py_TYPE(TRUE);
INT_TYPE = Py_TYPE(PyLong_FromLongLong(0));
FLOAT_TYPE = Py_TYPE(PyFloat_FromDouble(0.0));
});
}

View File

@ -0,0 +1,79 @@
from contextlib import suppress
import pytest
from hypothesis import given
from hypothesis import strategies as st
from jsonschema_rs import JSONSchema, is_valid
json = st.recursive(
st.none() | st.booleans() | st.floats() | st.integers() | st.text(),
lambda children: st.lists(children, min_size=1) | st.dictionaries(st.text(), children, min_size=1),
)
@given(instance=json)
def test_instance_processing(instance):
with suppress(Exception):
is_valid(True, instance)
@given(instance=json)
def test_schema_processing(instance):
with suppress(Exception):
is_valid(instance, True)
def test_invalid_schema():
with pytest.raises(ValueError):
is_valid(2 ** 64, True)
def test_invalid_type():
with pytest.raises(ValueError, match="Unsupported type: 'set'"):
is_valid(set(), True)
def test_repr():
assert repr(JSONSchema({"minimum": 5})) == '<JSONSchema: {"minimum":5}>'
def test_recursive_dict():
instance = {}
instance["foo"] = instance
with pytest.raises(ValueError):
is_valid(True, instance)
def test_recursive_list():
instance = []
instance.append(instance)
with pytest.raises(ValueError):
is_valid(True, instance)
@pytest.mark.parametrize("schema, draft, error", (([], None, "Invalid schema"), ({}, 5, "Unknown draft: 5"),))
def test_initialization_errors(schema, draft, error):
with pytest.raises(ValueError, match=error):
JSONSchema(schema, draft)
@given(minimum=st.integers().map(abs))
def test_minimum(minimum):
with suppress(SystemError):
assert is_valid({"minimum": minimum}, minimum)
assert is_valid({"minimum": minimum}, minimum - 1) is False
@given(maximum=st.integers().map(abs))
def test_maximum(maximum):
with suppress(SystemError):
assert is_valid({"maximum": maximum}, maximum)
assert is_valid({"maximum": maximum}, maximum + 1) is False
@pytest.mark.xfail(reason="The underlying Rust crate has not enough precision.")
@given(multiple_of=(st.integers() | st.floats(allow_infinity=False, allow_nan=False)).filter(lambda x: x > 0))
def test_multiple_of(multiple_of):
with suppress(SystemError):
assert is_valid({"multipleOf": multiple_of}, multiple_of * 3)

View File

@ -0,0 +1,48 @@
import json
import os
import pytest
import jsonschema_rs
SUPPORTED_DRAFTS = (4, 6, 7)
NOT_SUPPORTED_CASES = {4: ("bignum.json",), 6: ("bignum.json",), 7: ("bignum.json",)}
def load_file(path):
    """Lazily yield every top-level item of the JSON document stored at ``path``."""
    with open(path) as source:
        yield from json.load(source)
def maybe_optional(draft, schema, instance, expected, description, filename):
output = (draft, schema, instance, expected, description)
if filename in NOT_SUPPORTED_CASES.get(draft, ()):
output = pytest.param(
*output, marks=pytest.mark.skip(reason="{filename} is not supported".format(filename=filename))
)
return output
def pytest_generate_tests(metafunc):
cases = [
maybe_optional(draft, block["schema"], test["data"], test["valid"], test["description"], filename)
for draft in SUPPORTED_DRAFTS
for root, dirs, files in os.walk("../tests/suite/tests/draft{draft}/".format(draft=draft))
for filename in files
for block in load_file(os.path.join(root, filename))
for test in block["tests"]
]
metafunc.parametrize("draft, schema, instance, expected, description", cases)
def test_draft(draft, schema, instance, expected, description):
try:
result = jsonschema_rs.is_valid(schema, instance, int(draft))
assert result is expected, "{description}: {schema} | {instance}".format(
description=description, schema=schema, instance=instance
)
except ValueError:
pytest.fail(
"{description}: {schema} | {instance}".format(description=description, schema=schema, instance=instance)
)

14
python/tox.ini Normal file
View File

@ -0,0 +1,14 @@
[tox]
# This source package is built via `build-sdist.sh` as a workaround for the fact, that `setuptools_rust` doesn't support
# local dependencies (`jsonschema` is in the parent directory)
sdistsrc=dist/*.tar.gz
envlist =
py{35,36,37,38}
[testenv]
deps =
pytest
pytest-benchmark
hypothesis
commands =
python -m pytest tests-py {posargs:}

View File

@ -65,7 +65,7 @@ pub struct ValidationError<'a> {
/// if let Err(errors) = result {
/// for error in errors {
/// println!("Validation error: {}", error)
/// }
/// }
/// }
/// }
/// ```

View File

@ -21,7 +21,7 @@
//! if let Err(errors) = result {
//! for error in errors {
//! println!("Validation error: {}", error)
//! }
//! }
//! }
//! Ok(())
//! }