feat: FFI-based Python bindings

This commit is contained in:
Dmitry Dygalo 2020-05-17 17:35:11 +02:00 committed by Dmitry Dygalo
parent abcfc2ad2a
commit 46e7ff7ddf
33 changed files with 1016 additions and 11 deletions

View File

@ -17,6 +17,22 @@ jobs:
- name: Run commitsar
uses: docker://commitsar/commitsar
pre-commit:
  name: Generic pre-commit checks
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v1
      with:
        fetch-depth: 1
    # Interpreter used to install and run the pre-commit hooks
    - uses: actions/setup-python@v1
      with:
        # Quoted so the version is always passed as a string and never re-typed as a YAML float
        python-version: "3.7"
    - run: pip install pre-commit
    - run: pre-commit run --all-files
      working-directory: ./python
check:
name: Check
runs-on: ubuntu-latest
@ -91,6 +107,40 @@ jobs:
token: ${{ secrets.CODECOV_TOKEN }}
file: ${{ steps.coverage.outputs.report }}
test-python:
  strategy:
    matrix:
      os: [ubuntu-latest]
      # Versions are quoted so they stay strings; an unquoted `3.10` would be read as the float `3.1`
      python: ["3.5", "3.6", "3.7", "3.8"]
  name: Python ${{ matrix.python }} on ${{ matrix.os }}
  runs-on: ${{ matrix.os }}
  steps:
    - uses: actions/checkout@v2
      with:
        submodules: true
    - uses: actions/setup-python@v1
      with:
        python-version: ${{ matrix.python }}
    - name: Start background server  # Used for integration tests
      run: |
        # This assumes that python3 is installed on the system
        /usr/bin/env python3 -m pip install flask
        # Starts the server in background
        /usr/bin/env python3 ./tests/suite/bin/jsonschema_suite serve &
    - run: /usr/bin/env python3 -m pip install setuptools_rust tox
      working-directory: ./python
    # Builds the source distribution that tox installs (see `sdistsrc` in `python/tox.ini`)
    - run: ./build-sdist.sh
      working-directory: ./python
    - name: Run ${{ matrix.python }} tox job
      run: tox -e py${TOX_JOB//.}  # Strip dot from python version to match tox job
      working-directory: ./python
      env:
        TOX_JOB: ${{ matrix.python }}
fmt:
name: Rustfmt
runs-on: ubuntu-latest

47
.github/workflows/python-release.yml vendored Normal file
View File

@ -0,0 +1,47 @@
name: Python Release

# Runs only for tags that start with `python-v`
on:
  push:
    tags:
      - python-v*

jobs:
  create_wheels_manylinux:
    runs-on: ubuntu-latest
    name: Create wheels for manylinux
    container: quay.io/pypa/manylinux2010_x86_64
    steps:
      - uses: actions/checkout@v1
      - run: /usr/bin/env python3 -m pip install setuptools_rust twine
      - name: Build and audit wheels
        working-directory: ./python
        # The script has a bash shebang and relies on brace expansion, so run it with bash explicitly
        run: bash build-wheels.sh
      - name: Upload package
        working-directory: ./python
        # Credentials are quoted so that whitespace or glob characters in them cannot split the arguments
        run: twine upload ./dist/* --username="${PYPI_USERNAME}" --password="${PYPI_PASSWORD}"
        env:
          PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
          PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}

  create_source_dist:
    name: Create sdist package
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v1
      # `build-sdist.sh` invokes plain `python`; pin the interpreter instead of relying on the runner default
      - uses: actions/setup-python@v1
        with:
          python-version: "3.7"
      - run: /usr/bin/env python3 -m pip install setuptools_rust twine
      - name: Build sdist
        working-directory: ./python
        run: bash build-sdist.sh
      - name: Upload package
        working-directory: ./python
        run: twine upload ./dist/* --username="${PYPI_USERNAME}" --password="${PYPI_PASSWORD}"
        env:
          PYPI_USERNAME: ${{ secrets.PYPI_USERNAME }}
          PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }}

5
.gitignore vendored
View File

@ -1,3 +1,8 @@
/target
Cargo.lock
.idea
*.so
.hypothesis
.tox
*.tar.gz
*.egg-info

37
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,37 @@
default_language_version:
  python: python3.7

repos:
  # Generic file hygiene checks
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.5.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
        exclude: ^.*\.(md|rst)$
      - id: debug-statements
      - id: mixed-line-ending
        args: [--fix=lf]
      - id: check-merge-conflict

  - repo: https://github.com/jorisroovers/gitlint
    rev: v0.13.1
    hooks:
      - id: gitlint

  - repo: https://github.com/adrienverge/yamllint
    rev: v1.21.0
    hooks:
      - id: yamllint

  - repo: https://github.com/ambv/black
    # Pinned to a fixed tag: a moving ref such as `stable` is cached by pre-commit and makes runs non-reproducible
    rev: 19.10b0
    hooks:
      - id: black
        types: [python]

  - repo: https://github.com/pre-commit/mirrors-isort
    rev: v4.3.21
    hooks:
      - id: isort
        additional_dependencies: ["isort[pyproject]"]

4
.yamllint Normal file
View File

@ -0,0 +1,4 @@
---
# Start from yamllint's built-in `relaxed` preset and override only the line-length limit
extends: relaxed

rules:
  line-length:
    max: 120

View File

@ -8,7 +8,7 @@ readme = "README.md"
description = "A crate for performing JSON schema validation"
repository = "https://github.com/Stranger6667/jsonschema-rs"
keywords = ["jsonschema", "validation"]
exclude = ["tests"]
exclude = ["tests", "python"]
categories = ["web-programming"]
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

41
python/Cargo.toml Normal file
View File

@ -0,0 +1,41 @@
[package]
name = "jsonschema-python"
version = "0.1.0"
authors = ["Dmitry Dygalo <dadygalo@gmail.com>"]
edition = "2018"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[lib]
name = "jsonschema_rs"
crate-type = ["cdylib"]

[dependencies]
serde_json = "1"
serde = "1"

# The validation crate lives in the parent directory; `build-sdist.sh` temporarily rewrites this path
[dependencies.jsonschema]
path = "../"

[dependencies.pyo3]
# Caret requirement (>=0.10.0, <0.11.0): pre-1.0 minor releases may break the API, so an open-ended
# `>= 0.10` could select a release this crate does not compile against
version = "0.10"
features = ["extension-module"]

[package.metadata.maturin]
requires-python = ">=3.5"
project-url = ["https://github.com/Stranger6667/jsonschema-rs"]
maintainer = "Dmitry Dygalo"
maintainer-email = "Dmitry Dygalo <dadygalo@gmail.com>"
classifier = [
    "Development Status :: 3 - Alpha",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Rust",
    "Programming Language :: Python :: 3 :: Only",
    "Programming Language :: Python :: 3.5",
    "Programming Language :: Python :: 3.6",
    "Programming Language :: Python :: 3.7",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: Implementation :: CPython",
]

6
python/MANIFEST.in Normal file
View File

@ -0,0 +1,6 @@
# Files shipped in the source distribution in addition to the Python package itself.
# Build configuration of the extension module:
include Cargo.toml
include pyproject.toml
include rust-toolchain
# Rust sources of the bindings:
recursive-include src *
# Sources of the `jsonschema` crate, reachable through the `jsonschema` symlink created by `build-sdist.sh`:
recursive-include jsonschema/src/ *
include jsonschema/Cargo.toml

98
python/README.rst Normal file
View File

@ -0,0 +1,98 @@
jsonschema-rs
=============
|Build| |Version| |Python versions| |License|
Fast JSON Schema validation for Python implemented in Rust.
Supported drafts:
- Draft 7
- Draft 6
- Draft 4
There are some notable restrictions at the moment:
- The underlying crate doesn't support arbitrary precision integers yet, which may lead to ``SystemError`` when such a value is used;
- ``multipleOf`` keyword validation may produce false-negative results on some inputs. See `#84 <https://github.com/Stranger6667/jsonschema-rs/issues/84>`_ for more details.
Installation
------------
To install ``jsonschema-rs`` via ``pip`` run the following command:
.. code:: bash
pip install jsonschema-rs
Usage
-----
To check if the input document is valid:
.. code:: python
import jsonschema_rs
validator = jsonschema_rs.JSONSchema({"minimum": 42})
validator.is_valid(45) # True
**NOTE**. This library is in early development and does not yet provide a way to show validation errors (even though this is implemented in the underlying Rust crate).
Performance
-----------
According to our benchmarks, ``jsonschema-rs`` is usually faster than existing alternatives in real-life scenarios.
However, for single-keyword or boolean schemas it might be slower than ``fastjsonschema``.
Compiled validators (when the input schema is compiled once and reused later)
+----------------+------------------------+----------------------+----------------------+------------------------+
| library | ``false`` | ``{"minimum": 10}`` | small | big |
+================+========================+======================+======================+========================+
| jsonschema-rs | 320.3 ns | 329.32 ns | 1.15 us | 5.8 ms |
+----------------+------------------------+----------------------+----------------------+------------------------+
| fastjsonschema | 52.29 ns (**x0.16**) | 134.43 ns (**x0.4**) | 6.01 us (**x5.22**) | 587.5 ms (**x101.29**) |
+----------------+------------------------+----------------------+----------------------+------------------------+
| jsonschema | 289.97 ns (**x0.9**) | 2.52 us (**x7.65**) | 74.98 us (**x65.2**) | 2.02 s (**x348.27**) |
+----------------+------------------------+----------------------+----------------------+------------------------+
Validators are not compiled (``jsonschema``) or compiled on every validation:
+----------------+------------------------+-------------------------+-----------------------+-------------------------+
| library | ``false`` | ``{"minimum": 10}`` | small | big |
+================+========================+=========================+=======================+=========================+
| jsonschema-rs | 402.35 ns | 908.06 ns | 9.54 us | 5.9 ms |
+----------------+------------------------+-------------------------+-----------------------+-------------------------+
| fastjsonschema | 64.08 us (**x159.26**) | 119.57 us (**x131.67**) | 1.43 ms (**x149.89**) | 599.84 ms (**x101.66**) |
+----------------+------------------------+-------------------------+-----------------------+-------------------------+
| jsonschema | 67.74 us (**x168.36**) | 76.62 us (**x84.37**) | 1.02 ms (**x106.91**) | 2.11 s (**x357.62**) |
+----------------+------------------------+-------------------------+-----------------------+-------------------------+
The bigger the input is, the bigger the performance win.
In the tables above, ``big`` and ``small`` schemas refer to more realistic schemas and input instances.
You can take a look at benchmarks in ``benches/bench.py``. Ratios are given against ``jsonschema-rs``.
Python support
--------------
``jsonschema-rs`` supports Python 3.5, 3.6, 3.7 and 3.8.
License
-------
The code in this project is licensed under `MIT license`_.
By contributing to ``jsonschema-rs``, you agree that your contributions
will be licensed under its MIT license.
.. |Build| image:: https://github.com/Stranger6667/jsonschema-rs/workflows/build/badge.svg
:target: https://github.com/Stranger6667/jsonschema-rs/actions
.. |Version| image:: https://img.shields.io/pypi/v/jsonschema-rs.svg
:target: https://pypi.org/project/jsonschema-rs/
.. |Python versions| image:: https://img.shields.io/pypi/pyversions/jsonschema-rs.svg
:target: https://pypi.org/project/jsonschema-rs/
.. |License| image:: https://img.shields.io/pypi/l/jsonschema-rs.svg
:target: https://opensource.org/licenses/MIT
.. _MIT license: https://opensource.org/licenses/MIT

67
python/benches/bench.py Normal file
View File

@ -0,0 +1,67 @@
import json
import fastjsonschema
import jsonschema
import pytest
import jsonschema_rs
def load_json(filename):
    """Read the file at ``filename`` and return its content parsed as JSON."""
    with open(filename) as handle:
        content = handle.read()
    return json.loads(content)
# Benchmark inputs. The JSON files are loaded once at import time; the paths are relative to the
# current working directory.
BIG_SCHEMA = load_json("../benches/canada_schema.json")
BIG_INSTANCE = load_json("../benches/canada.json")
SMALL_SCHEMA = load_json("../benches/small_schema.json")
# Instance used together with `SMALL_SCHEMA` in `test_small_schema`
SMALL_INSTANCE_VALID = [9, "hello", [1, "a", True], {"a": "a", "b": "b", "d": "d"}, 42, 3]
@pytest.fixture(params=[True, False])
def is_compiled(request):
    """Parametrize a test over both modes: validator built up front (``True``) or on every call (``False``)."""
    compiled = request.param
    return compiled
@pytest.fixture(params=["rust", "python", "python-fast"])
def args(request, is_compiled):
    """Build the ``(callable, *arguments)`` tuple that is handed to ``benchmark``.

    The schema and the instance come from the ``data`` marker of the requesting test. The fixture is
    parametrized over three libraries: ``jsonschema_rs``, ``jsonschema`` and ``fastjsonschema``.
    """
    schema, instance = request.node.get_closest_marker("data").args
    library = request.param
    if library == "rust":
        if not is_compiled:
            return jsonschema_rs.is_valid, schema, instance
        return jsonschema_rs.JSONSchema(schema).is_valid, instance
    if library == "python":
        if not is_compiled:
            # `jsonschema.validate` takes the instance first
            return jsonschema.validate, instance, schema
        validator_class = jsonschema.validators.validator_for(schema)
        return validator_class(schema).is_valid, instance
    if library == "python-fast":
        if not is_compiled:
            return fastjsonschema.validate, schema, instance
        return fastjsonschema.compile(schema), instance
@pytest.mark.data(True, True)
@pytest.mark.benchmark(group="boolean")
def test_boolean(benchmark, args):
    """Benchmark validating ``True`` against the boolean schema ``True``."""
    target, *call_args = args
    benchmark(target, *call_args)
@pytest.mark.data({"minimum": 10}, 10)
@pytest.mark.benchmark(group="minimum")
def test_minimum(benchmark, args):
    """Benchmark validating ``10`` against a single-keyword ``minimum`` schema."""
    target, *call_args = args
    benchmark(target, *call_args)
@pytest.mark.data(SMALL_SCHEMA, SMALL_INSTANCE_VALID)
@pytest.mark.benchmark(group="small")
def test_small_schema(benchmark, args):
    """Benchmark validating ``SMALL_INSTANCE_VALID`` against ``SMALL_SCHEMA``."""
    target, *call_args = args
    benchmark(target, *call_args)
@pytest.mark.data(BIG_SCHEMA, BIG_INSTANCE)
@pytest.mark.benchmark(group="big")
def test_big_schema(benchmark, args):
    """Benchmark validating ``BIG_INSTANCE`` against ``BIG_SCHEMA``."""
    target, *call_args = args
    benchmark(target, *call_args)

View File

@ -0,0 +1,2 @@
def pytest_configure(config):
    """Register the custom ``data`` marker in the ``markers`` ini option."""
    marker_description = "data(schema, instance): add data for benchmarking"
    config.addinivalue_line("markers", marker_description)

15
python/build-sdist.sh Executable file
View File

@ -0,0 +1,15 @@
#!/bin/bash
# `setuptools_rust` and `maturin` don't support local path dependencies such as `jsonschema` (it lives in the
# parent directory). As a workaround we create a modified distribution of this library that has the `jsonschema`
# crate as a dependency in the same directory; then the sources are copied as declared in MANIFEST.in and the
# resulting package can be installed properly.
set -ex

# Roll back the temporary local changes. Registered as an EXIT trap so the working tree is restored
# even when `set -e` aborts the script because the build failed.
cleanup() {
  rm -f jsonschema
  sed -i 's/"jsonschema"/"\.\.\/"/' Cargo.toml
}
trap cleanup EXIT

ln -sf ../ jsonschema
# Modify Cargo.toml to point at this symlink
sed -i 's/\.\.\//jsonschema/' Cargo.toml
# Build the source distribution
python setup.py sdist

18
python/build-wheels.sh Executable file
View File

@ -0,0 +1,18 @@
#!/bin/bash
# Build a wheel for every listed CPython version and repair each one with `auditwheel`.
set -ex

yum install openssl-devel -y

# Install the nightly Rust toolchain and make `cargo` available in this shell
curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly -y
export PATH="$HOME/.cargo/bin:$PATH"

for PYBIN in /opt/python/{cp35-cp35m,cp36-cp36m,cp37-cp37m,cp38-cp38}/bin; do
  export PYTHON_SYS_EXECUTABLE="$PYBIN/python"
  "${PYBIN}/pip" install -U setuptools wheel setuptools-rust
  "${PYBIN}/python" setup.py bdist_wheel
done

# The glob is expanded once, before the loop starts, so the repaired wheels written to `dist/` are not revisited
for whl in dist/*.whl; do
  auditwheel repair "$whl" -w dist/
  # Keep only the repaired wheel: the release workflow uploads everything in `dist/`
  rm "$whl"
done

15
python/pyproject.toml Normal file
View File

@ -0,0 +1,15 @@
# Packages that must be available before `setup.py` can run
[build-system]
requires = ["setuptools", "wheel", "setuptools-rust"]
# Code formatter settings
[tool.black]
line-length = 120
target_version = ["py37"]
# Import sorter settings
[tool.isort]
# config compatible with Black
line_length = 120
multi_line_output = 3
default_section = "THIRDPARTY"
include_trailing_comma = true
known_first_party = "jsonschema_rs"
known_third_party = []

View File

@ -0,0 +1,3 @@
# Re-export the public names of the sibling `jsonschema_rs` submodule at package level
from .jsonschema_rs import *
# The import above also binds the submodule itself as a package attribute; remove that name again
del jsonschema_rs

1
python/rust-toolchain Normal file
View File

@ -0,0 +1 @@
nightly

36
python/setup.py Normal file
View File

@ -0,0 +1,36 @@
from setuptools import find_packages, setup
from setuptools_rust import Binding, RustExtension
# Read the long description up front so that the file handle is closed deterministically
with open("README.rst", encoding="utf-8") as readme:
    long_description = readme.read()

setup(
    name="jsonschema_rs",
    version="0.1.0",
    description="Fast JSON Schema validation for Python implemented in Rust",
    long_description=long_description,
    long_description_content_type="text/x-rst",
    keywords="jsonschema validation rust",
    author="Dmitry Dygalo",
    author_email="dadygalo@gmail.com",
    maintainer="Dmitry Dygalo",
    maintainer_email="dadygalo@gmail.com",
    python_requires=">=3.5",
    # Points at the `python` directory of the repository (the bare `<repo>/python` path is not a valid URL)
    url="https://github.com/Stranger6667/jsonschema-rs/tree/master/python",
    license="MIT",
    rust_extensions=[RustExtension("jsonschema_rs.jsonschema_rs", binding=Binding.PyO3)],
    # `extras_require` is the keyword setuptools understands; a plain `extras` is an unknown option and is ignored
    extras_require={"tests": ["pytest", "hypothesis"], "bench": ["pytest-benchmark"]},
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        # Must match the official trove classifier exactly, otherwise the upload is rejected
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: Implementation :: CPython",
        "Programming Language :: Rust",
    ],
    packages=find_packages(where="pysrc"),
    package_dir={"": "pysrc"},
    zip_safe=False,
)

154
python/src/lib.rs Normal file
View File

@ -0,0 +1,154 @@
#![feature(core_intrinsics)]
#![warn(
clippy::doc_markdown,
clippy::redundant_closure,
clippy::explicit_iter_loop,
clippy::match_same_arms,
clippy::needless_borrow,
clippy::print_stdout,
clippy::integer_arithmetic,
clippy::cast_possible_truncation,
clippy::result_unwrap_used,
clippy::result_map_unwrap_or_else,
clippy::option_unwrap_used,
clippy::option_map_unwrap_or_else,
clippy::option_map_unwrap_or
)]
use jsonschema::Draft;
use pyo3::prelude::*;
use pyo3::types::PyAny;
use pyo3::{exceptions, wrap_pyfunction, PyObjectProtocol};
use serde_json::Value;
mod ser;
mod string;
mod types;
// Text registered as the `__doc__` attribute of the Python module
const MODULE_DOCSTRING: &str = "JSON Schema validation for Python written in Rust.";
// Numeric draft identifiers: accepted by `get_draft` and exported to Python as `Draft7` / `Draft6` / `Draft4`
const DRAFT7: u8 = 7;
const DRAFT6: u8 = 6;
const DRAFT4: u8 = 4;
// Errors produced by this module that are converted into Python exceptions (see `From<JSONSchemaError> for PyErr`)
#[derive(Debug)]
enum JSONSchemaError {
// The schema could not be compiled by the `jsonschema` crate
Compilation(jsonschema::CompilationError),
}
impl From<JSONSchemaError> for PyErr {
    /// Convert an internal error into a Python `ValueError` with a fixed message per variant.
    fn from(error: JSONSchemaError) -> PyErr {
        let message = match error {
            JSONSchemaError::Compilation(_) => "Invalid schema",
        };
        exceptions::ValueError::py_err(message)
    }
}
fn get_draft(draft: Option<u8>) -> PyResult<Draft> {
if let Some(value) = draft {
match value {
DRAFT4 => Ok(jsonschema::Draft::Draft4),
DRAFT6 => Ok(jsonschema::Draft::Draft6),
DRAFT7 => Ok(jsonschema::Draft::Draft7),
_ => Err(exceptions::ValueError::py_err(format!(
"Unknown draft: {}",
value
))),
}
} else {
Ok(jsonschema::Draft::default())
}
}
/// A shortcut for validating the input instance against the schema.
///
/// >>> is_valid({"minimum": 5}, 3)
/// False
///
/// If your workflow implies validating against the same schema, consider using `JSONSchema.is_valid`
/// instead.
#[pyfunction]
#[text_signature = "(schema, instance, draft=None)"]
fn is_valid(schema: &PyAny, instance: &PyAny, draft: Option<u8>) -> PyResult<bool> {
let draft = get_draft(draft).map(Some)?;
let schema = ser::to_value(schema)?;
let instance = ser::to_value(instance)?;
let compiled =
jsonschema::JSONSchema::compile(&schema, draft).map_err(JSONSchemaError::Compilation)?;
Ok(compiled.is_valid(&instance))
}
/// JSON Schema compiled into a validation tree.
///
/// >>> compiled = JSONSchema({"minimum": 5})
/// >>> compiled.is_valid(3)
/// False
///
/// By default Draft 7 will be used for compilation.
#[pyclass]
#[text_signature = "(schema, draft=None)"]
struct JSONSchema {
// Compiled validator; it is built from the `raw_schema` reference below (see `new`)
schema: jsonschema::JSONSchema<'static>,
// Schema value leaked via `Box::leak` in `new` to obtain a `'static` reference; freed in `Drop`
raw_schema: &'static Value,
}
#[pymethods]
impl JSONSchema {
#[new]
fn new(schema: &PyAny, draft: Option<u8>) -> PyResult<Self> {
let draft = get_draft(draft).map(Some)?;
let raw_schema = ser::to_value(schema)?;
// Currently, it is the simplest way to pass a reference to `JSONSchema`
// It is cleaned up in the `Drop` implementation
let schema: &'static Value = Box::leak(Box::new(raw_schema));
Ok(JSONSchema {
schema: jsonschema::JSONSchema::compile(schema, draft)
.map_err(JSONSchemaError::Compilation)?,
raw_schema: schema,
})
}
/// Perform fast validation against the compiled schema.
///
/// >>> compiled = JSONSchema({"minimum": 5})
/// >>> compiled.is_valid(3)
/// False
///
/// The output is a boolean value, that indicates whether the instance is valid or not.
#[text_signature = "(instance)"]
fn is_valid(&self, instance: &PyAny) -> bool {
let instance = ser::to_value(instance).unwrap();
self.schema.is_valid(&instance)
}
}
// Maximum number of schema bytes shown by `__repr__` before the output is abbreviated
const SCHEMA_LENGTH_LIMIT: usize = 32;

#[pyproto]
impl<'p> PyObjectProtocol<'p> for JSONSchema {
    // Render as `<JSONSchema: {...}>`; schemas longer than `SCHEMA_LENGTH_LIMIT` bytes are cut and end with `...}`
    fn __repr__(&self) -> PyResult<String> {
        let mut schema = self.raw_schema.to_string();
        if schema.len() > SCHEMA_LENGTH_LIMIT {
            // `String::truncate` panics if the new length is not on a char boundary, which happens when a
            // multi-byte character straddles the limit. Cut at the closest boundary at or below the limit.
            let end = (0..=SCHEMA_LENGTH_LIMIT)
                .rev()
                .find(|&index| schema.is_char_boundary(index))
                .unwrap_or(0);
            schema.truncate(end);
            schema = format!("{}...}}", schema);
        }
        Ok(format!("<JSONSchema: {}>", schema))
    }
}
impl Drop for JSONSchema {
    /// Release the schema value that was leaked in `JSONSchema::new`.
    fn drop(&mut self) {
        let leaked = self.raw_schema as *const Value as *mut Value;
        // Since `self.raw_schema` is not used anywhere else, there should be no double-free
        // NOTE(review): `self.schema` was compiled from this reference and is dropped after this
        // function returns - confirm that it does not read the schema while being dropped
        unsafe {
            drop(Box::from_raw(leaked));
        }
    }
}
// Module initializer: caches the Python type pointers and registers the public API
#[pymodule]
fn jsonschema_rs(_py: Python, module: &PyModule) -> PyResult<()> {
    // Must run before any serialization logic (see `types::init`)
    types::init();
    module.add_wrapped(wrap_pyfunction!(is_valid))?;
    module.add_class::<JSONSchema>()?;
    let drafts = [("Draft4", DRAFT4), ("Draft6", DRAFT6), ("Draft7", DRAFT7)];
    for (name, draft) in drafts.iter() {
        module.add(*name, *draft)?;
    }
    module.add("__doc__", MODULE_DOCSTRING)
}

184
python/src/ser.rs Normal file
View File

@ -0,0 +1,184 @@
use pyo3::exceptions;
use pyo3::ffi::*;
use pyo3::prelude::*;
use pyo3::types::PyAny;
use pyo3::AsPyPointer;
use serde::ser::{self, Serialize, SerializeMap, SerializeSeq};
use serde::Serializer;
use crate::{string, types};
use std::ffi::CStr;
/// Nesting depth at which serialization of dicts and lists stops with a "Recursion limit reached" error.
pub const RECURSION_LIMIT: u8 = 255;
/// Kind of a Python object as detected by `get_object_type` via exact type-pointer comparison.
#[derive(Clone)]
pub enum ObjectType {
Str,
Int,
Bool,
None,
Float,
List,
Dict,
// Any other type; carries the type's name (`tp_name`) for the error message
Unknown(String),
}
/// Wrapper around a raw Python object pointer that implements `serde::Serialize`.
pub(crate) struct SerializePyObject {
// Raw pointer to the Python object to serialize
object: *mut pyo3::ffi::PyObject,
// Detected kind of `object`; selects the serialization branch
object_type: ObjectType,
// Nesting level of `object`; compared against `RECURSION_LIMIT` for dicts and lists
recursion_depth: u8,
}
impl SerializePyObject {
    /// Wrap `object`, detecting its kind from the object's type pointer.
    #[inline]
    pub fn new(object: *mut pyo3::ffi::PyObject, recursion_depth: u8) -> Self {
        let object_type = get_object_type_from_object(object);
        Self::with_obtype(object, object_type, recursion_depth)
    }

    /// Wrap `object` with an already known kind, skipping the type lookup.
    #[inline]
    pub fn with_obtype(
        object: *mut pyo3::ffi::PyObject,
        object_type: ObjectType,
        recursion_depth: u8,
    ) -> Self {
        Self {
            object,
            object_type,
            recursion_depth,
        }
    }
}
/// Detect the kind of `object` by looking at its type pointer.
fn get_object_type_from_object(object: *mut pyo3::ffi::PyObject) -> ObjectType {
    let type_pointer = unsafe { Py_TYPE(object) };
    get_object_type(type_pointer)
}
/// Classify a Python type pointer by comparing it with the pointers cached in `types`.
///
/// Only exact matches are recognized; any other type is reported as `ObjectType::Unknown`
/// carrying the type's `tp_name`.
#[inline]
pub fn get_object_type(object_type: *mut pyo3::ffi::PyTypeObject) -> ObjectType {
    unsafe {
        if object_type == types::STR_TYPE {
            return ObjectType::Str;
        }
        if object_type == types::FLOAT_TYPE {
            return ObjectType::Float;
        }
        if object_type == types::BOOL_TYPE {
            return ObjectType::Bool;
        }
        if object_type == types::INT_TYPE {
            return ObjectType::Int;
        }
        if object_type == types::NONE_TYPE {
            return ObjectType::None;
        }
        if object_type == types::LIST_TYPE {
            return ObjectType::List;
        }
        if object_type == types::DICT_TYPE {
            return ObjectType::Dict;
        }
        let type_name = CStr::from_ptr((*object_type).tp_name).to_string_lossy();
        ObjectType::Unknown(type_name.to_string())
    }
}
/// Convert a Python value to `serde_json::Value`
impl Serialize for SerializePyObject {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match self.object_type {
ObjectType::Str => {
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
let uni = unsafe { string::read_utf8_from_str(self.object, &mut str_size) };
let slice = unsafe {
std::str::from_utf8_unchecked(std::slice::from_raw_parts(
uni,
str_size as usize,
))
};
serializer.serialize_str(slice)
}
ObjectType::Int => serializer.serialize_i64(unsafe { PyLong_AsLongLong(self.object) }),
ObjectType::Float => {
serializer.serialize_f64(unsafe { PyFloat_AS_DOUBLE(self.object) })
}
ObjectType::Bool => serializer.serialize_bool(self.object == unsafe { types::TRUE }),
ObjectType::None => serializer.serialize_unit(),
ObjectType::Dict => {
if std::intrinsics::unlikely(self.recursion_depth == RECURSION_LIMIT) {
return Err(ser::Error::custom("Recursion limit reached"));
}
let length = unsafe { (*self.object.cast::<PyDictObject>()).ma_used } as usize;
if std::intrinsics::unlikely(length == 0) {
serializer.serialize_map(Some(0))?.end()
} else {
let mut map = serializer.serialize_map(Some(length))?;
let mut pos = 0isize;
let mut str_size: pyo3::ffi::Py_ssize_t = 0;
let mut key: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
let mut value: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
for _ in 0..length {
unsafe {
pyo3::ffi::PyDict_Next(self.object, &mut pos, &mut key, &mut value);
}
let uni = unsafe { string::read_utf8_from_str(key, &mut str_size) };
let slice = unsafe {
std::str::from_utf8_unchecked(std::slice::from_raw_parts(
uni,
str_size as usize,
))
};
#[allow(clippy::integer_arithmetic)]
map.serialize_entry(
slice,
&SerializePyObject::new(value, self.recursion_depth + 1),
)?;
}
map.end()
}
}
ObjectType::List => {
if std::intrinsics::unlikely(self.recursion_depth == RECURSION_LIMIT) {
return Err(ser::Error::custom("Recursion limit reached"));
}
let length = unsafe { PyList_GET_SIZE(self.object) } as usize;
if std::intrinsics::unlikely(length == 0) {
serializer.serialize_seq(Some(0))?.end()
} else {
let mut type_ptr = std::ptr::null_mut();
let mut ob_type = ObjectType::Str;
let mut sequence = serializer.serialize_seq(Some(length))?;
for i in 0..length {
let elem = unsafe { PyList_GET_ITEM(self.object, i as isize) };
let current_ob_type = unsafe { Py_TYPE(elem) };
if current_ob_type != type_ptr {
type_ptr = current_ob_type;
ob_type = get_object_type(current_ob_type)
}
#[allow(clippy::integer_arithmetic)]
sequence.serialize_element(&SerializePyObject::with_obtype(
elem,
ob_type.clone(),
self.recursion_depth + 1,
))?
}
sequence.end()
}
}
ObjectType::Unknown(ref type_name) => Err(ser::Error::custom(format!(
"Unsupported type: '{}'",
type_name
))),
}
}
}
#[inline]
pub(crate) fn to_value(object: &PyAny) -> PyResult<serde_json::Value> {
Ok(
serde_json::to_value(SerializePyObject::new(object.as_ptr(), 0))
.map_err(|err| exceptions::ValueError::py_err(err.to_string()))?,
)
}

51
python/src/string.rs Normal file
View File

@ -0,0 +1,51 @@
use pyo3::ffi::{PyTypeObject, PyUnicode_AsUTF8AndSize, Py_UNICODE, Py_hash_t, Py_ssize_t};
use std::os::raw::c_char;
// C-layout view used by `read_utf8_from_str` to read the `state` and `length` fields of a Python
// string and to locate the data stored right after this header.
// NOTE(review): presumably mirrors CPython's `PyASCIIObject`; the field order must match the interpreter - confirm
#[repr(C)]
struct PyASCIIObject {
pub ob_refcnt: Py_ssize_t,
pub ob_type: *mut PyTypeObject,
pub length: Py_ssize_t,
pub hash: Py_hash_t,
// Flags word tested against `STATE_ASCII` / `STATE_COMPACT`
pub state: u32,
pub wstr: *mut c_char,
}
// C-layout view used by `read_utf8_from_str` to read the `utf8` pointer and `utf8_length` of a Python string.
// NOTE(review): presumably mirrors CPython's `PyCompactUnicodeObject`; the field order must match the interpreter - confirm
#[repr(C)]
struct PyCompactUnicodeObject {
pub ob_refcnt: Py_ssize_t,
pub ob_type: *mut PyTypeObject,
pub length: Py_ssize_t,
pub hash: Py_hash_t,
pub state: u32,
pub wstr: *mut Py_UNICODE,
// Length of the buffer behind `utf8`
pub utf8_length: Py_ssize_t,
// UTF-8 data; checked for null before use
pub utf8: *mut c_char,
pub wstr_length: Py_ssize_t,
}
// Bits of the `state` flags word
const STATE_ASCII: u32 = 0b00000000000000000000000001000000;
const STATE_COMPACT: u32 = 0b00000000000000000000000000100000;
// Only strings that are both compact and ASCII keep their data directly after the `PyASCIIObject` header
const STATE_COMPACT_ASCII: u32 = STATE_COMPACT | STATE_ASCII;

/// Read a UTF-8 string from a pointer and change the given size if needed.
///
/// Returns a pointer to the UTF-8 data and stores its length in `size`. The result may be null
/// when the fallback `PyUnicode_AsUTF8AndSize` call fails.
///
/// # Safety
///
/// `object_pointer` must point to a valid Python `str` object.
pub unsafe fn read_utf8_from_str(
    object_pointer: *mut pyo3::ffi::PyObject,
    size: &mut Py_ssize_t,
) -> *const u8 {
    let state = (*object_pointer.cast::<PyASCIIObject>()).state;
    if std::intrinsics::likely(state & STATE_COMPACT_ASCII == STATE_COMPACT_ASCII) {
        // Compact ASCII: the data is stored inline and is already valid UTF-8.
        // Requiring both bits keeps non-compact ASCII strings away from this path - their data
        // lives in a separate buffer, so they are handled by the branches below
        *size = (*object_pointer.cast::<PyASCIIObject>()).length;
        object_pointer.cast::<PyASCIIObject>().offset(1) as *const u8
    } else if std::intrinsics::likely(state & STATE_COMPACT == STATE_COMPACT)
        && !(*object_pointer.cast::<PyCompactUnicodeObject>())
            .utf8
            .is_null()
    {
        // Compact string with a non-null `utf8` pointer: use that buffer
        *size = (*object_pointer.cast::<PyCompactUnicodeObject>()).utf8_length;
        (*object_pointer.cast::<PyCompactUnicodeObject>()).utf8 as *const u8
    } else {
        // Everything else goes through the C API
        PyUnicode_AsUTF8AndSize(object_pointer, size) as *const u8
    }
}

30
python/src/types.rs Normal file
View File

@ -0,0 +1,30 @@
use pyo3::ffi::*;
use std::sync::Once;
// Pointers to Python-side objects, null until `init` fills them in
pub static mut TRUE: *mut pyo3::ffi::PyObject = std::ptr::null_mut();
pub static mut STR_TYPE: *mut PyTypeObject = std::ptr::null_mut();
pub static mut INT_TYPE: *mut PyTypeObject = std::ptr::null_mut();
pub static mut BOOL_TYPE: *mut PyTypeObject = std::ptr::null_mut();
pub static mut NONE_TYPE: *mut PyTypeObject = std::ptr::null_mut();
pub static mut FLOAT_TYPE: *mut PyTypeObject = std::ptr::null_mut();
pub static mut LIST_TYPE: *mut PyTypeObject = std::ptr::null_mut();
pub static mut DICT_TYPE: *mut PyTypeObject = std::ptr::null_mut();

// Ensures the pointers above are assigned only once
static INIT: Once = Once::new();
/// Set empty type object pointers with their actual values.
/// We need these Python-side type objects for direct comparison during conversion to serde types
/// NOTE. This function should be called before any serialization logic
pub fn init() {
INIT.call_once(|| unsafe {
TRUE = Py_True();
STR_TYPE = Py_TYPE(PyUnicode_New(0, 255));
DICT_TYPE = Py_TYPE(PyDict_New());
LIST_TYPE = Py_TYPE(PyList_New(0 as Py_ssize_t));
NONE_TYPE = Py_TYPE(Py_None());
BOOL_TYPE = Py_TYPE(TRUE);
INT_TYPE = Py_TYPE(PyLong_FromLongLong(0));
FLOAT_TYPE = Py_TYPE(PyFloat_FromDouble(0.0));
});
}

View File

@ -0,0 +1,79 @@
from contextlib import suppress
import pytest
from hypothesis import given
from hypothesis import strategies as st
from jsonschema_rs import JSONSchema, is_valid
# Leaf values of the generated documents
_json_scalars = st.none() | st.booleans() | st.floats() | st.integers() | st.text()


def _json_containers(children):
    """Wrap ``children`` into non-empty lists or non-empty dictionaries with text keys."""
    return st.lists(children, min_size=1) | st.dictionaries(st.text(), children, min_size=1)


# Strategy that generates arbitrarily nested JSON-like values
json = st.recursive(_json_scalars, _json_containers)
@given(instance=json)
def test_instance_processing(instance):
    """Feed arbitrary generated values as instances; Python-level exceptions are tolerated."""
    try:
        is_valid(True, instance)
    except Exception:
        pass
@given(instance=json)
def test_schema_processing(instance):
    """Feed arbitrary generated values as schemas; Python-level exceptions are tolerated."""
    try:
        is_valid(instance, True)
    except Exception:
        pass
def test_invalid_schema():
    """An integer schema of ``2 ** 64`` is rejected with ``ValueError``."""
    schema = 2 ** 64
    with pytest.raises(ValueError):
        is_valid(schema, True)
def test_invalid_type():
    """A schema of an unsupported Python type is reported together with the type's name."""
    unsupported = set()
    with pytest.raises(ValueError, match="Unsupported type: 'set'"):
        is_valid(unsupported, True)
def test_repr():
    """``repr`` of a compiled schema shows the serialized schema."""
    compiled = JSONSchema({"minimum": 5})
    assert repr(compiled) == '<JSONSchema: {"minimum":5}>'
def test_recursive_dict():
    """A dictionary that contains itself is rejected with ``ValueError``."""
    cyclic = {}
    cyclic.update(foo=cyclic)
    with pytest.raises(ValueError):
        is_valid(True, cyclic)
def test_recursive_list():
    """A list that contains itself is rejected with ``ValueError``."""
    cyclic = []
    cyclic += [cyclic]
    with pytest.raises(ValueError):
        is_valid(True, cyclic)
@pytest.mark.parametrize(
    "schema, draft, error",
    (
        ([], None, "Invalid schema"),
        ({}, 5, "Unknown draft: 5"),
    ),
)
def test_initialization_errors(schema, draft, error):
    """Invalid constructor arguments raise ``ValueError`` with the expected message."""
    with pytest.raises(ValueError, match=error):
        JSONSchema(schema, draft)
@given(minimum=st.integers().map(abs))
def test_minimum(minimum):
    """A value equal to ``minimum`` is valid, the one right below it is not."""
    schema = {"minimum": minimum}
    # `SystemError` is tolerated here
    try:
        assert is_valid(schema, minimum)
        assert is_valid(schema, minimum - 1) is False
    except SystemError:
        pass
@given(maximum=st.integers().map(abs))
def test_maximum(maximum):
    """A value equal to ``maximum`` is valid, the one right above it is not."""
    schema = {"maximum": maximum}
    # `SystemError` is tolerated here
    try:
        assert is_valid(schema, maximum)
        assert is_valid(schema, maximum + 1) is False
    except SystemError:
        pass
@pytest.mark.xfail(reason="The underlying Rust crate has not enough precision.")
@given(multiple_of=(st.integers() | st.floats(allow_infinity=False, allow_nan=False)).filter(lambda x: x > 0))
def test_multiple_of(multiple_of):
    """Three times a positive number is a multiple of that number."""
    schema = {"multipleOf": multiple_of}
    try:
        assert is_valid(schema, multiple_of * 3)
    except SystemError:
        pass

View File

@ -0,0 +1,48 @@
import json
import os
import pytest
import jsonschema_rs
# Drafts for which test cases are collected from the suite
SUPPORTED_DRAFTS = (4, 6, 7)
# Per draft: names of suite files whose cases are marked as skipped (see `maybe_optional`)
NOT_SUPPORTED_CASES = {4: ("bignum.json",), 6: ("bignum.json",), 7: ("bignum.json",)}
def load_file(path):
    """Lazily yield every top-level item of the JSON array stored at ``path``."""
    with open(path) as handle:
        yield from json.load(handle)
def maybe_optional(draft, schema, instance, expected, description, filename):
    """Build the parameter set for one test case.

    Cases from files listed in ``NOT_SUPPORTED_CASES`` for the given draft are wrapped
    into ``pytest.param`` carrying a ``skip`` mark; all others are returned as a plain tuple.
    """
    case = (draft, schema, instance, expected, description)
    if filename not in NOT_SUPPORTED_CASES.get(draft, ()):
        return case
    skip_mark = pytest.mark.skip(reason="{filename} is not supported".format(filename=filename))
    return pytest.param(*case, marks=skip_mark)
def pytest_generate_tests(metafunc):
    """Parametrize the requesting test with every case found in the suite for each supported draft."""
    cases = []
    for draft in SUPPORTED_DRAFTS:
        suite_directory = "../tests/suite/tests/draft{draft}/".format(draft=draft)
        for root, dirs, files in os.walk(suite_directory):
            for filename in files:
                for block in load_file(os.path.join(root, filename)):
                    for test in block["tests"]:
                        case = maybe_optional(
                            draft, block["schema"], test["data"], test["valid"], test["description"], filename
                        )
                        cases.append(case)
    metafunc.parametrize("draft, schema, instance, expected, description", cases)
def test_draft(draft, schema, instance, expected, description):
    """Validate one suite case; a ``ValueError`` raised by the library counts as a failure."""
    message = "{description}: {schema} | {instance}".format(
        description=description, schema=schema, instance=instance
    )
    try:
        result = jsonschema_rs.is_valid(schema, instance, int(draft))
    except ValueError:
        pytest.fail(message)
    assert result is expected, message

14
python/tox.ini Normal file
View File

@ -0,0 +1,14 @@
[tox]
# This source package is built via `build-sdist.sh` as a workaround for the fact that `setuptools_rust` doesn't support
# local dependencies (`jsonschema` is in the parent directory)
sdistsrc=dist/*.tar.gz
envlist =
py{35,36,37,38}
[testenv]
deps =
pytest
pytest-benchmark
hypothesis
commands =
python -m pytest tests-py {posargs:}