feat(python): Custom format checkers

Signed-off-by: Dmitry Dygalo <dmitry@dygalo.dev>
This commit is contained in:
Dmitry Dygalo 2024-05-01 21:43:10 +02:00 committed by Dmitry Dygalo
parent 0a00839482
commit e564888da5
8 changed files with 199 additions and 52 deletions

View File

@ -4,7 +4,7 @@
### Added
- Custom keywords support. [#379](https://github.com/Stranger6667/jsonschema-rs/issues/429)
- Custom keywords support. [#379](https://github.com/Stranger6667/jsonschema-rs/issues/379)
- Expose `JsonPointerNode` that can be converted into `JSONPointer`.
This is needed for the upcoming custom validators support.
@ -24,6 +24,8 @@
- Bump `regex` to `1.10`.
- Bump `url` to `2.5`.
- Build CLI only if the `cli` feature is enabled.
- **BREAKING**: Extend `CompilationOptions` to support more ways to define custom format checkers (for example in Python bindings).
In turn, this changes `ValidationErrorKind::Format` to contain a `String` instead of a `&'static str`.
## [0.17.1] - 2023-07-05

View File

@ -2,6 +2,10 @@
## [Unreleased]
### Added
- Defining custom format checkers. [#245](https://github.com/Stranger6667/jsonschema-rs/issues/245)
### Changed
- Update `pyo3` to `0.21`.

View File

@ -57,6 +57,24 @@ validator = jsonschema_rs.JSONSchema.from_str('{"minimum": 42}')
...
```
You can define custom format checkers:
```python
import jsonschema_rs
def is_currency(value):
# The input value is always a string
return len(value) == 3 and value.isascii()
validator = jsonschema_rs.JSONSchema(
{"type": "string", "format": "currency"},
formats={"currency": is_currency}
)
validator.is_valid("USD") # True
validator.is_valid("invalid") # False
```
## Performance
According to our benchmarks, `jsonschema-rs` is usually faster than

View File

@ -16,12 +16,18 @@
)]
#![allow(clippy::upper_case_acronyms)]
use std::{
any::Any,
cell::RefCell,
panic::{self, AssertUnwindSafe},
};
use jsonschema::{paths::JSONPointer, Draft};
use pyo3::{
exceptions::{self, PyValueError},
ffi::PyUnicode_AsUTF8AndSize,
prelude::*,
types::{PyAny, PyList, PyType},
types::{PyAny, PyDict, PyList, PyString, PyType},
wrap_pyfunction,
};
#[macro_use]
@ -128,9 +134,14 @@ fn get_draft(draft: u8) -> PyResult<Draft> {
}
}
// Per-thread slot for the Python exception raised by a custom format checker.
// The checker closure built in `make_options` stores the `PyErr` here right before
// panicking, and `handle_format_checked_panic` takes it back out once the panic
// has been caught, leaving the slot empty (`None`) again.
thread_local! {
static LAST_FORMAT_ERROR: RefCell<Option<PyErr>> = const { RefCell::new(None) };
}
fn make_options(
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<jsonschema::CompilationOptions> {
let mut options = jsonschema::JSONSchema::options();
if let Some(raw_draft_version) = draft {
@ -139,6 +150,37 @@ fn make_options(
if with_meta_schemas == Some(true) {
options.with_meta_schemas();
}
if let Some(formats) = formats {
for (name, callback) in formats.iter() {
if !callback.is_callable() {
return Err(exceptions::PyValueError::new_err(format!(
"Format checker for '{}' must be a callable",
name
)));
}
let callback: Py<PyAny> = callback.clone().unbind();
let call_py_callback = move |value: &str| {
Python::with_gil(|py| {
let value = PyString::new_bound(py, value);
callback.call_bound(py, (value,), None)?.is_truthy(py)
})
};
options.with_format(
name.to_string(),
move |value: &str| match call_py_callback(value) {
Ok(r) => r,
Err(e) => {
LAST_FORMAT_ERROR.with(|last| {
*last.borrow_mut() = Some(e);
});
std::panic::set_hook(Box::new(|_| {}));
// Should be caught
panic!("Format checker failed")
}
},
);
}
}
Ok(options)
}
@ -150,11 +192,15 @@ fn iter_on_error(
let instance = ser::to_value(instance)?;
let mut pyerrors = vec![];
if let Err(errors) = compiled.validate(&instance) {
for error in errors {
pyerrors.push(into_py_err(py, error)?);
}
};
panic::catch_unwind(AssertUnwindSafe(|| {
if let Err(errors) = compiled.validate(&instance) {
for error in errors {
pyerrors.push(into_py_err(py, error)?);
}
};
PyResult::Ok(())
}))
.map_err(handle_format_checked_panic)??;
Ok(ValidationErrorIter {
iter: pyerrors.into_iter(),
})
@ -166,7 +212,8 @@ fn raise_on_error(
instance: &Bound<'_, PyAny>,
) -> PyResult<()> {
let instance = ser::to_value(instance)?;
let result = compiled.validate(&instance);
let result = panic::catch_unwind(AssertUnwindSafe(|| compiled.validate(&instance)))
.map_err(handle_format_checked_panic)?;
let error = result
.err()
.map(|mut errors| errors.next().expect("Iterator should not be empty"));
@ -227,7 +274,7 @@ fn to_error_message(error: &jsonschema::ValidationError<'_>) -> String {
message
}
/// is_valid(schema, instance, draft=None, with_meta_schemas=False)
/// is_valid(schema, instance, draft=None, with_meta_schemas=False, formats=None)
///
/// A shortcut for validating the input instance against the schema.
///
@ -237,26 +284,28 @@ fn to_error_message(error: &jsonschema::ValidationError<'_>) -> String {
/// If your workflow implies validating against the same schema, consider using `JSONSchema.is_valid`
/// instead.
#[pyfunction]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False, formats=None)")]
fn is_valid(
py: Python<'_>,
schema: &Bound<'_, PyAny>,
instance: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<bool> {
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
let schema = ser::to_value(schema)?;
match options.compile(&schema) {
Ok(compiled) => {
let instance = ser::to_value(instance)?;
Ok(compiled.is_valid(&instance))
panic::catch_unwind(AssertUnwindSafe(|| Ok(compiled.is_valid(&instance))))
.map_err(handle_format_checked_panic)?
}
Err(error) => Err(into_py_err(py, error)?),
}
}
/// validate(schema, instance, draft=None, with_meta_schemas=False)
/// validate(schema, instance, draft=None, with_meta_schemas=False, formats=None)
///
/// Validate the input instance and raise `ValidationError` in the error case
///
@ -268,15 +317,16 @@ fn is_valid(
/// If your workflow implies validating against the same schema, consider using `JSONSchema.validate`
/// instead.
#[pyfunction]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False, formats=None)")]
fn validate(
py: Python<'_>,
schema: &Bound<'_, PyAny>,
instance: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<()> {
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
let schema = ser::to_value(schema)?;
match options.compile(&schema) {
Ok(compiled) => raise_on_error(py, &compiled, instance),
@ -284,7 +334,7 @@ fn validate(
}
}
/// iter_errors(schema, instance, draft=None, with_meta_schemas=False)
/// iter_errors(schema, instance, draft=None, with_meta_schemas=False, formats=None)
///
/// Iterate the validation errors of the input instance
///
@ -295,15 +345,16 @@ fn validate(
/// If your workflow implies validating against the same schema, consider using `JSONSchema.iter_errors`
/// instead.
#[pyfunction]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(schema, instance, draft=None, with_meta_schemas=False, formats=None)")]
fn iter_errors(
py: Python<'_>,
schema: &Bound<'_, PyAny>,
instance: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<ValidationErrorIter> {
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
let schema = ser::to_value(schema)?;
match options.compile(&schema) {
Ok(compiled) => iter_on_error(py, &compiled, instance),
@ -338,17 +389,29 @@ fn get_schema_repr(schema: &serde_json::Value) -> String {
repr
}
/// Translate a panic caught around a validation call into a Python exception.
///
/// A failing Python format checker stores its exception in `LAST_FORMAT_ERROR`
/// and then panics. If such an exception is pending, it is taken out of the slot
/// and returned unchanged, so the caller sees the original Python error.
/// Otherwise the panic did not come from a format checker and is reported as a
/// `RuntimeError` that carries the panic message.
///
/// `err` is the payload returned by `std::panic::catch_unwind`.
fn handle_format_checked_panic(err: Box<dyn Any + Send>) -> PyErr {
    LAST_FORMAT_ERROR.with(|last| {
        if let Some(err) = last.borrow_mut().take() {
            // The failing checker installed a silent panic hook before panicking.
            // `take_hook` unregisters it, which puts the default hook back in place.
            let _ = panic::take_hook();
            err
        } else {
            // `panic!` payloads are a `&'static str` or a `String`. Formatting the
            // boxed `dyn Any` with `{:?}` only prints `Any { .. }`, so extract the
            // message explicitly.
            let message = err
                .downcast_ref::<&str>()
                .map(|text| (*text).to_owned())
                .or_else(|| err.downcast_ref::<String>().cloned())
                .unwrap_or_else(|| "unknown panic payload".to_owned());
            exceptions::PyRuntimeError::new_err(format!("Validation panicked: {}", message))
        }
    })
}
#[pymethods]
impl JSONSchema {
#[new]
#[pyo3(text_signature = "(schema, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(schema, draft=None, with_meta_schemas=False, formats=None)")]
fn new(
py: Python<'_>,
pyschema: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<Self> {
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
let raw_schema = ser::to_value(pyschema)?;
match options.compile(&raw_schema) {
Ok(schema) => Ok(JSONSchema {
@ -358,7 +421,7 @@ impl JSONSchema {
Err(error) => Err(into_py_err(py, error)?),
}
}
/// from_str(string, draft=None, with_meta_schemas=False)
/// from_str(string, draft=None, with_meta_schemas=False, formats=None)
///
/// Create `JSONSchema` from a serialized JSON string.
///
@ -366,13 +429,14 @@ impl JSONSchema {
///
/// Use it if you have your schema as a string and want to utilize Rust JSON parsing.
#[classmethod]
#[pyo3(text_signature = "(string, draft=None, with_meta_schemas=False)")]
#[pyo3(text_signature = "(string, draft=None, with_meta_schemas=False, formats=None)")]
fn from_str(
_: &Bound<'_, PyType>,
py: Python<'_>,
pyschema: &Bound<'_, PyAny>,
draft: Option<u8>,
with_meta_schemas: Option<bool>,
formats: Option<&Bound<'_, PyDict>>,
) -> PyResult<Self> {
let obj_ptr = pyschema.as_ptr();
let object_type = unsafe { pyo3::ffi::Py_TYPE(obj_ptr) };
@ -389,7 +453,7 @@ impl JSONSchema {
let slice = unsafe { std::slice::from_raw_parts(ptr.cast::<u8>(), str_size as usize) };
let raw_schema = serde_json::from_slice(slice)
.map_err(|error| PyValueError::new_err(format!("Invalid string: {}", error)))?;
let options = make_options(draft, with_meta_schemas)?;
let options = make_options(draft, with_meta_schemas, formats)?;
match options.compile(&raw_schema) {
Ok(schema) => Ok(JSONSchema {
schema,
@ -412,7 +476,8 @@ impl JSONSchema {
#[pyo3(text_signature = "(instance)")]
fn is_valid(&self, instance: &Bound<'_, PyAny>) -> PyResult<bool> {
let instance = ser::to_value(instance)?;
Ok(self.schema.is_valid(&instance))
panic::catch_unwind(AssertUnwindSafe(|| Ok(self.schema.is_valid(&instance))))
.map_err(handle_format_checked_panic)?
}
/// validate(instance)

View File

@ -275,3 +275,41 @@ def test_dict_subclasses(type_, value, expected):
schema = {"type": "object", "properties": {"foo": {"type": "integer"}}}
document = type_({"foo": value})
assert is_valid(schema, document) is expected
def test_custom_format():
    """A callable registered via `formats` decides whether an instance is valid."""

    def has_three_ascii_chars(candidate):
        return len(candidate) == 3 and candidate.isascii()

    schema = {"type": "string", "format": "currency"}
    validator = JSONSchema(schema, formats={"currency": has_three_ascii_chars})

    assert validator.is_valid("USD")
    for rejected in (42, "invalid"):
        assert not validator.is_valid(rejected)
def test_custom_format_invalid_callback():
    """Passing a non-callable as a format checker raises `ValueError` at construction."""
    schema = {"type": "string", "format": "currency"}
    not_callable = 42
    expected_message = "Format checker for 'currency' must be a callable"
    with pytest.raises(ValueError, match=expected_message):
        JSONSchema(schema, formats={"currency": not_callable})
def test_custom_format_with_exception():
    """An exception raised inside a format checker reaches the caller of every entry point."""

    def always_fails(_):
        raise ValueError("Invalid currency")

    schema = {"type": "string", "format": "currency"}
    formats = {"currency": always_fails}
    validator = JSONSchema(schema, formats=formats)

    # Each thunk exercises one entry point: compiled-validator methods first,
    # then the module-level shortcuts. Iterators are fully consumed.
    entry_points = (
        lambda: validator.is_valid("USD"),
        lambda: validator.validate("USD"),
        lambda: list(validator.iter_errors("USD")),
        lambda: is_valid(schema, "USD", formats=formats),
        lambda: validate(schema, "USD", formats=formats),
        lambda: list(iter_errors(schema, "USD", formats=formats)),
    )
    for call in entry_points:
        with pytest.raises(ValueError, match="Invalid currency"):
            call()

View File

@ -5,7 +5,7 @@ use crate::{
DEFAULT_CONTENT_ENCODING_CHECKS_AND_CONVERTERS,
},
content_media_type::{ContentMediaTypeCheckType, DEFAULT_CONTENT_MEDIA_TYPE_CHECKS},
keywords::custom::KeywordFactory,
keywords::{custom::KeywordFactory, format::Format},
paths::JSONPointer,
resolver::{DefaultResolver, Resolver, SchemaResolver},
schemas, Keyword, ValidationError,
@ -273,7 +273,7 @@ pub struct CompilationOptions {
content_encoding_checks_and_converters:
AHashMap<&'static str, Option<(ContentEncodingCheckType, ContentEncodingConverterType)>>,
store: AHashMap<String, Arc<serde_json::Value>>,
formats: AHashMap<&'static str, fn(&str) -> bool>,
formats: AHashMap<String, Arc<dyn Format>>,
validate_formats: Option<bool>,
validate_schema: bool,
ignore_unknown_formats: bool,
@ -598,11 +598,15 @@ impl CompilationOptions {
/// ```
///
/// The format check function should receive `&str` and return `bool`.
pub fn with_format(&mut self, name: &'static str, format: fn(&str) -> bool) -> &mut Self {
self.formats.insert(name, format);
pub fn with_format<N, F>(&mut self, name: N, format: F) -> &mut Self
where
N: Into<String>,
F: Fn(&str) -> bool + Send + Sync + 'static,
{
self.formats.insert(name.into(), Arc::new(format));
self
}
pub(crate) fn format(&self, format: &str) -> FormatKV<'_> {
pub(crate) fn get_format(&self, format: &str) -> Option<(&String, &Arc<dyn Format>)> {
self.formats.get_key_value(format)
}
/// Do not perform schema validation during compilation.
@ -717,8 +721,6 @@ impl CompilationOptions {
self.keywords.get(name)
}
}
// format name & a pointer to a check function
type FormatKV<'a> = Option<(&'a &'static str, &'a fn(&str) -> bool)>;
impl fmt::Debug for CompilationOptions {
fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {

View File

@ -93,7 +93,7 @@ pub enum ValidationErrorKind {
/// If the referenced file is not found during ref resolution.
FileNotFound { error: io::Error },
/// When the input doesn't match the specified format.
Format { format: &'static str },
Format { format: String },
/// May happen in `contentEncoding` validation if `base64` encoded data is invalid.
FromUtf8 { error: FromUtf8Error },
/// Invalid UTF-8 string during percent encoding when resolving happens
@ -416,16 +416,18 @@ impl<'a> ValidationError<'a> {
schema_path: JSONPointer::default(),
}
}
pub(crate) const fn format(
pub(crate) fn format(
schema_path: JSONPointer,
instance_path: JSONPointer,
instance: &'a Value,
format: &'static str,
format: impl Into<String>,
) -> ValidationError<'a> {
ValidationError {
instance_path,
instance: Cow::Borrowed(instance),
kind: ValidationErrorKind::Format { format },
kind: ValidationErrorKind::Format {
format: format.into(),
},
schema_path,
}
}

View File

@ -1,5 +1,5 @@
//! Validator for `format` keyword.
use std::{net::IpAddr, str::FromStr};
use std::{net::IpAddr, str::FromStr, sync::Arc};
use fancy_regex::Regex;
use once_cell::sync::Lazy;
@ -369,14 +369,14 @@ impl Validate for DurationValidator {
struct CustomFormatValidator {
schema_path: JSONPointer,
format_name: &'static str,
check: fn(&str) -> bool,
format_name: String,
check: Arc<dyn Format>,
}
impl CustomFormatValidator {
pub(crate) fn compile<'a>(
context: &CompilationContext,
format_name: &'static str,
check: fn(&str) -> bool,
format_name: String,
check: Arc<dyn Format>,
) -> CompilationResult<'a> {
let schema_path = context.as_pointer_with("format");
Ok(Box::new(CustomFormatValidator {
@ -398,28 +398,40 @@ impl Validate for CustomFormatValidator {
instance: &'instance Value,
instance_path: &JsonPointerNode,
) -> ErrorIterator<'instance> {
if let Value::String(_item) = instance {
if !self.is_valid(instance) {
return error(ValidationError::format(
self.schema_path.clone(),
instance_path.into(),
instance,
self.format_name,
));
}
if !self.is_valid(instance) {
return error(ValidationError::format(
self.schema_path.clone(),
instance_path.into(),
instance,
self.format_name.clone(),
));
}
no_error()
}
fn is_valid(&self, instance: &Value) -> bool {
if let Value::String(item) = instance {
(self.check)(item)
self.check.is_valid(item)
} else {
true
}
}
}
/// A check that decides whether a string satisfies a named format.
///
/// Implementors must be `Send + Sync + 'static`.
pub(crate) trait Format: Send + Sync + 'static {
    /// Return `true` when `value` satisfies this format.
    fn is_valid(&self, value: &str) -> bool;
}

/// Any `Fn(&str) -> bool` that is `Send + Sync + 'static` is a `Format`;
/// checking a value simply calls it.
impl<Check: Fn(&str) -> bool + Send + Sync + 'static> Format for Check {
    #[inline]
    fn is_valid(&self, value: &str) -> bool {
        (self)(value)
    }
}
#[inline]
pub(crate) fn compile<'a>(
_: &'a Map<String, Value>,
@ -431,8 +443,12 @@ pub(crate) fn compile<'a>(
}
if let Value::String(format) = schema {
if let Some((format, func)) = context.config.format(format) {
return Some(CustomFormatValidator::compile(context, format, *func));
if let Some((name, func)) = context.config.get_format(format) {
return Some(CustomFormatValidator::compile(
context,
name.clone(),
func.clone(),
));
}
let draft_version = context.config.draft();
match format.as_str() {