feat: Custom keyword validation (#473)

Signed-off-by: Dmitry Dygalo <dmitry@dygalo.dev>
Co-authored-by: Benjamin Tobler <ben@tobler.nz>
Co-authored-by: Benjamin Tobler <benjamin.tobler@stedi.com>
Co-authored-by: Dmitry Dygalo <dmitry@dygalo.dev>
This commit is contained in:
Sam Roberts 2024-05-01 12:16:08 -04:00 committed by GitHub
parent 7946e978b5
commit aa94a4b24a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 440 additions and 7 deletions

View File

@ -4,6 +4,7 @@
### Added
- Custom keywords support. [#379](https://github.com/Stranger6667/jsonschema-rs/issues/429)
- Expose `JsonPointerNode` that can be converted into `JSONPointer`.
This is needed for the upcoming custom validators support.

View File

@ -6,7 +6,7 @@ pub(crate) mod options;
use crate::{
error::ErrorIterator,
keywords,
keywords::{self, custom::CustomKeyword, BoxedValidator},
output::Output,
paths::{JSONPointer, JsonPointerNode},
primitive_type::{PrimitiveType, PrimitiveTypesBitMap},
@ -198,7 +198,13 @@ pub(crate) fn compile_validators<'a>(
{
is_props = true;
}
if let Some(validator) = context
// Check if this keyword is overridden, then check the standard definitions
if let Some(factory) = context.config.get_keyword_factory(keyword) {
let path = context.as_pointer_with(keyword.to_owned());
let validator = CustomKeyword::new(factory.init(object, subschema, path)?);
let validator: BoxedValidator = Box::new(validator);
validators.push((keyword.clone(), validator));
} else if let Some(validator) = context
.config
.draft()
.get_validator(keyword)
@ -244,8 +250,17 @@ pub(crate) fn compile_validators<'a>(
#[cfg(test)]
mod tests {
use super::JSONSchema;
use crate::error::ValidationError;
use serde_json::{from_str, json, Value};
use crate::{
error::{self, no_error, ValidationError},
keywords::custom::Keyword,
paths::{JSONPointer, JsonPointerNode},
primitive_type::PrimitiveType,
ErrorIterator,
};
use num_cmp::NumCmp;
use once_cell::sync::Lazy;
use regex::Regex;
use serde_json::{from_str, json, Map, Value};
use std::{fs::File, io::Read, path::Path};
fn load(path: &str, idx: usize) -> Value {
@ -302,4 +317,240 @@ mod tests {
);
assert_eq!(errors[1].to_string(), r#""a" is shorter than 3 characters"#);
}
#[test]
fn custom_keyword_definition() {
    /// Custom validator that verifies an object's keys consist of only ASCII characters.
    ///
    /// The keyword only constrains objects: any other instance type passes this keyword
    /// (instead of panicking) and is left to the remaining keywords, e.g. `type`.
    /// NOTE: This could be done with `propertyNames` + `pattern` but will be slower due to
    /// regex usage.
    struct CustomObjectValidator;
    impl Keyword for CustomObjectValidator {
        /// Yield one "Key is not ASCII" error per offending key; nothing for non-objects.
        fn validate<'instance>(
            &self,
            instance: &'instance Value,
            instance_path: &JsonPointerNode,
        ) -> ErrorIterator<'instance> {
            let mut errors = vec![];
            if let Some(object) = instance.as_object() {
                for key in object.keys() {
                    if !key.is_ascii() {
                        errors.push(ValidationError::custom(
                            JSONPointer::default(),
                            instance_path.into(),
                            instance,
                            "Key is not ASCII",
                        ));
                    }
                }
            }
            Box::new(errors.into_iter())
        }
        /// `true` when every key is ASCII, or when the instance is not an object.
        fn is_valid(&self, instance: &Value) -> bool {
            instance
                .as_object()
                .map_or(true, |object| object.keys().all(|key| key.is_ascii()))
        }
    }
    /// Factory for the `custom-object-type` keyword: the only accepted keyword value is
    /// the string `"ascii-keys"`, anything else is reported as a compilation error.
    fn custom_object_type_factory<'a>(
        _: &'a Map<String, Value>,
        schema: &'a Value,
        path: JSONPointer,
    ) -> Result<Box<dyn Keyword>, ValidationError<'a>> {
        const EXPECTED: &str = "ascii-keys";
        if schema.as_str().map_or(true, |key| key != EXPECTED) {
            Err(ValidationError::constant_string(
                JSONPointer::default(),
                path,
                schema,
                EXPECTED,
            ))
        } else {
            Ok(Box::new(CustomObjectValidator))
        }
    }
    // Define a JSON schema that enforces the top level object has ASCII keys and has at least 1 property
    let schema =
        json!({ "custom-object-type": "ascii-keys", "type": "object", "minProperties": 1 });
    let compiled = JSONSchema::options()
        .with_keyword("custom-object-type", custom_object_type_factory)
        .compile(&schema)
        .unwrap();
    // Verify schema validation detects object with too few properties
    let instance = json!({});
    assert!(compiled.validate(&instance).is_err());
    assert!(!compiled.is_valid(&instance));
    // Verify validator succeeds on a valid custom-object-type
    let instance = json!({ "a" : 1 });
    assert!(compiled.validate(&instance).is_ok());
    assert!(compiled.is_valid(&instance));
    // Verify validator detects invalid custom-object-type
    let instance = json!({ "å" : 1 });
    let error = compiled
        .validate(&instance)
        .expect_err("Should fail")
        .next()
        .expect("Not empty");
    assert_eq!(error.to_string(), "Key is not ASCII");
    assert!(!compiled.is_valid(&instance));
    // Verify a non-object instance is rejected by `type` without the custom keyword panicking
    let instance = json!("not an object");
    assert!(compiled.validate(&instance).is_err());
    assert!(!compiled.is_valid(&instance));
}
#[test]
fn custom_format_and_override_keyword() {
    /// Format checker for "currency" strings: an integer part (either `0` or digits that
    /// do not start with `0`), a dot, and exactly two fractional digits.
    fn currency_format_checker(s: &str) -> bool {
        static CURRENCY_RE: Lazy<Regex> = Lazy::new(|| {
            Regex::new("^(0|([1-9]+[0-9]*))(\\.[0-9]{2})$").expect("Invalid regex")
        });
        CURRENCY_RE.is_match(s)
    }
    /// Replacement for the standard "minimum" keyword that additionally applies the
    /// limit to "currency"-formatted strings.
    struct CustomMinimumValidator {
        limit: f64,
        limit_val: Value,
        with_currency_format: bool,
        schema_path: JSONPointer,
    }
    impl Keyword for CustomMinimumValidator {
        fn validate<'instance>(
            &self,
            instance: &'instance Value,
            instance_path: &JsonPointerNode,
        ) -> ErrorIterator<'instance> {
            if self.is_valid(instance) {
                return no_error();
            }
            error::error(ValidationError::minimum(
                self.schema_path.clone(),
                instance_path.into(),
                instance,
                self.limit_val.clone(),
            ))
        }
        fn is_valid(&self, instance: &Value) -> bool {
            match instance {
                // Numbers are compared exactly like the original keyword would do:
                // try the unsigned view first, then the signed one, then fall back to float
                Value::Number(number) => match (number.as_u64(), number.as_i64()) {
                    (Some(unsigned), _) => !NumCmp::num_lt(unsigned, self.limit),
                    (None, Some(signed)) => !NumCmp::num_lt(signed, self.limit),
                    (None, None) => {
                        let float = number.as_f64().expect("Always valid");
                        !NumCmp::num_lt(float, self.limit)
                    }
                },
                // Strings are only compared when the schema opted into the "currency"
                // format and the string actually matches it
                Value::String(text)
                    if self.with_currency_format && currency_format_checker(text) =>
                {
                    let amount = text
                        .parse::<f64>()
                        .expect("format validated by regex checker");
                    !NumCmp::num_lt(amount, self.limit)
                }
                // Everything else is out of scope for "minimum"
                _ => true,
            }
        }
    }
    /// Factory producing the validator that takes over the standard `minimum` keyword
    fn custom_minimum_factory<'a>(
        parent: &'a Map<String, Value>,
        schema: &'a Value,
        schema_path: JSONPointer,
    ) -> Result<Box<dyn Keyword>, ValidationError<'a>> {
        let limit = match schema {
            Value::Number(number) => number.as_f64().expect("Always valid"),
            _ => {
                return Err(ValidationError::single_type_error(
                    // A custom keyword has no metaschema definition, so the `schema` pointer stays empty
                    JSONPointer::default(),
                    schema_path,
                    schema,
                    PrimitiveType::Number,
                ))
            }
        };
        // The sibling "format" keyword decides whether strings take part in the comparison
        let with_currency_format =
            parent.get("format").and_then(Value::as_str) == Some("currency");
        let validator = CustomMinimumValidator {
            limit,
            limit_val: schema.clone(),
            with_currency_format,
            schema_path,
        };
        Ok(Box::new(validator))
    }
    /// Both validation entry points must accept `instance`
    fn assert_accepted(compiled: &JSONSchema, instance: &Value) {
        assert!(compiled.validate(instance).is_ok());
        assert!(compiled.is_valid(instance));
    }
    /// Both validation entry points must reject `instance`
    fn assert_rejected(compiled: &JSONSchema, instance: &Value) {
        assert!(compiled.validate(instance).is_err());
        assert!(!compiled.is_valid(instance));
    }
    // A schema that combines the custom format with the overridden keyword
    let schema = json!({ "minimum": 2, "type": "string", "format": "currency" });
    let compiled = JSONSchema::options()
        .with_format("currency", currency_format_checker)
        .with_keyword("minimum", custom_minimum_factory)
        .with_keyword("minimum-2", |_, _, _| todo!())
        .compile(&schema)
        .expect("Invalid schema");
    // Control: non-string types are rejected by the schema
    assert_rejected(&compiled, &json!(15));
    // Control: ill-formatted strings are rejected
    assert_rejected(&compiled, &json!("not a currency"));
    // Properly formatted strings that satisfy the custom keyword are allowed
    assert_accepted(&compiled, &json!("3.00"));
    // Properly formatted strings below the limit are rejected
    assert_rejected(&compiled, &json!("1.99"));
    // A second schema applies "minimum" to an integer to check the original behavior is kept
    let schema = json!({ "minimum": 2, "type": "integer" });
    let compiled = JSONSchema::options()
        .with_format("currency", currency_format_checker)
        .with_keyword("minimum", custom_minimum_factory)
        .compile(&schema)
        .expect("Invalid schema");
    // Integers greater than 2 are allowed
    assert_accepted(&compiled, &json!(3));
    // Integers less than 2 are rejected
    assert_rejected(&compiled, &json!(1));
    // A non-numeric `minimum` value must fail compilation
    let schema = json!({ "minimum": "foo" });
    let error = JSONSchema::options()
        .with_keyword("minimum", custom_minimum_factory)
        .compile(&schema)
        .expect_err("Should fail");
    assert_eq!(error.to_string(), "\"foo\" is not of type \"number\"");
}
}

View File

@ -5,8 +5,10 @@ use crate::{
DEFAULT_CONTENT_ENCODING_CHECKS_AND_CONVERTERS,
},
content_media_type::{ContentMediaTypeCheckType, DEFAULT_CONTENT_MEDIA_TYPE_CHECKS},
keywords::custom::KeywordFactory,
paths::JSONPointer,
resolver::{DefaultResolver, Resolver, SchemaResolver},
schemas, ValidationError,
schemas, Keyword, ValidationError,
};
use ahash::AHashMap;
use once_cell::sync::Lazy;
@ -275,6 +277,7 @@ pub struct CompilationOptions {
validate_formats: Option<bool>,
validate_schema: bool,
ignore_unknown_formats: bool,
keywords: AHashMap<String, Arc<dyn KeywordFactory>>,
}
impl Default for CompilationOptions {
@ -289,6 +292,7 @@ impl Default for CompilationOptions {
formats: AHashMap::default(),
validate_formats: None,
ignore_unknown_formats: true,
keywords: AHashMap::default(),
}
}
}
@ -637,6 +641,78 @@ impl CompilationOptions {
/// Return the current value of the `ignore_unknown_formats` option.
pub(crate) const fn are_unknown_formats_ignored(&self) -> bool {
self.ignore_unknown_formats
}
/// Register a custom keyword definition.
///
/// `name` is the keyword as it is spelled in a schema. `factory` is called while the
/// schema is compiled, once for each occurrence of that keyword, and receives:
///
/// - the schema object that contains the keyword,
/// - the value of the keyword itself,
/// - the JSON pointer to the keyword within the schema.
///
/// It returns the [`Keyword`] implementation used to validate instances, or a
/// [`ValidationError`] which makes the compilation fail.
///
/// A custom keyword takes precedence over a standard keyword with the same name, so this
/// method can also be used to override built-in behavior. Registering the same `name`
/// again replaces the previously registered factory.
///
/// ## Example
///
/// ```rust
/// # use jsonschema::{ErrorIterator, JSONSchema, paths::{JsonPointerNode, JSONPointer}, Keyword, ValidationError};
/// # use serde_json::{json, Value, Map};
/// # use std::{sync::Arc, iter::once};
///
/// struct MyCustomValidator;
///
/// impl Keyword for MyCustomValidator {
///     fn validate<'instance>(
///         &self,
///         instance: &'instance Value,
///         instance_path: &JsonPointerNode,
///     ) -> ErrorIterator<'instance> {
///         // ... validate instance ...
///         if !instance.is_object() {
///             let error = ValidationError::custom(
///                 JSONPointer::default(),
///                 instance_path.into(),
///                 instance,
///                 "Boom!",
///             );
///             Box::new(once(error))
///         } else {
///             Box::new(None.into_iter())
///         }
///     }
///     fn is_valid(&self, instance: &Value) -> bool {
///         // ... determine if instance is valid, consistently with `validate` ...
///         instance.is_object()
///     }
/// }
///
/// // You can create a factory function, or use a closure to create new validator instances.
/// fn custom_validator_factory<'a>(
///     parent: &'a Map<String, Value>,
///     schema: &'a Value,
///     path: JSONPointer,
/// ) -> Result<Box<dyn Keyword>, ValidationError<'a>> {
///     Ok(Box::new(MyCustomValidator))
/// }
///
/// assert!(JSONSchema::options()
///     .with_keyword("my-type", custom_validator_factory)
///     .with_keyword("my-type-with-closure", |_, _, _| Ok(Box::new(MyCustomValidator)))
///     .compile(&json!({ "my-type": "my-schema"}))
///     .expect("A valid schema")
///     .is_valid(&json!({ "a": "b"})));
/// ```
pub fn with_keyword<N, F>(&mut self, name: N, factory: F) -> &mut Self
where
N: Into<String>,
F: for<'a> Fn(
&'a serde_json::Map<String, serde_json::Value>,
&'a serde_json::Value,
JSONPointer,
) -> Result<Box<dyn Keyword>, ValidationError<'a>>
+ Send
+ Sync
+ 'static,
{
// The factory is stored type-erased behind an `Arc`, keyed by the keyword name.
self.keywords.insert(name.into(), Arc::new(factory));
self
}
/// Look up the factory registered via `with_keyword` for the keyword `name`.
///
/// Returns `None` when no custom keyword with that name was registered.
pub(crate) fn get_keyword_factory(&self, name: &str) -> Option<&Arc<dyn KeywordFactory>> {
self.keywords.get(name)
}
}
// format name & a pointer to a check function
type FormatKV<'a> = Option<(&'a &'static str, &'a fn(&str) -> bool)>;

View File

@ -7,8 +7,8 @@ use crate::{
use serde_json::{Map, Number, Value};
use std::{
borrow::Cow,
error, fmt,
fmt::Formatter,
error,
fmt::{self, Formatter},
io,
iter::{empty, once},
str::Utf8Error,
@ -80,6 +80,8 @@ pub enum ValidationErrorKind {
ContentEncoding { content_encoding: String },
/// The input value does not respect the defined contentMediaType
ContentMediaType { content_media_type: String },
/// Custom error message for user-defined validation.
Custom { message: String },
/// The input value doesn't match any of specified options.
Enum { options: Value },
/// Value is too large.
@ -735,6 +737,22 @@ impl<'a> ValidationError<'a> {
schema_path: JSONPointer::default(),
}
}
/// Create a new custom validation error.
///
/// The error is of kind `ValidationErrorKind::Custom` and carries `message`
/// (converted into a `String`); `instance` is borrowed rather than cloned.
///
/// - `schema_path`: pointer stored as the error's `schema_path`.
/// - `instance_path`: pointer stored as the error's `instance_path`.
/// - `instance`: the value the error refers to.
/// - `message`: the text of the custom error.
pub fn custom(
    schema_path: JSONPointer,
    instance_path: JSONPointer,
    instance: &'a Value,
    message: impl Into<String>,
) -> ValidationError<'a> {
    let kind = ValidationErrorKind::Custom {
        message: message.into(),
    };
    let instance = Cow::Borrowed(instance);
    ValidationError {
        instance_path,
        instance,
        kind,
        schema_path,
    }
}
}
impl error::Error for ValidationError<'_> {}
@ -994,6 +1012,7 @@ impl fmt::Display for ValidationError<'_> {
.collect::<Vec<String>>()
.join(", ")
),
ValidationErrorKind::Custom { message } => f.write_str(message),
}
}
}

View File

@ -0,0 +1,84 @@
use crate::{
paths::{JSONPointer, JsonPointerNode},
validator::Validate,
ErrorIterator, ValidationError,
};
use serde_json::{Map, Value};
use std::fmt::{Display, Formatter};
/// Wrapper around a user-provided [`Keyword`] implementation.
///
/// It implements the crate's `Validate` trait by forwarding to the wrapped keyword.
pub(crate) struct CustomKeyword {
// The user-defined validation logic, as returned by a keyword factory.
inner: Box<dyn Keyword>,
}
impl CustomKeyword {
/// Wrap a boxed [`Keyword`] implementation.
pub(crate) fn new(inner: Box<dyn Keyword>) -> Self {
Self { inner }
}
}
/// Formatting a `CustomKeyword` writes nothing, i.e. it is displayed as an empty string.
// NOTE(review): presumably this impl exists only to satisfy a `Display` bound on
// `Validate` - confirm against the trait definition.
impl Display for CustomKeyword {
fn fmt(&self, _: &mut Formatter<'_>) -> std::fmt::Result {
Ok(())
}
}
/// Both methods forward directly to the wrapped [`Keyword`] implementation.
impl Validate for CustomKeyword {
/// Return the errors produced by the wrapped keyword's `validate`.
fn validate<'instance>(
&self,
instance: &'instance Value,
instance_path: &JsonPointerNode,
) -> ErrorIterator<'instance> {
self.inner.validate(instance, instance_path)
}
/// Return the result of the wrapped keyword's `is_valid`.
fn is_valid(&self, instance: &Value) -> bool {
self.inner.is_valid(instance)
}
}
/// Trait that allows implementing custom validation for keywords.
///
/// Implementations must be `Send + Sync`.
pub trait Keyword: Send + Sync {
/// Validate [instance](Value) according to a custom specification.
///
/// A custom keyword validator may be used when a validation cannot be
/// easily or efficiently expressed in JSON schema.
///
/// The custom validation is applied in addition to the JSON schema validation.
///
/// `instance_path` is the location of `instance` and can be converted into a
/// `JSONPointer` when building errors. The returned iterator yields the
/// validation errors found for `instance`.
fn validate<'instance>(
&self,
instance: &'instance Value,
instance_path: &JsonPointerNode,
) -> ErrorIterator<'instance>;
/// Validate [instance](Value) and return a boolean result.
///
/// Could be potentially faster than the `validate` method.
/// Implementations should keep the result consistent with `validate`.
fn is_valid(&self, instance: &Value) -> bool;
}
/// Factory that builds a [`Keyword`] validator for one occurrence of a custom keyword.
pub(crate) trait KeywordFactory: Send + Sync {
/// Build the validator for a keyword found during schema compilation.
///
/// - `parent`: the schema object that contains the keyword.
/// - `schema`: the value of the keyword.
/// - `path`: the JSON pointer to the keyword.
///
/// Returns the boxed validator, or a [`ValidationError`] if it can not be built.
fn init<'a>(
&self,
parent: &'a Map<String, Value>,
schema: &'a Value,
path: JSONPointer,
) -> Result<Box<dyn Keyword>, ValidationError<'a>>;
}
/// Blanket implementation: any `Send + Sync` function or closure with the factory
/// signature is a `KeywordFactory`.
impl<F> KeywordFactory for F
where
F: for<'a> Fn(
&'a Map<String, Value>,
&'a Value,
JSONPointer,
) -> Result<Box<dyn Keyword>, ValidationError<'a>>
+ Send
+ Sync,
{
/// Call the wrapped function with the given arguments and return its result as is.
fn init<'a>(
&self,
parent: &'a Map<String, Value>,
schema: &'a Value,
path: JSONPointer,
) -> Result<Box<dyn Keyword>, ValidationError<'a>> {
self(parent, schema, path)
}
}

View File

@ -6,6 +6,7 @@ pub(crate) mod boolean;
pub(crate) mod const_;
pub(crate) mod contains;
pub(crate) mod content;
pub(crate) mod custom;
pub(crate) mod dependencies;
pub(crate) mod enum_;
pub(crate) mod exclusive_maximum;

View File

@ -99,6 +99,7 @@ mod validator;
pub use compilation::{options::CompilationOptions, JSONSchema};
pub use error::{ErrorIterator, ValidationError};
pub use keywords::custom::Keyword;
pub use resolver::{SchemaResolver, SchemaResolverError};
pub use schemas::Draft;