Compare commits

...

7 Commits

Author SHA1 Message Date
Sam Roberts 197246e95e
Merge a240bd5aae into 6cf82328e3 2024-04-20 17:15:57 +00:00
Dmitry Dygalo a240bd5aae
chore: update
Signed-off-by: Dmitry Dygalo <dmitry@dygalo.dev>
2024-04-20 19:01:03 +02:00
Dmitry Dygalo 194a0b0db7
fix: tests
Signed-off-by: Dmitry Dygalo <dmitry@dygalo.dev>
2024-04-14 16:45:52 +02:00
Sam Roberts 17b37cc7e6 (broken) attempt to change custom validation interface to a trait 2024-04-13 13:56:18 -04:00
Sam Roberts dfc52eb045 feat: Custom keyword validation supports overriding existing keyword behavior 2024-04-10 11:52:36 -04:00
Benjamin Tobler 92866a5eaf chore: Address Clippy errors 2024-04-10 11:52:36 -04:00
Benjamin Tobler 3a436710be feat: Custom keyword validation
Add support for user defined custom keyword validation. The user
provides and registers custom validator functions when configuring a
JSONSchema.

Custom keyword validators may be used when the user wants to enforce
constraints that can't, or can't easily, be expressed in JSON schema.
2024-04-10 11:28:41 -04:00
5 changed files with 499 additions and 6 deletions

View File

@ -6,7 +6,7 @@ pub(crate) mod options;
use crate::{
error::ErrorIterator,
keywords,
keywords::{self, custom_keyword::compile_custom_keyword_validator},
output::Output,
paths::{InstancePath, JSONPointer},
primitive_type::{PrimitiveType, PrimitiveTypesBitMap},
@ -198,7 +198,18 @@ pub(crate) fn compile_validators<'a>(
{
is_props = true;
}
if let Some(validator) = context
// first check if this keyword was added as a custom keyword
// it may override existing keyword behavior
if let Some(f) = context.config.get_custom_keyword_constructor(keyword) {
let validator = compile_custom_keyword_validator(
&context,
keyword.clone(),
f(),
subschema.clone(),
schema.clone(),
)?;
validators.push((keyword.clone(), validator));
} else if let Some(validator) = context
.config
.draft()
.get_validator(keyword)
@ -244,9 +255,20 @@ pub(crate) fn compile_validators<'a>(
#[cfg(test)]
mod tests {
use super::JSONSchema;
use crate::error::ValidationError;
use crate::{
compilation::options::CustomKeywordValidator, error::ValidationError, paths::JSONPointer,
ErrorIterator,
};
use num_cmp::NumCmp;
use regex::Regex;
use serde_json::{from_str, json, Value};
use std::{fs::File, io::Read, path::Path};
use std::{
borrow::Cow,
fs::File,
io::Read,
path::Path,
sync::{Arc, Mutex},
};
fn load(path: &str, idx: usize) -> Value {
let path = Path::new(path);
@ -302,4 +324,317 @@ mod tests {
);
assert_eq!(errors[1].to_string(), r#""a" is shorter than 3 characters"#);
}
/// A custom keyword under a brand-new name is compiled and evaluated together with the
/// built-in keywords of the same schema.
#[test]
fn custom_keyword_definition() {
    /// The only value the `custom-object-type` keyword accepts in a schema.
    fn is_supported_subschema(subschema: &Value) -> bool {
        subschema.as_str() == Some("ascii-keys")
    }

    // Custom validator that verifies the object's keys consist of only ASCII
    // representable characters.
    struct CustomObjectValidator;
    impl CustomKeywordValidator for CustomObjectValidator {
        fn validate<'instance>(
            &self,
            instance: &'instance Value,
            instance_path: JSONPointer,
            subschema: Arc<Value>,
            subschema_path: JSONPointer,
            _schema: Arc<Value>,
        ) -> ErrorIterator<'instance> {
            if !is_supported_subschema(&subschema) {
                // Invalid schema
                let error = ValidationError {
                    instance: Cow::Borrowed(instance),
                    kind: crate::error::ValidationErrorKind::Schema,
                    instance_path,
                    schema_path: subschema_path,
                };
                return Box::new(Some(error).into_iter());
            }
            // The keyword only constrains objects. Every keyword validator of the schema is
            // run against the instance, so a non-object must not make this one panic.
            let object = match instance.as_object() {
                Some(object) => object,
                None => return Box::new(None.into_iter()),
            };
            // One error per offending key.
            let errors: Vec<_> = object
                .keys()
                .filter(|key| !key.is_ascii())
                .map(|_| ValidationError {
                    instance: Cow::Borrowed(instance),
                    kind: crate::error::ValidationErrorKind::Format { format: "ASCII" },
                    instance_path: instance_path.clone(),
                    schema_path: subschema_path.clone(),
                })
                .collect();
            Box::new(errors.into_iter())
        }

        fn is_valid(&self, instance: &Value, subschema: &Value, _schema: &Value) -> bool {
            is_supported_subschema(subschema)
                && instance
                    .as_object()
                    .map_or(true, |object| object.keys().all(|key| key.is_ascii()))
        }
    }

    // Define a JSON schema that enforces the top level object has ASCII keys and has at
    // least 1 property
    let schema =
        json!({ "custom-object-type": "ascii-keys", "type": "object", "minProperties": 1 });
    let json_schema = JSONSchema::options()
        .with_custom_keyword("custom-object-type", || Box::new(CustomObjectValidator))
        .compile(&schema)
        .unwrap();

    // Verify schema validation detects object with too few properties
    let instance_err_too_few_properties = json!({});
    assert!(json_schema
        .validate(&instance_err_too_few_properties)
        .is_err());
    assert!(!json_schema.is_valid(&instance_err_too_few_properties));

    // Verify validator succeeds on a valid custom-object-type
    let instance_ok = json!({ "a" : 1 });
    assert!(json_schema.validate(&instance_ok).is_ok());
    assert!(json_schema.is_valid(&instance_ok));

    // Verify validator detects invalid custom-object-type
    let instance_err_non_ascii_keys = json!({ "å" : 1 });
    assert!(json_schema.validate(&instance_err_non_ascii_keys).is_err());
    assert!(!json_schema.is_valid(&instance_err_non_ascii_keys));

    // Verify a non-object instance is rejected by "type" without the custom validator panicking
    let instance_err_not_object = json!(42);
    assert!(json_schema.validate(&instance_err_not_object).is_err());
    assert!(!json_schema.is_valid(&instance_err_not_object));
}
/// A custom keyword registered under the name of a built-in keyword ("minimum") replaces the
/// built-in behavior, and can cooperate with a custom format ("currency").
#[test]
fn custom_format_and_override_keyword() {
    // prepare a custom format checker
    // in this case, the format is "currency"
    // checks that a string has some number of digits followed by a dot followed by
    // exactly 2 digits.
    const CURRENCY_RE_STR: &str = "^(0|([1-9]+[0-9]*))(\\.[0-9]{2})$";
    fn currency_format_checker(s: &str) -> bool {
        Regex::new(CURRENCY_RE_STR).unwrap().is_match(s)
    }

    /// Decide whether `instance` satisfies `limit`; shared by `validate` and `is_valid` so
    /// the two entry points cannot drift apart.
    ///
    /// * numbers are compared exactly like the original "minimum" keyword;
    /// * strings are compared only when the enclosing `schema` declares the "currency"
    ///   format and the string is well-formed currency;
    /// * "minimum" does not apply to anything else.
    fn satisfies_minimum(instance: &Value, limit: &serde_json::Number, schema: &Value) -> bool {
        let limit = limit.as_f64().expect("Always valid");
        match instance {
            Value::Number(number) => {
                // Pick the narrowest representation of the *instance* (not of the limit).
                if let Some(item) = number.as_u64() {
                    !NumCmp::num_lt(item, limit)
                } else if let Some(item) = number.as_i64() {
                    !NumCmp::num_lt(item, limit)
                } else {
                    let item = number.as_f64().expect("Always valid");
                    !NumCmp::num_lt(item, limit)
                }
            }
            Value::String(text) => {
                let is_currency =
                    schema.get("format").and_then(Value::as_str) == Some("currency");
                if is_currency && currency_format_checker(text) {
                    // all preconditions for minimum applying are met
                    let amount = text
                        .parse::<f64>()
                        .expect("format validated by regex checker");
                    !NumCmp::num_lt(amount, limit)
                } else {
                    true
                }
            }
            // in all other cases, the "minimum" keyword should not apply
            _ => true,
        }
    }

    // Define a custom keyword validator that overrides "minimum"
    // so that "minimum" may apply to "currency"-formatted strings as well
    struct CustomMinimumValidator;
    impl CustomKeywordValidator for CustomMinimumValidator {
        fn validate<'instance>(
            &self,
            instance: &'instance Value,
            instance_path: JSONPointer,
            subschema: Arc<Value>,
            subschema_path: JSONPointer,
            schema: Arc<Value>,
        ) -> ErrorIterator<'instance> {
            let limit = match &*subschema {
                Value::Number(limit) => limit,
                _ => {
                    // Invalid schema
                    let error = ValidationError {
                        instance: Cow::Borrowed(instance),
                        kind: crate::error::ValidationErrorKind::Schema,
                        instance_path,
                        schema_path: subschema_path,
                    };
                    return Box::new(Some(error).into_iter());
                }
            };
            if satisfies_minimum(instance, limit, &schema) {
                return Box::new(None.into_iter());
            }
            let error = ValidationError {
                instance: Cow::Borrowed(instance),
                kind: crate::error::ValidationErrorKind::Minimum {
                    limit: (*subschema).clone(),
                },
                instance_path,
                schema_path: subschema_path,
            };
            Box::new(Some(error).into_iter())
        }

        fn is_valid(&self, instance: &Value, subschema: &Value, schema: &Value) -> bool {
            match subschema {
                Value::Number(limit) => satisfies_minimum(instance, limit, schema),
                _ => false,
            }
        }
    }

    // define compilation options that include the custom format and the overridden keyword
    let mut options = JSONSchema::options();
    let options = options
        .with_format("currency", currency_format_checker)
        .with_custom_keyword("minimum", || Box::new(CustomMinimumValidator));

    // Define a schema that includes both the custom format and the overridden keyword
    let schema = json!({ "minimum": 2, "type": "string", "format": "currency" });
    let compiled = options.compile(&schema).unwrap();

    // Control: verify schema validation rejects non-string types
    let not_a_string = json!(15);
    assert!(compiled.validate(&not_a_string).is_err());
    assert!(!compiled.is_valid(&not_a_string));

    // Control: verify validator rejects ill-formatted strings
    let ill_formatted = json!("not a currency");
    assert!(compiled.validate(&ill_formatted).is_err());
    assert!(!compiled.is_valid(&ill_formatted));

    // Verify validator allows properly formatted strings that conform to custom keyword
    let at_least_minimum = json!("3.00");
    assert!(compiled.validate(&at_least_minimum).is_ok());
    assert!(compiled.is_valid(&at_least_minimum));

    // Verify validator rejects properly formatted strings that do not conform to custom keyword
    let below_minimum = json!("1.99");
    assert!(compiled.validate(&below_minimum).is_err());
    assert!(!compiled.is_valid(&below_minimum));

    // Define another schema that applies "minimum" to an integer to ensure original behavior
    let schema = json!({ "minimum": 2, "type": "integer" });
    let compiled = options.compile(&schema).unwrap();

    // Verify schema allows integers greater than 2
    let above_minimum = json!(3);
    assert!(compiled.validate(&above_minimum).is_ok());
    assert!(compiled.is_valid(&above_minimum));

    // Verify schema rejects integers less than 2
    let below_minimum = json!(1);
    assert!(compiled.validate(&below_minimum).is_err());
    assert!(!compiled.is_valid(&below_minimum));

    // Verify schema rejects negative integers (these do not fit in a `u64`, so they
    // exercise the signed comparison branch)
    let negative = json!(-1);
    assert!(compiled.validate(&negative).is_err());
    assert!(!compiled.is_valid(&negative));
}
/// A custom keyword validator may carry state that is observable from outside the compiled
/// schema, as long as that state is shared with whatever the constructor builds.
#[test]
fn custom_keyword_with_inner_state() {
    // Define a custom keyword validator that adds the keyword's value to a shared counter
    // every time the validator is applied.
    struct CountingValidator {
        // Shared with the test body so the effect of each validation call can be observed.
        count: Arc<Mutex<i64>>,
    }

    impl CountingValidator {
        fn increment(&self, amount: i64) {
            let mut count = self.count.lock().expect("Lock is poisoned");
            *count += amount;
        }
    }

    impl CustomKeywordValidator for CountingValidator {
        fn validate<'instance>(
            &self,
            instance: &'instance Value,
            instance_path: JSONPointer,
            subschema: Arc<Value>,
            subschema_path: JSONPointer,
            _: Arc<Value>,
        ) -> ErrorIterator<'instance> {
            match subschema.as_i64() {
                Some(amount) => {
                    self.increment(amount);
                    Box::new(None.into_iter())
                }
                // A non-integer "countme" value is a schema problem; report it instead of
                // panicking so `validate` agrees with `is_valid`.
                None => {
                    let error = ValidationError {
                        instance: Cow::Borrowed(instance),
                        kind: crate::error::ValidationErrorKind::Schema,
                        instance_path,
                        schema_path: subschema_path,
                    };
                    Box::new(Some(error).into_iter())
                }
            }
        }

        fn is_valid(&self, _: &Value, subschema: &Value, _: &Value) -> bool {
            match subschema.as_i64() {
                Some(amount) => {
                    self.increment(amount);
                    true
                }
                None => false,
            }
        }
    }

    // define compilation options that include the custom keyword; every validator built by
    // the constructor shares the counter observed below
    let count = Arc::new(Mutex::new(0));
    let shared_count = Arc::clone(&count);
    let mut options = JSONSchema::options();
    let options = options.with_custom_keyword("countme", move || {
        Box::new(CountingValidator {
            count: Arc::clone(&shared_count),
        })
    });
    let current_count = || *count.lock().expect("Lock is poisoned");

    // Define a schema that includes the custom keyword and therefore should increase the count
    let schema = json!({ "countme": 3, "type": "string" });
    let compiled = options.compile(&schema).unwrap();

    // Because the schema has "countme" in it, whenever we run validation we should expect
    // the validator's count to increase. The validator itself never reports an error and the
    // instance satisfies "type", so validation succeeds.
    let instance_ok = json!("i am a string");
    assert_eq!(current_count(), 0);
    assert!(compiled.validate(&instance_ok).is_ok());
    assert_eq!(current_count(), 3);
    assert!(compiled.is_valid(&instance_ok));
    assert_eq!(current_count(), 6);

    // A schema that doesn't have "countme" must not touch the counter
    let schema = json!({ "type": "string" });
    let compiled = options.compile(&schema).unwrap();
    assert!(compiled.validate(&instance_ok).is_ok());
    assert!(compiled.is_valid(&instance_ok));
    assert_eq!(current_count(), 6);
}
}

View File

@ -5,8 +5,9 @@ use crate::{
DEFAULT_CONTENT_ENCODING_CHECKS_AND_CONVERTERS,
},
content_media_type::{ContentMediaTypeCheckType, DEFAULT_CONTENT_MEDIA_TYPE_CHECKS},
paths::JSONPointer,
resolver::{DefaultResolver, Resolver, SchemaResolver},
schemas, ValidationError,
schemas, ErrorIterator, ValidationError,
};
use ahash::AHashMap;
use once_cell::sync::Lazy;
@ -275,6 +276,10 @@ pub struct CompilationOptions {
validate_formats: Option<bool>,
validate_schema: bool,
ignore_unknown_formats: bool,
custom_keywords: AHashMap<
String, // TODO<samgqroberts> 2024-04-13 should this also be a &'static str
CustomKeywordConstructor,
>,
}
impl Default for CompilationOptions {
@ -289,6 +294,7 @@ impl Default for CompilationOptions {
formats: AHashMap::default(),
validate_formats: None,
ignore_unknown_formats: true,
custom_keywords: AHashMap::default(),
}
}
}
@ -637,6 +643,65 @@ impl CompilationOptions {
pub(crate) const fn are_unknown_formats_ignored(&self) -> bool {
self.ignore_unknown_formats
}
/// Register a custom keyword definition.
///
/// `definition` is a constructor that is called to build a fresh
/// [`CustomKeywordValidator`] whenever `keyword` is encountered while compiling a schema.
/// Custom keywords are looked up before the built-in ones, so registering a keyword under
/// the name of an existing keyword (e.g. `"minimum"`) overrides its behavior.
/// Registering the same `keyword` again replaces the earlier definition.
///
/// # Examples
///
/// ```rust
/// # use jsonschema::{CustomKeywordValidator, ErrorIterator, JSONSchema, paths::JSONPointer};
/// # use serde_json::{json, Value};
/// # use std::sync::Arc;
///
/// struct MyCustomValidator;
///
/// impl CustomKeywordValidator for MyCustomValidator {
///     fn validate<'instance>(
///         &self,
///         instance: &'instance Value,
///         instance_path: JSONPointer,
///         subschema: Arc<Value>,
///         subschema_path: JSONPointer,
///         schema: Arc<Value>,
///     ) -> ErrorIterator<'instance> {
///         // ... validate instance ...
///         Box::new(None.into_iter())
///     }
///
///     fn is_valid(&self, instance: &Value, subschema: &Value, schema: &Value) -> bool {
///         // ... determine if instance is valid ...
///         true
///     }
/// }
///
/// assert!(JSONSchema::options()
///     .with_custom_keyword("my-type", || Box::new(MyCustomValidator))
///     .compile(&json!({ "my-type": "my-schema"}))
///     .expect("A valid schema")
///     .is_valid(&json!({ "a": "b"})));
/// ```
pub fn with_custom_keyword<T>(
    &mut self,
    keyword: T,
    definition: impl Fn() -> Box<dyn CustomKeywordValidator> + Send + Sync + 'static,
) -> &mut Self
where
    T: Into<String>,
{
    self.custom_keywords
        .insert(keyword.into(), Arc::new(definition));
    self
}
/// Look up the constructor registered for `keyword`.
///
/// Returns `None` when no custom keyword with that name has been registered.
pub(crate) fn get_custom_keyword_constructor(
    &self,
    keyword: &str,
) -> Option<&CustomKeywordConstructor> {
    let registered = &self.custom_keywords;
    registered.get(keyword)
}
}
// format name & a pointer to a check function
type FormatKV<'a> = Option<(&'a &'static str, &'a fn(&str) -> bool)>;
@ -654,6 +719,35 @@ impl fmt::Debug for CompilationOptions {
}
}
/// Shared, thread-safe factory that produces a boxed [`CustomKeywordValidator`] each time it
/// is called; this is the value type stored per keyword name in the compilation options.
pub(crate) type CustomKeywordConstructor =
Arc<dyn Fn() -> Box<dyn CustomKeywordValidator> + Send + Sync>;
/// Trait that allows implementing custom validation for keywords.
///
/// A custom keyword validator may be used for a validation that cannot, or cannot easily or
/// efficiently, be expressed in JSON schema.
///
/// Custom keywords are evaluated together with the other keywords of a schema. A custom
/// keyword registered under the name of a built-in keyword takes the place of that built-in
/// keyword instead of running in addition to it.
pub trait CustomKeywordValidator: Send + Sync {
/// Validate an [instance](serde_json::Value) according to a custom specification,
/// returning any and all detected validation errors.
///
/// * `instance` - the value being validated;
/// * `instance_path` - pointer to `instance` within the validated document;
/// * `subschema` - the value associated with the custom keyword in the schema;
/// * `subschema_path` - pointer to the custom keyword within the schema;
/// * `schema` - the schema in which the custom keyword was found.
///
/// An iterator that yields nothing means `instance` is valid.
fn validate<'instance>(
&self,
instance: &'instance serde_json::Value,
instance_path: JSONPointer,
subschema: Arc<serde_json::Value>,
subschema_path: JSONPointer,
schema: Arc<serde_json::Value>,
) -> ErrorIterator<'instance>;
/// Determine if an instance is valid, without building any error values.
///
/// `subschema` and `schema` have the same meaning as in `validate`.
fn is_valid<'schema>(
&self,
instance: &serde_json::Value,
subschema: &'schema serde_json::Value,
schema: &'schema serde_json::Value,
) -> bool;
}
#[cfg(test)]
mod tests {
use super::CompilationOptions;

View File

@ -0,0 +1,60 @@
use crate::compilation::context::CompilationContext;
use crate::compilation::options::CustomKeywordValidator;
use crate::keywords::CompilationResult;
use crate::paths::{InstancePath, JSONPointer, PathChunk};
use crate::validator::Validate;
use crate::ErrorIterator;
use serde_json::Value;
use std::fmt::{Display, Formatter};
use std::sync::Arc;
/// Custom keyword validation implemented by user provided validation functions.
///
/// Bundles a user-supplied validator with the schema data that is passed back to it on
/// every validation call.
pub(crate) struct CompiledCustomKeywordValidator {
// Schema value supplied at compile time; passed to the user validator as `schema`.
schema: Arc<Value>,
// Keyword value supplied at compile time; passed to the user validator as `subschema`.
subschema: Arc<Value>,
// Pointer for the keyword, passed to the user validator as `subschema_path`.
subschema_path: JSONPointer,
// The user-provided validation logic.
validator: Box<dyn CustomKeywordValidator>,
}
impl Display for CompiledCustomKeywordValidator {
    /// Formats the validator as an empty string.
    ///
    /// NOTE(review): nothing is written here, so this validator is invisible wherever
    /// validators are rendered through `Display` - confirm that is intended.
    fn fmt(&self, _formatter: &mut Formatter<'_>) -> std::fmt::Result {
        std::fmt::Result::Ok(())
    }
}
impl Validate for CompiledCustomKeywordValidator {
fn validate<'instance>(
&self,
instance: &'instance Value,
instance_path: &InstancePath,
) -> ErrorIterator<'instance> {
self.validator.validate(
instance,
instance_path.into(),
self.subschema.clone(),
self.subschema_path.clone(),
self.schema.clone(),
)
}
fn is_valid(&self, instance: &Value) -> bool {
self.validator
.is_valid(instance, &self.subschema, &self.schema)
}
}
/// Wrap a user-provided `validator` into a compiled validator for `keyword`.
///
/// `subschema` and `schema` are stored behind `Arc`s and handed to `validator` on every
/// validation call; the keyword's schema path is obtained from
/// `context.as_pointer_with(keyword)`. This function always returns `Ok`.
pub(crate) fn compile_custom_keyword_validator<'a>(
    context: &CompilationContext,
    keyword: impl Into<PathChunk>,
    validator: Box<dyn CustomKeywordValidator>,
    subschema: Value,
    schema: Value,
) -> CompilationResult<'a> {
    let compiled = CompiledCustomKeywordValidator {
        schema: Arc::new(schema),
        subschema: Arc::new(subschema),
        subschema_path: context.as_pointer_with(keyword),
        validator,
    };
    Ok(Box::new(compiled))
}

View File

@ -6,6 +6,7 @@ pub(crate) mod boolean;
pub(crate) mod const_;
pub(crate) mod contains;
pub(crate) mod content;
pub(crate) mod custom_keyword;
pub(crate) mod dependencies;
pub(crate) mod enum_;
pub(crate) mod exclusive_maximum;

View File

@ -97,7 +97,10 @@ mod schema_node;
mod schemas;
mod validator;
pub use compilation::{options::CompilationOptions, JSONSchema};
pub use compilation::{
options::{CompilationOptions, CustomKeywordValidator},
JSONSchema,
};
pub use error::{ErrorIterator, ValidationError};
pub use resolver::{SchemaResolver, SchemaResolverError};
pub use schemas::Draft;