// jsonschema-rs/jsonschema/src/resolver.rs
//
// 623 lines
// 20 KiB
// Rust

//! Reference resolver. Implements the logic required by the `$ref` keyword.
//! Depending on the enabled features, documents can be loaded from remote
//! locations via HTTP(S) or from local files.
use crate::{
compilation::DEFAULT_ROOT_URL,
error::ValidationError,
schemas::{id_of, Draft},
};
use ahash::AHashMap;
use parking_lot::RwLock;
use serde_json::Value;
use std::{borrow::Cow, sync::Arc};
use url::Url;
/// An opaque error type that is returned
/// by resolvers on resolution failures.
///
/// This is an alias for [`anyhow::Error`].
pub type SchemaResolverError = anyhow::Error;
/// A resolver that resolves external schema references.
/// Internal references such as `#/definitions` and JSON pointers are handled internally.
///
/// All operations are blocking and it is not possible to return futures.
/// As a workaround, errors can be returned that will contain the schema URLs to resolve
/// and can be resolved outside the validation process if needed.
///
/// Implementors must be `Send + Sync`.
///
/// # Example
///
/// ```no_run
/// # use serde_json::{json, Value};
/// # use anyhow::anyhow;
/// # use jsonschema::{SchemaResolver, SchemaResolverError};
/// # use std::sync::Arc;
/// # use url::Url;
///
/// struct MyCustomResolver;
///
/// impl SchemaResolver for MyCustomResolver {
/// fn resolve(&self, root_schema: &Value, url: &Url, _original_reference: &str) -> Result<Arc<Value>, SchemaResolverError> {
/// match url.scheme() {
/// "json-schema" => {
/// Err(anyhow!("cannot resolve schema without root schema ID"))
/// },
/// "http" | "https" => {
/// Ok(Arc::new(json!({ "description": "an external schema" })))
/// }
/// _ => Err(anyhow!("scheme is not supported"))
/// }
/// }
/// }
/// ```
pub trait SchemaResolver: Send + Sync {
/// Resolve an external schema via a URL.
///
/// Relative URLs are resolved based on the root schema's ID.
/// If there is no root schema ID available, the scheme `json-schema` is used
/// and any relative paths are turned into absolutes.
///
/// Additionally, the original reference string is passed as well.
/// In most cases it should not be needed, but it preserves some information,
/// such as relative paths, that is lost when the URL is built.
///
/// # Errors
///
/// Implementations return a [`SchemaResolverError`] when the schema
/// cannot be resolved.
fn resolve(
&self,
root_schema: &Value,
url: &Url,
original_reference: &str,
) -> Result<Arc<Value>, SchemaResolverError>;
}
/// Built-in [`SchemaResolver`] implementation that dispatches on the URL scheme.
///
/// - `http` / `https`: fetched with a blocking `reqwest` request and parsed as JSON
///   (compiled in with the `resolve-http` feature or in test builds);
/// - `file`: read from the local file system and parsed as JSON
///   (compiled in with the `resolve-file` feature or in test builds);
/// - `json-schema`: always an error, as a relative reference cannot be resolved
///   without a root schema ID;
/// - any other scheme is rejected as unknown.
pub(crate) struct DefaultResolver;

impl SchemaResolver for DefaultResolver {
    /// Load the document behind `url`.
    ///
    /// The root schema and the original reference string are not used by this resolver.
    ///
    /// # Errors
    ///
    /// Returns an error when the scheme is unsupported or its feature is disabled,
    /// when the request / file access fails, or when the payload is not valid JSON.
    fn resolve(
        &self,
        _root_schema: &Value,
        url: &Url,
        _reference: &str,
    ) -> Result<Arc<Value>, SchemaResolverError> {
        match url.scheme() {
            "http" | "https" => {
                #[cfg(all(feature = "reqwest", not(feature = "resolve-http")))]
                {
                    compile_error!("the `reqwest` feature does not enable HTTP schema resolving anymore, use the `resolve-http` feature instead");
                }
                #[cfg(any(feature = "resolve-http", test))]
                {
                    let response = reqwest::blocking::get(url.as_str())?;
                    let document: Value = response.json()?;
                    Ok(Arc::new(document))
                }
                #[cfg(not(any(feature = "resolve-http", test)))]
                Err(anyhow::anyhow!("`resolve-http` feature or a custom resolver is required to resolve external schemas via HTTP"))
            }
            "file" => {
                #[cfg(any(feature = "resolve-file", test))]
                {
                    if let Ok(path) = url.to_file_path() {
                        let file = std::fs::File::open(path)?;
                        // `serde_json::from_reader` does not buffer its input, so reading
                        // straight from a `File` is needlessly slow; buffer the reads.
                        let reader = std::io::BufReader::new(file);
                        let document: Value = serde_json::from_reader(reader)?;
                        Ok(Arc::new(document))
                    } else {
                        Err(anyhow::anyhow!("invalid file path"))
                    }
                }
                #[cfg(not(any(feature = "resolve-file", test)))]
                {
                    Err(anyhow::anyhow!("`resolve-file` feature or a custom resolver is required to resolve external schemas via files"))
                }
            }
            "json-schema" => Err(anyhow::anyhow!(
                "cannot resolve relative external schema without root schema ID"
            )),
            _ => Err(anyhow::anyhow!("unknown scheme {}", url.scheme())),
        }
    }
}
/// Resolves references against the root schema, its named sub-schemas,
/// a document cache and, as a last resort, an external resolver.
pub(crate) struct Resolver {
// Consulted by `resolve_url` for documents that are neither the root schema,
// a named sub-schema nor already cached in `store`.
external_resolver: Arc<dyn SchemaResolver>,
// Returned for `DEFAULT_ROOT_URL` and handed to the external resolver on every call.
root_schema: Arc<Value>,
// canonical_id: sub-schema mapping to resolve documents by their ID
// canonical_id is composed with the root document id
// (if not specified, then `DEFAULT_ROOT_URL` is used for this purpose)
schemas: AHashMap<String, Arc<Value>>,
// Cache of documents keyed by URL: seeded by the caller of `new` and extended
// with every document successfully fetched through `external_resolver`.
store: RwLock<AHashMap<String, Arc<Value>>>,
}
// Hand-written `Debug`: prints `root_schema`, `schemas` and `store`,
// leaving `external_resolver` out of the output.
impl std::fmt::Debug for Resolver {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut output = f.debug_struct("Resolver");
        output.field("root_schema", &self.root_schema);
        output.field("schemas", &self.schemas);
        output.field("store", &self.store);
        output.finish()
    }
}
impl Resolver {
pub(crate) fn new<'a>(
external_resolver: Arc<dyn SchemaResolver>,
draft: Draft,
scope: &Url,
schema: Arc<Value>,
store: AHashMap<String, Arc<Value>>,
) -> Result<Resolver, ValidationError<'a>> {
let mut schemas: AHashMap<String, Arc<Value>> = AHashMap::new();
// traverse the schema and store all named ones under their canonical ids
find_schemas(draft, &schema, scope, &mut |id, schema| {
schemas.insert(id, Arc::new(schema.clone()));
None
})?;
Ok(Resolver {
external_resolver,
root_schema: schema,
schemas,
store: RwLock::new(store),
})
}
/// Load a document for the given `url`.
/// It may be:
/// - the root document (`DEFAULT_ROOT_URL`) case;
/// - named subschema that is stored in `self.schemas`;
/// - document from a remote location;
fn resolve_url(&self, url: &Url, orig_ref: &str) -> Result<Arc<Value>, ValidationError<'_>> {
match url.as_str() {
DEFAULT_ROOT_URL => Ok(self.root_schema.clone()),
url_str => match self.schemas.get(url_str) {
Some(value) => Ok(value.clone()),
None => {
if let Some(cached) = self.store.read().get(url_str) {
return Ok(cached.clone());
}
let resolved = self
.external_resolver
.resolve(&self.root_schema, url, orig_ref)
.map_err(|error| ValidationError::resolver(url.clone(), error))?;
self.store
.write()
.insert(url.clone().into(), resolved.clone());
Ok(resolved)
}
},
}
}
/// Resolve a URL possibly containing a fragment to a `serde_json::Value`.
///
/// Note that this copies the fragment from the underlying schema, so if
/// you are memory constrained you may want to cache the result of this
/// call.
pub(crate) fn resolve_fragment(
&self,
draft: Draft,
url: &Url,
orig_ref: &str,
) -> Result<(Url, Arc<Value>), ValidationError> {
let mut resource = url.clone();
resource.set_fragment(None);
let fragment =
percent_encoding::percent_decode_str(url.fragment().unwrap_or("")).decode_utf8()?;
// Location-independent identifiers are searched before trying to resolve by
// fragment-less url
if let Some(document) = self.schemas.get(url.as_str()) {
return Ok((resource, Arc::clone(document)));
}
// Each resolved document may be in a changed subfolder
// They are tracked when JSON pointer is resolved and added to the resource
let document = self.resolve_url(&resource, orig_ref)?;
if fragment.is_empty() {
return Ok((resource, Arc::clone(&document)));
}
match pointer(draft, &document, fragment.as_ref()) {
Some((folders, resolved)) => {
let joined_folders = join_folders(resource, &folders)?;
Ok((joined_folders, Arc::new(resolved.clone())))
}
None => Err(ValidationError::invalid_reference(url.as_str().to_string())),
}
}
}
/// Join every folder except the first one onto `resource`, in order.
///
/// The first entry is skipped; presumably it is the ID of the document itself,
/// which `resource` already reflects (to be confirmed against the callers).
///
/// # Errors
///
/// Returns the first `url::ParseError` produced by a join.
fn join_folders(resource: Url, folders: &[&str]) -> Result<Url, url::ParseError> {
    folders
        .iter()
        .skip(1)
        .try_fold(resource, |base, folder| base.join(folder))
}
/// Walk `schema` depth-first and run `callback` on every object that declares an ID.
///
/// The callback receives the canonical ID (the declared ID joined onto the base URL
/// in effect at that point) and the object itself. An object with an ID becomes the
/// new base for everything nested inside it.
///
/// Traversal stops as soon as the callback returns `Some`, and that value is
/// returned; `Ok(None)` means the whole document was visited.
///
/// # Errors
///
/// Returns a `url::ParseError` when an ID cannot be joined onto its base URL.
#[inline]
pub(crate) fn find_schemas<'a, F>(
    draft: Draft,
    schema: &'a Value,
    base_url: &Url,
    callback: &mut F,
) -> Result<Option<&'a Value>, url::ParseError>
where
    F: FnMut(String, &'a Value) -> Option<&'a Value>,
{
    match schema {
        Value::Object(members) => {
            // Owned storage for the new base URL when this object declares an ID
            let rebased = match id_of(draft, schema) {
                Some(id) => {
                    let mut scoped = base_url.join(id)?;
                    // Empty fragments are discouraged and cannot be told apart from
                    // absent ones
                    if scoped.fragment() == Some("") {
                        scoped.set_fragment(None);
                    }
                    if let Some(hit) = callback(scoped.to_string(), schema) {
                        return Ok(Some(hit));
                    }
                    Some(scoped)
                }
                None => None,
            };
            let scope = rebased.as_ref().unwrap_or(base_url);
            for (key, subschema) in members {
                // Members are skipped by name only, wherever the object appears
                if matches!(key.as_str(), "enum" | "const") {
                    continue;
                }
                if let Some(hit) = find_schemas(draft, subschema, scope, callback)? {
                    return Ok(Some(hit));
                }
            }
        }
        Value::Array(items) => {
            for item in items {
                if let Some(hit) = find_schemas(draft, item, base_url, callback)? {
                    return Ok(Some(hit));
                }
            }
        }
        _ => {}
    }
    Ok(None)
}
/// Replace `needle` with `replacement` only when `needle` actually occurs.
///
/// Searching twice is better than unconditionally allocating a String twice:
/// input without a match is handed back as is.
trait MaybeReplaceExt<'a> {
    fn maybe_replace(self, needle: &str, replacement: &str) -> Cow<'a, str>;
}

impl<'a> MaybeReplaceExt<'a> for &'a str {
    fn maybe_replace(self, needle: &str, replacement: &str) -> Cow<'a, str> {
        match memchr::memmem::find(self.as_bytes(), needle.as_bytes()) {
            Some(_) => Cow::Owned(self.replace(needle, replacement)),
            None => Cow::Borrowed(self),
        }
    }
}

impl<'a> MaybeReplaceExt<'a> for Cow<'a, str> {
    fn maybe_replace(self, needle: &str, replacement: &str) -> Cow<'a, str> {
        match memchr::memmem::find(self.as_bytes(), needle.as_bytes()) {
            Some(_) => Cow::Owned(self.replace(needle, replacement)),
            // Nothing to replace: keep whatever was passed in, borrowed or owned
            None => self,
        }
    }
}
/// Follow a JSON pointer through `document`, collecting the IDs met on the way.
///
/// Based on `serde_json`, but tracks folders in the traversed documents: the ID of
/// every object that is stepped through (the final target excluded) is recorded
/// in traversal order.
///
/// Returns `None` when `pointer` does not start with `/`, when a token is missing
/// from an object, is not a valid index into an array, or when a scalar is reached
/// before the pointer is exhausted.
pub(crate) fn pointer<'a>(
    draft: Draft,
    document: &'a Value,
    pointer: &str,
) -> Option<(Vec<&'a str>, &'a Value)> {
    let path = pointer.strip_prefix('/')?;
    let mut folders = Vec::new();
    let mut target = document;
    for raw in path.split('/') {
        // Unescape `~1` before `~0`, so that `~01` ends up as `~1` and not `/`
        let token = raw.maybe_replace("~1", "/").maybe_replace("~0", "~");
        target = match target {
            Value::Object(map) => {
                if let Some(id) = id_of(draft, target) {
                    folders.push(id);
                }
                map.get(&*token)?
            }
            Value::Array(list) => list.get(parse_index(&token)?)?,
            _ => return None,
        };
    }
    Some((folders, target))
}
/// Parse an array index token.
///
/// A leading `+` and leading zeros (anything starting with `0` other than `0`
/// itself) are rejected; whatever remains must parse as `usize`.
fn parse_index(s: &str) -> Option<usize> {
    match s.as_bytes() {
        // Explicit sign, or a `0` followed by at least one more byte
        [b'+', ..] | [b'0', _, ..] => None,
        _ => s.parse().ok(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;
    use std::sync::Arc;
    use url::Url;

    /// Build a Draft 7 resolver scoped at `json-schema:///` with an empty store.
    fn make_resolver(schema: &Value) -> Resolver {
        let scope = Url::parse("json-schema:///").unwrap();
        Resolver::new(
            Arc::new(DefaultResolver),
            Draft::Draft7,
            &scope,
            Arc::new(schema.clone()),
            AHashMap::new(),
        )
        .unwrap()
    }

    /// Check that the resolver holds exactly `expected.len()` named schemas and that
    /// each `(id, json_pointer)` pair maps the ID to that location inside `schema`.
    #[track_caller]
    fn assert_registered(schema: &Value, resolver: &Resolver, expected: &[(&str, &str)]) {
        assert_eq!(resolver.schemas.len(), expected.len());
        for (id, location) in expected {
            assert_eq!(
                resolver.schemas.get(*id).map(AsRef::as_ref),
                schema.pointer(location)
            );
        }
    }

    #[test]
    fn only_keyword() {
        // When only one keyword is specified
        let schema = json!({"type": "string"});
        let resolver = make_resolver(&schema);
        // Then in the resolver schema there should be no schemas
        assert_eq!(resolver.schemas.len(), 0);
    }

    #[test]
    fn sub_schema_in_object() {
        // When only one sub-schema is specified inside an object
        let schema = json!({
            "allOf": [{"$ref": "#foo"}],
            "definitions": {
                "A": {"$id": "#foo", "type": "integer"}
            }
        });
        let resolver = make_resolver(&schema);
        // Then in the resolver schema there should be only this schema
        assert_registered(
            &schema,
            &resolver,
            &[("json-schema:///#foo", "/definitions/A")],
        );
    }

    #[test]
    fn sub_schemas_in_array() {
        // When sub-schemas are specified inside an array
        let schema = json!({
            "definitions": {
                "A": [
                    {"$id": "#foo", "type": "integer"},
                    {"$id": "#bar", "type": "string"},
                ]
            }
        });
        let resolver = make_resolver(&schema);
        // Then in the resolver schema there should be only these schemas
        assert_registered(
            &schema,
            &resolver,
            &[
                ("json-schema:///#foo", "/definitions/A/0"),
                ("json-schema:///#bar", "/definitions/A/1"),
            ],
        );
    }

    #[test]
    fn root_schema_id() {
        // When the root schema has an ID
        let schema = json!({
            "$id": "http://localhost:1234/tree",
            "definitions": {
                "node": {
                    "$id": "http://localhost:1234/node",
                    "description": "node",
                    "properties": {
                        "subtree": {"$ref": "tree"},
                        "value": {"type": "number"}
                    },
                    "required": ["value"],
                    "type": "object"
                }
            },
            "description": "tree of nodes",
            "properties": {
                "meta": {"type": "string"},
                "nodes": {
                    "items": {"$ref": "node"},
                    "type": "array"
                }
            },
            "required": ["meta", "nodes"],
            "type": "object"
        });
        let resolver = make_resolver(&schema);
        // Then in the resolver schema there should be root & sub-schema
        assert_registered(
            &schema,
            &resolver,
            &[
                ("http://localhost:1234/tree", ""),
                ("http://localhost:1234/node", "/definitions/node"),
            ],
        );
    }

    #[test]
    fn location_independent_with_absolute_uri() {
        let schema = json!({
            "allOf": [{"$ref": "http://localhost:1234/bar#foo"}],
            "definitions": {
                "A": {"$id": "http://localhost:1234/bar#foo", "type": "integer"}
            }
        });
        let resolver = make_resolver(&schema);
        assert_registered(
            &schema,
            &resolver,
            &[("http://localhost:1234/bar#foo", "/definitions/A")],
        );
    }

    #[test]
    fn location_independent_with_absolute_uri_base_change() {
        let schema = json!({
            "$id": "http://localhost:1234/root",
            "allOf":[{"$ref": "http://localhost:1234/nested.json#foo"}],
            "definitions": {
                "A": {
                    "$id": "nested.json",
                    "definitions": {
                        "B": {
                            "$id": "#foo",
                            "type": "integer"
                        }
                    }
                }
            }
        });
        let resolver = make_resolver(&schema);
        assert_registered(
            &schema,
            &resolver,
            &[
                ("http://localhost:1234/root", ""),
                ("http://localhost:1234/nested.json", "/definitions/A"),
                (
                    "http://localhost:1234/nested.json#foo",
                    "/definitions/A/definitions/B",
                ),
            ],
        );
    }

    #[test]
    fn base_uri_change() {
        let schema = json!({
            "$id": "http://localhost:1234/",
            "items": {
                "$id":"folder/",
                "items": {"$ref": "folderInteger.json"}
            }
        });
        let resolver = make_resolver(&schema);
        assert_registered(
            &schema,
            &resolver,
            &[
                ("http://localhost:1234/", ""),
                ("http://localhost:1234/folder/", "/items"),
            ],
        );
    }

    #[test]
    fn base_uri_change_folder() {
        let schema = json!({
            "$id": "http://localhost:1234/scope_change_defs1.json",
            "definitions": {
                "baz": {
                    "$id": "folder/",
                    "items": {"$ref": "folderInteger.json"},
                    "type":"array"
                }
            },
            "properties": {
                "list": {"$ref": "#/definitions/baz"}
            },
            "type": "object"
        });
        let resolver = make_resolver(&schema);
        assert_registered(
            &schema,
            &resolver,
            &[
                ("http://localhost:1234/scope_change_defs1.json", ""),
                ("http://localhost:1234/folder/", "/definitions/baz"),
            ],
        );
    }

    #[test]
    fn resolve_ref() {
        let schema = json!({
            "$ref": "#/definitions/c",
            "definitions": {
                "a": {"type": "integer"},
                "b": {"$ref": "#/definitions/a"},
                "c": {"$ref": "#/definitions/b"}
            }
        });
        let resolver = make_resolver(&schema);
        let url = Url::parse("json-schema:///#/definitions/a").unwrap();
        let resolution = resolver.resolve_fragment(Draft::Draft7, &url, "#/definitions/a");
        let (resource, resolved) = resolution.unwrap();
        // The fragment is stripped from the returned resource URL
        assert_eq!(resource, Url::parse("json-schema:///").unwrap());
        assert_eq!(resolved.as_ref(), schema.pointer("/definitions/a").unwrap());
    }
}