perf: Replace heap-allocated `InstancePath` with stack-only linked list

This commit is contained in:
Dmitry Dygalo 2021-04-30 12:22:44 +02:00 committed by Dmitry Dygalo
parent 867b515df1
commit 1a1c6c3afb
10 changed files with 59 additions and 68 deletions

View File

@ -4,7 +4,8 @@
### Performance
- Avoid `String` allocation in `JSONPointer.into_vec`
- Avoid `String` allocation in `JSONPointer.into_vec`.
- Replace heap-allocated `InstancePath` with stack-only linked list.
## [0.8.0] - 2021-04-27

View File

@ -94,9 +94,9 @@ Ratios are given against compiled `JSONSchema` using its `validate`. The `is_val
| Case | jsonschema_valid | valico | jsonschema.validate | jsonschema.is_valid |
| ------------- | ----------------------- | ----------------------- | --------------------- | ---------------------- |
| Big valid | - | 95.008 ms (**x12.27**) | 7.74 ms | 5.785 ms (**x0.74**) |
| Small valid | 2.04 us (**x4.18**) | 3.67 us (**x7.53**) | 487.38 ns | 113.3 ns (**x0.23**) |
| Small invalid | 397.52 ns (**x0.64**) | 3.73 us (**x6.02**) | 619.32 ns | 5.53 ns (**x0.008**) |
| Big valid | - | 95.008 ms (**x12.46**) | 7.62 ms | 5.785 ms (**x0.75**) |
| Small valid | 2.04 us (**x5.39**) | 3.67 us (**x9.70**) | 378.21 ns | 113.3 ns (**x0.29**) |
| Small invalid | 397.52 ns (**x0.76**) | 3.73 us (**x7.19**) | 518.70 ns | 5.53 ns (**x0.01**) |
Unfortunately, `jsonschema_valid` mistakenly considers the Kubernetes Open API schema as invalid and therefore can't be compared with other libraries in this case.

View File

@ -70,17 +70,17 @@ Ratios are given against the ``validate`` variant.
+-------------------------+------------------------+-----------------------+----------------------------+---------------------------+
| library | ``false`` | ``{"minimum": 10}`` | small | kubernetes-openapi |
+=========================+========================+=======================+============================+===========================+
| jsonschema-rs[validate] | 215.85 ns | 216.10 ns | 1.29 us | 15.35 ms |
| jsonschema-rs[validate] | 200.82 ns | 203.10 ns | 1.15 us | 15.21 ms |
+-------------------------+------------------------+-----------------------+----------------------------+---------------------------+
| jsonschema-rs[is_valid] | 187.60 ns (**x0.86**) | 185.24 ns (**x0.85**) | 938.79 ns (**x0.72**) | 13.81 ms (**x0.89**) |
| jsonschema-rs[is_valid] | 187.60 ns (**x0.93**) | 185.24 ns (**x0.91**) | 938.79 ns (**x0.81**) | 13.81 ms (**x0.90**) |
+-------------------------+------------------------+-----------------------+----------------------------+---------------------------+
| fastjsonschema[CPython] | 58.57 ns (**x0.27**) | 109.10 ns (**x0.50**) | 4.21 us (**x3.26**) | 91.79 ms (**x5.97**) |
| fastjsonschema[CPython] | 58.57 ns (**x0.29**) | 109.10 ns (**x0.53**) | 4.21 us (**x3.66**) | 91.79 ms (**x6.03**) |
+-------------------------+------------------------+-----------------------+----------------------------+---------------------------+
| fastjsonschema[PyPy] | 1.32 ns (**x0.006**) | 33.39 ns (**x0.15**) | 1.17 us (**x0.9**) | 44.27 ms (**x2.88**) |
| fastjsonschema[PyPy] | 1.32 ns (**x0.006**) | 33.39 ns (**x0.16**) | 1.17 us (**x1.01**) | 44.27 ms (**x2.91**) |
+-------------------------+------------------------+-----------------------+----------------------------+---------------------------+
| jsonschema[CPython] | 226.48 ns (**x1.04**) | 1.88 us (**x8.69**) | 58.14 us (**x45.06**) | 1.07 s (**x69.7**) |
| jsonschema[CPython] | 226.48 ns (**x1.12**) | 1.88 us (**x9.25**) | 58.14 us (**x50.55**) | 1.07 s (**x70.34**) |
+-------------------------+------------------------+-----------------------+----------------------------+---------------------------+
| jsonschema[PyPy] | 41.18 ns (**x0.19**) | 224.94 ns (**x1.04**) | 25.97 us (**x20.13**) | 663.30 ms (**x43.21**) |
| jsonschema[PyPy] | 41.18 ns (**x0.20**) | 224.94 ns (**x1.10**) | 25.97 us (**x22.58**) | 663.30 ms (**x43.60**) |
+-------------------------+------------------------+-----------------------+----------------------------+---------------------------+
The bigger the input, the bigger the performance win. You can take a look at the benchmarks in ``benches/bench.py``.

View File

@ -14,7 +14,6 @@ use crate::{
use context::CompilationContext;
use options::CompilationOptions;
use serde_json::Value;
use std::cell::RefCell;
use url::Url;
pub(crate) const DEFAULT_ROOT_URL: &str = "json-schema:///";
@ -64,14 +63,7 @@ impl<'a> JSONSchema<'a> {
let mut errors = self
.validators
.iter()
.flat_map(move |validator| {
validator.validate(
self,
instance,
// The path capacity should be the average depth so we avoid extra allocations
&InstancePath::new(RefCell::new(Vec::with_capacity(6))),
)
})
.flat_map(move |validator| validator.validate(self, instance, &InstancePath::new()))
.peekable();
if errors.peek().is_none() {
Ok(())

View File

@ -54,10 +54,7 @@ impl Validate for AdditionalItemsObjectValidator {
.skip(self.items_count)
.flat_map(|(idx, item)| {
self.validators.iter().flat_map(move |validator| {
instance_path.push(idx);
let errors = validator.validate(schema, item, instance_path);
instance_path.pop();
errors
validator.validate(schema, item, &instance_path.push(idx))
})
})
.collect();

View File

@ -60,10 +60,7 @@ macro_rules! is_valid_patterns {
macro_rules! validate {
($validators:expr, $schema:ident, $value:ident, $instance_path:expr, $property_name:expr) => {{
$validators.iter().flat_map(move |validator| {
$instance_path.push($property_name);
let errors = validator.validate($schema, $value, $instance_path);
$instance_path.pop();
errors
validator.validate($schema, $value, &$instance_path.push($property_name))
})
}};
}

View File

@ -53,10 +53,7 @@ impl Validate for ItemsArrayValidator {
.enumerate()
.flat_map(move |(idx, (item, validators))| {
validators.iter().flat_map(move |validator| {
instance_path.push(idx);
let errors = validator.validate(schema, item, instance_path);
instance_path.pop();
errors
validator.validate(schema, item, &instance_path.push(idx))
})
})
.collect();
@ -108,10 +105,7 @@ impl Validate for ItemsObjectValidator {
.iter()
.flat_map(move |validator| {
items.iter().enumerate().flat_map(move |(idx, item)| {
instance_path.push(idx);
let errors = validator.validate(schema, item, instance_path);
instance_path.pop();
errors
validator.validate(schema, item, &instance_path.push(idx))
})
})
.collect();

View File

@ -62,10 +62,11 @@ impl Validate for PatternPropertiesValidator {
.filter(move |(key, _)| re.is_match(key))
.flat_map(move |(key, value)| {
validators.iter().flat_map(move |validator| {
instance_path.push(key.to_owned());
let errors = validator.validate(schema, value, instance_path);
instance_path.pop();
errors
validator.validate(
schema,
value,
&instance_path.push(key.to_owned()),
)
})
})
})

View File

@ -57,10 +57,7 @@ impl Validate for PropertiesValidator {
let option = item.get(name);
option.into_iter().flat_map(move |item| {
validators.iter().flat_map(move |validator| {
instance_path.push(name.to_string());
let errors = validator.validate(schema, item, instance_path);
instance_path.pop();
errors
validator.validate(schema, item, &instance_path.push(name.to_string()))
})
})
})

View File

@ -1,6 +1,6 @@
//! Facilities for working with paths within schemas or validated instances.
use std::fmt;
use std::fmt::Write;
use std::{cell::RefCell, fmt, ops::Deref};
#[derive(Clone, Debug, Eq, PartialEq)]
/// JSON Pointer as a wrapper around individual path components
@ -46,22 +46,42 @@ pub(crate) enum PathChunk {
Index(usize),
}
pub(crate) type InstancePathInner = RefCell<Vec<PathChunk>>;
/// One node of a path into a validated instance.
///
/// Nodes form a singly linked list from the current location back towards the
/// start of the path: every node borrows its parent instead of owning it.
#[derive(Debug)]
pub(crate) struct InstancePath<'a> {
    /// Path component stored in this node; `None` when the node carries no component.
    pub(crate) chunk: Option<PathChunk>,
    /// Borrowed link to the preceding node; `None` when there is no preceding node.
    pub(crate) parent: Option<&'a InstancePath<'a>>,
}
#[derive(Clone, Debug)]
pub(crate) struct InstancePath(InstancePathInner);
impl<'a> InstancePath<'a> {
pub(crate) fn new() -> Self {
InstancePath {
chunk: None,
parent: None,
}
}
impl InstancePath {
pub(crate) fn new(inner: InstancePathInner) -> Self {
Self(inner)
pub(crate) fn push(&'a self, chunk: impl Into<PathChunk>) -> Self {
InstancePath {
chunk: Some(chunk.into()),
parent: Some(self),
}
}
#[inline]
pub(crate) fn push(&self, value: impl Into<PathChunk>) {
self.borrow_mut().push(value.into())
}
#[inline]
pub(crate) fn pop(&self) {
self.borrow_mut().pop();
/// Collects the path components into a vector, ordered from the outermost
/// ancestor down to this node. Nodes without a component are skipped.
pub(crate) fn to_vec(&'a self) -> Vec<PathChunk> {
    // Pre-size for the average path depth so that typical paths need no reallocation
    let mut chunks = Vec::with_capacity(6);
    // Walk from this node towards the ancestors, cloning each stored component.
    let mut cursor = Some(self);
    while let Some(node) = cursor {
        if let Some(component) = &node.chunk {
            chunks.push(component.clone());
        }
        cursor = node.parent;
    }
    // Components were gathered innermost-first; flip them to outermost-first.
    chunks.reverse();
    chunks
}
}
@ -78,18 +98,10 @@ impl From<usize> for PathChunk {
}
}
impl Deref for InstancePath {
type Target = InstancePathInner;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl From<&InstancePath> for JSONPointer {
impl<'a> From<&'a InstancePath<'a>> for JSONPointer {
#[inline]
fn from(path: &InstancePath) -> Self {
JSONPointer(path.0.borrow().iter().map(|item| item.to_owned()).collect())
fn from(path: &'a InstancePath<'a>) -> Self {
JSONPointer(path.to_vec())
}
}