Skip to content

Commit

Permalink
Introduce the jsonSchemaMerge reduction strategy (#1132)
Browse files Browse the repository at this point in the history
  • Loading branch information
jshearer authored Aug 8, 2023
1 parent 826ef6c commit 8b4f042
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 1 deletion.
2 changes: 2 additions & 0 deletions crates/doc/src/inference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ pub enum Reduction {

Append,
FirstWriteWins,
JsonSchemaMerge,
LastWriteWins,
Maximize,
Merge,
Expand Down Expand Up @@ -177,6 +178,7 @@ impl From<&reduce::Strategy> for Reduction {
Strategy::Set(_) => Reduction::Set,
Strategy::Sum => Reduction::Sum,
Strategy::Merge(_) => Reduction::Merge,
Strategy::JsonSchemaMerge => Reduction::JsonSchemaMerge,
}
}
}
Expand Down
3 changes: 3 additions & 0 deletions crates/doc/src/reduce/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::cmp::Ordering;
pub mod strategy;
pub use strategy::Strategy;

mod schema;
mod set;

pub static DEFAULT_STRATEGY: &Strategy = &Strategy::LastWriteWins;
Expand All @@ -20,6 +21,8 @@ pub enum Error {
SumNumericOverflow,
#[error("'sum' strategy expects numbers")]
SumWrongType,
#[error("'json-schema-merge' strategy expects objects containing valid JSON schemas. {}", .detail.as_deref().unwrap_or_default())]
JsonSchemaMergeWrongType { detail: Option<String> },
#[error("'merge' strategy expects objects or arrays")]
MergeWrongType,
#[error(
Expand Down
133 changes: 133 additions & 0 deletions crates/doc/src/reduce/schema.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
use super::{count_nodes_heap, Cursor, Error, Result};
use crate::{inference::Shape, schema::SchemaBuilder, AsNode, HeapNode};
use json::schema::index::IndexBuilder;

pub fn json_schema_merge<'alloc, L: AsNode, R: AsNode>(
cur: Cursor<'alloc, '_, '_, '_, '_, L, R>,
) -> Result<HeapNode<'alloc>> {
let Cursor {
tape,
loc,
full: _,
lhs,
rhs,
alloc,
} = cur;

let (lhs, rhs) = (lhs.into_heap_node(alloc), rhs.into_heap_node(alloc));

*tape = &tape[count_nodes_heap(&rhs)..];

// Ensure that we're working with objects on both sides
// Question: Should we actually relax this to support
// reducing valid schemas like "true" and "false"?
let (
lhs @ HeapNode::Object(_),
rhs @ HeapNode::Object(_)
) = (lhs, rhs) else {
return Err(Error::with_location(Error::JsonSchemaMergeWrongType { detail: None }, loc) )
};

let left = shape_from_node(lhs).map_err(|e| Error::with_location(e, loc))?;
let right = shape_from_node(rhs).map_err(|e| Error::with_location(e, loc))?;

let mut merged_shape = Shape::union(left, right);
merged_shape.enforce_field_count_limits(json::Location::Root);

// Union together the LHS and RHS, and convert back from `Shape` into `HeapNode`.
let merged_doc = serde_json::to_value(&SchemaBuilder::new(merged_shape).root_schema())
.and_then(|value| HeapNode::from_serde(value, alloc))
.map_err(|e| {
Error::with_location(
Error::JsonSchemaMergeWrongType {
detail: Some(e.to_string()),
},
loc,
)
})?;

Ok(merged_doc)
}

fn shape_from_node<'a, N: AsNode>(node: N) -> Result<Shape> {
// Should this be something more specific/useful?
let url = url::Url::parse("json-schema-reduction:///").unwrap();

let serialized =
serde_json::to_value(node.as_node()).map_err(|e| Error::JsonSchemaMergeWrongType {
detail: Some(e.to_string()),
})?;

let schema = json::schema::build::build_schema::<crate::Annotation>(url.clone(), &serialized)
.map_err(|e| Error::JsonSchemaMergeWrongType {
detail: Some(e.to_string()),
})?;

let mut index = IndexBuilder::new();
index.add(&schema).unwrap();
index.verify_references().unwrap();
let index = index.into_index();

Ok(Shape::infer(
index
.must_fetch(&url)
.map_err(|e| Error::JsonSchemaMergeWrongType {
detail: Some(e.to_string()),
})?,
&index,
))
}

#[cfg(test)]
mod test {
use super::super::test::*;
use super::*;

// Exercises the `jsonSchemaMerge` strategy through the shared `run_reduce_cases`
// helper, using a sequence of `Partial` documents and their expected outcomes.
// NOTE(review): the expectations below presumably rely on the cases being applied
// in order against an accumulated LHS -- confirm against `run_reduce_cases`.
#[test]
fn test_merge_json_schemas() {
run_reduce_cases(
json!({ "reduce": { "strategy": "jsonSchemaMerge" } }),
vec![
// A string schema; the expected document is identical to the input
// (note: no "$schema" keyword is expected here).
Partial {
rhs: json!({
"type": "string",
"maxLength": 5,
"minLength": 5
}),
expect: Ok(json!({
"type": "string",
"maxLength": 5,
"minLength": 5
})),
},
// A non-object RHS is rejected with no detail message.
Partial {
rhs: json!("oops!"),
expect: Err(Error::JsonSchemaMergeWrongType { detail: None }),
},
// An object that is not a valid schema is rejected, and the schema
// build error is carried in `detail`.
Partial {
rhs: json!({
"type": "foo"
}),
expect: Err(Error::JsonSchemaMergeWrongType {
detail: Some(
r#"at keyword 'type' of schema 'json-schema-reduction:///': expected a type or array of types: invalid type name: 'foo'"#.to_owned(),
),
}),
},
// A second string schema with different length bounds. The expected
// output carries "$schema" and the widened bounds: minLength 5 and
// maxLength 10.
Partial {
rhs: json!({
"type": "string",
"minLength": 8,
"maxLength": 10
}),
expect: Ok(json!({
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "string",
"minLength": 5,
"maxLength": 10,
})),
},
],
)
}
}
6 changes: 5 additions & 1 deletion crates/doc/src/reduce/strategy.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use super::{
compare_key_lazy, count_nodes, count_nodes_generic, count_nodes_heap, reduce_item, reduce_prop,
Cursor, Error, Result,
schema::json_schema_merge, Cursor, Error, Result,
};
use crate::{
lazy::{LazyArray, LazyDestructured, LazyObject},
Expand Down Expand Up @@ -75,6 +75,9 @@ pub enum Strategy {
/// In the future, we may allow for arbitrary-sized integer and
/// floating-point representations which use a string encoding scheme.
Sum,
/// Deep-merge the JSON schemas in LHS and RHS,
/// both of which must be objects containing valid JSON schemas.
JsonSchemaMerge,
}

impl std::convert::TryFrom<&serde_json::Value> for Strategy {
Expand Down Expand Up @@ -120,6 +123,7 @@ impl Strategy {
Strategy::Minimize(min) => Self::minimize(cur, min),
Strategy::Set(set) => set.apply(cur),
Strategy::Sum => Self::sum(cur),
Strategy::JsonSchemaMerge => json_schema_merge(cur),
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/flow-web/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ fn reduce_description(reduce: doc::inference::Reduction) -> &'static str {
Reduction::Set => "set",
Reduction::Sum => "sum",
Reduction::Multiple => "multiple strategies may apply",
Reduction::JsonSchemaMerge => "merge json schemas",
}
}

Expand Down

0 comments on commit 8b4f042

Please sign in to comment.