Skip to content

Commit

Permalink
mechanical updates for refactor of doc::inference
Browse files Browse the repository at this point in the history
Update to use doc::shape and friends instead.
  • Loading branch information
jgraettinger committed Aug 9, 2023
1 parent 26780b7 commit ddd8f01
Show file tree
Hide file tree
Showing 17 changed files with 73 additions and 65 deletions.
2 changes: 1 addition & 1 deletion crates/agent/src/evolution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ impl ResourceSpecUpdater {
let mut builder = doc::SchemaIndexBuilder::new();
builder.add(&schema)?;
let index = builder.into_index();
let shape = doc::inference::Shape::infer(&schema, &index);
let shape = doc::Shape::infer(&schema, &index);

for (ptr, _, prop_shape, _) in shape.locations() {
if prop_shape.annotations.contains_key("x-collection-name") {
Expand Down
6 changes: 3 additions & 3 deletions crates/assemble/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use doc::inference::{Exists, Shape};
use doc::shape::{location::Exists, Shape};
use json::schema::{formats, types};
use proto_flow::flow;
use proto_gazette::{broker, consumer};
Expand Down Expand Up @@ -602,7 +602,7 @@ pub fn label_selector(t: models::LabelSelector) -> broker::LabelSelector {
#[cfg(test)]
mod test {
use super::*;
use doc::inference::StringShape;
use doc::shape::StringShape;
use serde_json::json;

#[test]
Expand All @@ -620,7 +620,7 @@ mod test {
min_length: 10,
max_length: Some(123),
},
..Default::default()
..Shape::anything()
};

let out1 = inference(&shape, Exists::Must);
Expand Down
9 changes: 3 additions & 6 deletions crates/derive-typescript/src/codegen/mapper.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::ast::{ASTProperty, ASTTuple, AST};
use doc::inference::{ArrayShape, ObjShape, Provenance, Shape};
use doc::shape::{ArrayShape, ObjShape, Provenance, Shape};
use json::schema::{types, Keyword};
use regex::Regex;
use std::collections::BTreeMap;
Expand Down Expand Up @@ -56,7 +56,7 @@ impl Mapper {
let index = self.validator.schema_index();
let shape = match index.fetch(url) {
Some(schema) => Shape::infer(schema, index),
None => Shape::default(),
None => Shape::anything(),
};
self.to_ast(&shape)
}
Expand Down Expand Up @@ -180,10 +180,7 @@ impl Mapper {
// type that accommodates *all* types used across any property.
// See: https://basarat.gitbook.io/typescript/type-system/index-signatures

let mut merged = Shape {
type_: types::INVALID,
..Shape::default()
};
let mut merged = Shape::nothing();
let mut has_optional = false;

for prop in &obj.properties {
Expand Down
2 changes: 1 addition & 1 deletion crates/derive/src/combine_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ pub mod test {
docs: &[(bool, serde_json::Value)],
) -> Vec<Vec<String>> {
let validator = crate::new_validator(&schema_json).unwrap();
let shape = doc::inference::Shape::infer(&validator.schemas()[0], validator.schema_index());
let shape = doc::Shape::infer(&validator.schemas()[0], validator.schema_index());

let projections: Vec<_> = shape
.locations()
Expand Down
12 changes: 7 additions & 5 deletions crates/flow-web/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
mod utils;

use doc::inference::{Exists, Reduction, Shape};
use doc::Annotation;
use doc::{
shape::{location::Exists, Reduction},
Annotation, Shape,
};
use json::schema;
use serde::{Deserialize, Serialize};
use wasm_bindgen::prelude::*;

mod utils;

// When the `wee_alloc` feature is enabled, use `wee_alloc` as the global
// allocator.
#[cfg(feature = "wee_alloc")]
Expand All @@ -30,7 +32,7 @@ pub struct Property {
pub enum_vals: Vec<serde_json::Value>,
pub string_format: Option<String>,
}
fn reduce_description(reduce: doc::inference::Reduction) -> &'static str {
fn reduce_description(reduce: Reduction) -> &'static str {
match reduce {
Reduction::Unset => "unset",
Reduction::Append => "append",
Expand Down
2 changes: 1 addition & 1 deletion crates/flowctl/src/preview/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::{dataplane, local_specs};
use anyhow::Context;
use doc::schema::to_schema;
use doc::shape::schema::to_schema;
use futures::channel::mpsc;
use futures::{SinkExt, StreamExt, TryStreamExt};
use prost::Message;
Expand Down
2 changes: 1 addition & 1 deletion crates/flowctl/src/raw/discover.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use anyhow::Context;
use doc::{inference::Shape, SchemaIndexBuilder};
use doc::{SchemaIndexBuilder, Shape};
use json::schema::{build::build_schema, types};
use models::{
Capture, CaptureBinding, CaptureDef, CaptureEndpoint, Catalog, Collection, CollectionDef,
Expand Down
2 changes: 1 addition & 1 deletion crates/flowctl/src/raw/suggest_schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
};
use anyhow::anyhow;
use bytelines::AsyncByteLines;
use doc::{inference::Shape, schema::SchemaBuilder, FailedValidation, SchemaIndexBuilder};
use doc::{shape::schema::SchemaBuilder, FailedValidation, SchemaIndexBuilder, Shape};
use futures::{Stream, StreamExt, TryStreamExt};
use json::schema::build::build_schema;
use models::Schema;
Expand Down
32 changes: 23 additions & 9 deletions crates/json/src/schema/formats.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
use std::{net::IpAddr, str::FromStr};

use addr::{parse_domain_name, parse_email_address};
use bigdecimal::BigDecimal;
use fancy_regex::Regex;
use iri_string::spec::{IriSpec, UriSpec};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use std::{net::IpAddr, str::FromStr};
use time::macros::format_description;
use uuid::Uuid;

use crate::validator::ValidationResult;

Expand Down Expand Up @@ -100,17 +99,20 @@ impl Format {
Self::Time => {
// [first] will choose the first matching format to parse the value
// see https://time-rs.github.io/book/api/format-description.html for more info
let full_format = format_description!(version = 2, "[first
[[hour]:[minute]:[second][optional [.[subsecond]]]Z]
[[hour]:[minute]:[second][optional [.[subsecond]]]z]
[[hour]:[minute]:[second][optional [.[subsecond]]][offset_hour]:[offset_minute]]
]");
let full_format = format_description!(
version = 2,
"[first
[[hour]:[minute]:[second][optional [.[subsecond]]]Z]
[[hour]:[minute]:[second][optional [.[subsecond]]]z]
[[hour]:[minute]:[second][optional [.[subsecond]]][offset_hour]:[offset_minute]]
]"
);

ValidationResult::from(time::Time::parse(
val,
&time::format_description::FormatItem::First(full_format),
))
},
}
Self::Email => ValidationResult::from(parse_email_address(val)),
Self::Hostname => ValidationResult::from(parse_domain_name(val)),
// The rules/test cases for these are absolutely bonkers
Expand Down Expand Up @@ -180,6 +182,18 @@ impl Format {
),
}
}

// Detect the Format matched by a given, arbitrary string (if any).
pub fn detect(val: &str) -> Option<Self> {
match val {
_ if Format::Integer.validate(val).is_ok() => Some(Format::Integer),
_ if Format::Number.validate(val).is_ok() => Some(Format::Number),
_ if Format::DateTime.validate(val).is_ok() => Some(Format::DateTime),
_ if Format::Date.validate(val).is_ok() => Some(Format::Date),
_ if Format::Uuid.validate(val).is_ok() => Some(Format::Uuid),
_ => None,
}
}
}

#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion crates/schema-inference/src/analyze.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use doc::schema::SchemaBuilder;
use doc::shape::schema::SchemaBuilder;
use schemars::schema::RootSchema;
use serde_json::Value as JsonValue;
use std::io::BufRead;
Expand Down
12 changes: 6 additions & 6 deletions crates/schema-inference/src/inference.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
use crate::shape;
use doc::inference::{ArrayShape, ObjProperty, ObjShape, Shape, StringShape};
use doc::shape::{ArrayShape, ObjProperty, ObjShape, Shape, StringShape};
use json::schema::{formats::Format, types};
use serde_json::Value as JsonValue;

pub fn infer_shape(value: &JsonValue) -> Shape {
let mut shape = Shape {
type_: types::Set::for_value(value),
..Default::default()
..Shape::nothing()
};

if let JsonValue::String(value) = value {
Expand All @@ -32,7 +32,7 @@ fn infer_string_shape(value: &str) -> StringShape {

StringShape {
format,
..Default::default()
..StringShape::new()
}
}

Expand All @@ -44,10 +44,10 @@ fn infer_array_shape(inner: &[JsonValue]) -> ArrayShape {
{
ArrayShape {
tuple: vec![shape],
..Default::default()
..ArrayShape::new()
}
} else {
Default::default()
ArrayShape::new()
}
}

Expand Down Expand Up @@ -84,7 +84,7 @@ fn infer_object_shape(inner: &serde_json::Map<String, JsonValue>) -> ObjShape {
..Default::default()
})),
*/
..Default::default()
..ObjShape::new()
}
}

Expand Down
19 changes: 8 additions & 11 deletions crates/schema-inference/src/server.rs
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
use std::net::{IpAddr, SocketAddr};
use std::time::Duration;

use crate::inference::infer_shape;
use crate::json_decoder::{JsonCodec, JsonCodecError};
use crate::shape;
use bytesize::ByteSize;
use doc::schema::SchemaBuilder;
use serde_json::json;

use anyhow::Context;
use assemble::journal_selector;
use doc::inference::Shape;
use bytesize::ByteSize;
use doc::shape::schema::SchemaBuilder;
use doc::Shape;
use futures::{Stream, TryStreamExt};
use futures_util::StreamExt;
use journal_client::broker::{fragments_response, FragmentsRequest, JournalSpec};
use journal_client::fragments::FragmentIter;
use journal_client::list::list_journals;
Expand All @@ -20,11 +16,12 @@ use journal_client::read::uncommitted::{
};
use journal_client::{connect_journal_client, ConnectError};
use models;

use futures_util::StreamExt;
use schemars::schema::RootSchema;
use serde::{Deserialize, Serialize};
use serde_json::json;
use serde_json::Value;
use std::net::{IpAddr, SocketAddr};
use std::time::Duration;
use thiserror::Error;
use tokio::sync::watch::Receiver;
use tokio::time::{sleep, Instant};
Expand Down Expand Up @@ -88,7 +85,7 @@ impl InferenceError {

/// Used to roll up shapes inferred from collections,
/// as well as metadata about the data that went into them
#[derive(Default, Debug)]
#[derive(Debug)]
struct ShapeAndMeta {
shape: Shape,
/// The number of documents that were read to infer this shape
Expand Down
4 changes: 2 additions & 2 deletions crates/schema-inference/src/shape.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use doc::inference::{ArrayShape, ObjProperty, ObjShape, Shape, StringShape};
use doc::shape::{ArrayShape, ObjProperty, ObjShape, Shape, StringShape};
use itertools::{EitherOrBoth, Itertools};
use json::schema::types;

Expand Down Expand Up @@ -46,7 +46,7 @@ pub fn merge(lhs: Shape, rhs: Shape) -> Shape {
array,
object,
string,
..Default::default()
..Shape::anything()
}
}

Expand Down
11 changes: 3 additions & 8 deletions crates/schemalate/src/firebolt/firebolt_schema_builder.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use super::errors::*;
use super::firebolt_queries::{CreateTable, DropTable, InsertFromTable};
use super::firebolt_types::{Column, FireboltType, Table, TableSchema, TableType};

use doc::inference::Shape;
use doc::shape::Shape;
use doc::{Annotation, Pointer};
use json::schema::{self, types};
use proto_flow::flow::materialization_spec::Binding;
Expand Down Expand Up @@ -163,12 +162,8 @@ pub fn build_firebolt_queries_bundle(
})
}

pub fn build_drop_query(
table: &Table,
) -> Result<String, Error> {
Ok(DropTable {
table,
}.to_string())
pub fn build_drop_query(table: &Table) -> Result<String, Error> {
Ok(DropTable { table }.to_string())
}

fn projection_type_to_firebolt_type(shape: &Shape) -> Option<FireboltType> {
Expand Down
2 changes: 1 addition & 1 deletion crates/schemalate/src/markdown.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use anyhow::Context;
use doc::{
inference::{Exists, Shape},
shape::{location::Exists, Shape},
Schema, SchemaIndexBuilder,
};
use itertools::Itertools;
Expand Down
4 changes: 2 additions & 2 deletions crates/validation/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ pub enum Error {
KeyHasReduction {
ptr: String,
schema: Url,
strategy: doc::inference::Reduction,
strategy: doc::shape::Reduction,
},
#[error("{category} projection {field} does not exist in collection {collection}")]
NoSuchProjection {
Expand Down Expand Up @@ -184,7 +184,7 @@ pub enum Error {
#[error(transparent)]
SchemaIndex(#[from] json::schema::index::Error),
#[error(transparent)]
SchemaShape(#[from] doc::inference::Error),
SchemaShape(#[from] doc::shape::inspections::Error),
#[error(transparent)]
SerdeJson(#[from] serde_json::Error),
}
Expand Down
15 changes: 9 additions & 6 deletions crates/validation/src/schema.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use super::Error;
use doc::{inference, validation};
use doc::{
shape::{self, location::Exists},
validation, Shape,
};
use json::schema::types;
use proto_flow::flow::collection_spec::derivation::ShuffleType;

Expand All @@ -10,14 +13,14 @@ pub struct Schema {
// Validator of this schema.
pub validator: validation::Validator,
// Inferred schema shape.
pub shape: inference::Shape,
pub shape: Shape,
}

impl Schema {
pub fn new(bundle: &str) -> Result<Self, Error> {
let schema = doc::validation::build_bundle(bundle)?;
let validator = doc::Validator::new(schema)?;
let shape = inference::Shape::infer(&validator.schemas()[0], validator.schema_index());
let shape = Shape::infer(&validator.schemas()[0], validator.schema_index());

Ok(Self {
curi: validator.schemas()[0].curi.clone(),
Expand Down Expand Up @@ -51,12 +54,12 @@ impl Schema {
ptr: ptr.to_string(),
unmatched,
});
} else if exists == inference::Exists::Implicit {
} else if exists == Exists::Implicit {
return Err(Error::PtrIsImplicit {
ptr: ptr.to_string(),
schema: self.curi.clone(),
});
} else if exists == inference::Exists::Cannot {
} else if exists == Exists::Cannot {
return Err(Error::PtrCannotExist {
ptr: ptr.to_string(),
schema: self.curi.clone(),
Expand All @@ -78,7 +81,7 @@ impl Schema {

if !matches!(
shape.reduction,
inference::Reduction::Unset | inference::Reduction::LastWriteWins,
shape::Reduction::Unset | shape::Reduction::LastWriteWins,
) {
return Err(Error::KeyHasReduction {
ptr: ptr.to_string(),
Expand Down

0 comments on commit ddd8f01

Please sign in to comment.