From 31595de1eae3b00da07e46c7e78a34961f883b2a Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Thu, 3 Aug 2023 17:32:03 +0100 Subject: [PATCH 01/14] BlobStore for retrieving raw blob data from the db (#484) * Use p2panda-rs version which has blob schema * `SchemaProvider` now has 4 system schema :-) * Update proptests * Implement BlobStore with `get_blob` method * Clippy happy * Fix imports * fmt * Remove some BlobStoreErrors and error tests * Update CHANGELOG * add `get_blob_by_view_id` method * clippy --- CHANGELOG.md | 2 + Cargo.lock | 3 +- aquadoggo/Cargo.toml | 5 +- aquadoggo/src/db/errors.rs | 42 +++- aquadoggo/src/db/stores/blob.rs | 297 ++++++++++++++++++++++++ aquadoggo/src/db/stores/mod.rs | 1 + aquadoggo/src/proptests/tests.rs | 2 +- aquadoggo/src/schema/schema_provider.rs | 2 +- aquadoggo_cli/Cargo.toml | 1 + 9 files changed, 340 insertions(+), 15 deletions(-) create mode 100644 aquadoggo/src/db/stores/blob.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 74e31dc9d..9b159c27f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) + ## [0.5.0] ### Added diff --git a/Cargo.lock b/Cargo.lock index 4c412a346..a1532b32b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3108,8 +3108,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "p2panda-rs" version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "462f0e5a6df45b0b9ad387fdf77f9d43c75610d72036c1b1de0aebfe10f434b0" +source = "git+https://github.com/p2panda/p2panda?rev=17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2#17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2" dependencies = [ "arrayvec 0.5.2", "async-trait", diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 7dd1a5652..fcdead087 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -56,6 +56,9 @@ log = "0.4.19" once_cell = "1.18.0" openssl-probe = "0.1.5" p2panda-rs = { version = "0.7.1", features = ["storage-provider"] } +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2", features = [ + "storage-provider", +] } rand = "0.8.5" regex = "1.9.3" serde = { version = "1.0.152", features = ["derive"] } @@ -91,7 +94,7 @@ http = "0.2.9" hyper = "0.14.19" libp2p-swarm-test = "0.2.0" once_cell = "1.17.0" -p2panda-rs = { version = "0.7.1", features = [ +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2", features = [ "test-utils", "storage-provider", ] } diff --git a/aquadoggo/src/db/errors.rs b/aquadoggo/src/db/errors.rs index 76fc4724e..ae7b1253c 100644 --- a/aquadoggo/src/db/errors.rs +++ b/aquadoggo/src/db/errors.rs @@ -13,33 +13,55 @@ pub enum SqlStoreError { #[error("Deletion of row from table {0} did not show any effect")] Deletion(String), + + /// Error returned from BlobStore. + #[error(transparent)] + BlobStoreError(#[from] BlobStoreError), } /// `SchemaStore` errors. #[derive(Error, Debug)] pub enum SchemaStoreError { - /// Catch all error which implementers can use for passing their own errors up the chain. - #[error("Error occured in DocumentStore: {0}")] - #[allow(dead_code)] - Custom(String), - /// Error returned from converting p2panda-rs `DocumentView` into `SchemaView. 
#[error(transparent)] - SystemSchemaError(#[from] SystemSchemaError), + SystemSchema(#[from] SystemSchemaError), /// Error returned from p2panda-rs `Schema` methods. #[error(transparent)] - SchemaError(#[from] SchemaError), + Schema(#[from] SchemaError), /// Error returned from p2panda-rs `SchemaId` methods. #[error(transparent)] - SchemaIdError(#[from] SchemaIdError), + SchemaId(#[from] SchemaIdError), /// Error returned from `DocumentStore` methods. #[error(transparent)] - DocumentStorageError(#[from] DocumentStorageError), + DocumentStorage(#[from] DocumentStorageError), /// Error returned from `OperationStore` methods. #[error(transparent)] - OperationStorageError(#[from] OperationStorageError), + OperationStorage(#[from] OperationStorageError), +} + +#[derive(Error, Debug)] +pub enum BlobStoreError { + /// Error when no "pieces" field found on blob document. + #[error("Missing \"pieces\" field on blob document")] + NotBlobDocument, + + /// Error when no pieces found for existing blob document. + #[error("No pieces found for the requested blob")] + NoBlobPiecesFound, + + /// Error when some pieces not found for existing blob document. + #[error("Some pieces missing for the requested blob")] + MissingPieces, + + /// Error when combined pieces length and claimed blob length don't match. + #[error("The combined pieces length and claimed blob length don't match")] + IncorrectLength, + + /// Error returned from `DocumentStore` methods. + #[error(transparent)] + DocumentStorageError(#[from] DocumentStorageError), } diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs new file mode 100644 index 000000000..e413e32e8 --- /dev/null +++ b/aquadoggo/src/db/stores/blob.rs @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::num::NonZeroU64; + +use p2panda_rs::document::traits::AsDocument; +use p2panda_rs::document::{DocumentId, DocumentViewId}; +use p2panda_rs::operation::OperationValue; +use p2panda_rs::schema::{Schema, SchemaId}; +use p2panda_rs::storage_provider::traits::DocumentStore; + +use crate::db::errors::BlobStoreError; +use crate::db::query::{Field, Filter, Order, Pagination, Select}; +use crate::db::stores::query::{Query, RelationList}; +use crate::db::SqlStore; + +/// The maximum allowed number of blob pieces per blob. +/// @TODO: do we want this? If so, what value should it be and we should add this to +/// p2panda-rs blob validation too. +const MAX_BLOB_PIECES: u64 = 10000; + +pub type BlobData = String; + +impl SqlStore { + /// Get the data for one blob from the store, identified by it's document id. + pub async fn get_blob(&self, id: &DocumentId) -> Result, BlobStoreError> { + // Get the root blob document. + let blob_document = match self.get_document(id).await? { + Some(document) => { + if document.schema_id != SchemaId::Blob(1) { + return Err(BlobStoreError::NotBlobDocument); + } + document + } + None => return Ok(None), + }; + document_to_blob_data(self, blob_document).await + } + + /// Get the data for one blob from the store, identified by it's document view id. + pub async fn get_blob_by_view_id( + &self, + view_id: &DocumentViewId, + ) -> Result, BlobStoreError> { + // Get the root blob document. + let blob_document = match self.get_document_by_view_id(view_id).await? 
{ + Some(document) => { + if document.schema_id != SchemaId::Blob(1) { + return Err(BlobStoreError::NotBlobDocument); + } + document + } + None => return Ok(None), + }; + document_to_blob_data(self, blob_document).await + } +} + +/// Helper method for validation and parsing a document into blob data. +async fn document_to_blob_data( + store: &SqlStore, + blob: impl AsDocument, +) -> Result, BlobStoreError> { + // Get the length of the blob. + let length = match blob.get("length").unwrap() { + OperationValue::Integer(length) => length, + _ => panic!(), // We should never hit this as we already validated that this is a blob document. + }; + + // Get the number of pieces in the blob. + let num_pieces = match blob.get("pieces").unwrap() { + OperationValue::PinnedRelationList(list) => list.len(), + _ => panic!(), // We should never hit this as we already validated that this is a blob document. + }; + + // Now collect all exiting pieces for the blob. + // + // We do this using the stores' query method, targeting pieces which are in the relation + // list of the blob. + let schema = Schema::get_system(SchemaId::BlobPiece(1)).unwrap(); + let list = RelationList::new_pinned(blob.view_id(), "pieces"); + let pagination = Pagination { + first: NonZeroU64::new(MAX_BLOB_PIECES).unwrap(), + ..Default::default() + }; + + let args = Query::new( + &pagination, + &Select::new(&[Field::new("data")]), + &Filter::default(), + &Order::default(), + ); + + let (_, results) = store.query(schema, &args, Some(&list)).await?; + + // No pieces were found. + if results.is_empty() { + return Err(BlobStoreError::NoBlobPiecesFound); + }; + + // Not all pieces were found. + if results.len() != num_pieces { + return Err(BlobStoreError::MissingPieces); + } + + // Now we construct the blob data. + let mut blob_data = "".to_string(); + + for (_, blob_piece_document) in results { + match blob_piece_document + .get("data") + .expect("Blob piece document without \"data\" field") + { + OperationValue::String(data_str) => blob_data += data_str, + _ => panic!(), // We should never hit this as we only queried for blob piece documents. + } + } + + // Combined blob data length doesn't match the claimed length. + if blob_data.len() != *length as usize { + return Err(BlobStoreError::IncorrectLength); + }; + + Ok(Some(blob_data)) +} + +#[cfg(test)] +mod tests { + use p2panda_rs::document::DocumentId; + use p2panda_rs::identity::KeyPair; + use p2panda_rs::schema::SchemaId; + use p2panda_rs::test_utils::fixtures::{key_pair, random_document_view_id}; + use rstest::rstest; + + use crate::db::errors::BlobStoreError; + use crate::test_utils::{add_document, test_runner, TestNode}; + + #[rstest] + fn get_blob(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".to_string(); + + // Publish blob pieces and blob. + let blob_piece_view_id_1 = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data[..5].into())], + &key_pair, + ) + .await; + + let blob_piece_view_id_2 = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data[5..].into())], + &key_pair, + ) + .await; + let blob_view_id = add_document( + &mut node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![blob_piece_view_id_1, blob_piece_view_id_2].into(), + ), + ], + &key_pair, + ) + .await; + + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Get blob by document id. 
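+            // `get_blob` resolves the document to its current view and reassembles the
+            // pieces into a single string, so the complete published data is expected back.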
+ let blob = node.context.store.get_blob(&document_id).await.unwrap(); + + assert!(blob.is_some()); + assert_eq!(blob.unwrap(), blob_data); + + // Get blob by view id. + let blob = node + .context + .store + .get_blob_by_view_id(&blob_view_id) + .await + .unwrap(); + + assert!(blob.is_some()); + assert_eq!(blob.unwrap(), blob_data) + }) + } + + #[rstest] + fn get_blob_errors(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".to_string(); + + // Publish a blob containing pieces which aren't in the store. + let blob_view_id = add_document( + &mut node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![random_document_view_id(), random_document_view_id()].into(), + ), + ], + &key_pair, + ) + .await; + + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // We get the correct `NoBlobPiecesFound` error. + let result = node.context.store.get_blob(&blob_document_id).await; + assert!( + matches!(result, Err(BlobStoreError::NoBlobPiecesFound)), + "{:?}", + result + ); + + // Publish one blob piece. + let blob_piece_view_id_1 = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data[..5].into())], + &key_pair, + ) + .await; + + // Publish a blob with one piece that is in the store and one that isn't. + let blob_view_id = add_document( + &mut node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![blob_piece_view_id_1.clone(), random_document_view_id()].into(), + ), + ], + &key_pair, + ) + .await; + + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // We should get the correct `MissingBlobPieces` error. + let result = node.context.store.get_blob(&blob_document_id).await; + assert!( + matches!(result, Err(BlobStoreError::MissingPieces)), + "{:?}", + result + ); + + // Publish one more blob piece, but it doesn't contain the correct number of bytes. + let blob_piece_view_id_2 = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data[9..].into())], + &key_pair, + ) + .await; + + // Publish a blob with two pieces that are in the store but they don't add up to the + // right byte length. + let blob_view_id = add_document( + &mut node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![blob_piece_view_id_1, blob_piece_view_id_2].into(), + ), + ], + &key_pair, + ) + .await; + + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // We get the correct `IncorrectLength` error. + let result = node.context.store.get_blob(&blob_document_id).await; + assert!( + matches!(result, Err(BlobStoreError::IncorrectLength)), + "{:?}", + result + ); + }) + } +} diff --git a/aquadoggo/src/db/stores/mod.rs b/aquadoggo/src/db/stores/mod.rs index 440b4b3e4..e5ccebf8e 100644 --- a/aquadoggo/src/db/stores/mod.rs +++ b/aquadoggo/src/db/stores/mod.rs @@ -2,6 +2,7 @@ //! Implementations of all `p2panda-rs` defined storage provider traits and additionally //! `aquadoggo` specific interfaces. 
+mod blob; pub mod document; mod entry; mod log; diff --git a/aquadoggo/src/proptests/tests.rs b/aquadoggo/src/proptests/tests.rs index 60db777c6..8aa72e367 100644 --- a/aquadoggo/src/proptests/tests.rs +++ b/aquadoggo/src/proptests/tests.rs @@ -28,7 +28,7 @@ async fn sanity_checks( schemas: &Vec, ) { let node_schemas = node.context.schema_provider.all().await; - assert_eq!(schemas.len(), node_schemas.len() - 2); // minus 2 for system schema + assert_eq!(schemas.len(), node_schemas.len() - 4); // minus 4 for system schema for schema_id in schemas { let result = node .context diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 4eeeeb8f5..14262249c 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -154,7 +154,7 @@ mod test { async fn get_all_schemas() { let provider = SchemaProvider::default(); let result = provider.all().await; - assert_eq!(result.len(), 2); + assert_eq!(result.len(), 4); } #[tokio::test] diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 6bc76a973..8ed1b02b1 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -32,6 +32,7 @@ log = "0.4.20" p2panda-rs = "0.7.1" path-clean = "1.0.1" serde = { version = "1.0.185", features = ["serde_derive"] } +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2" } tokio = { version = "1.28.2", features = ["full"] } toml = "0.7.6" From e5ac15091b459d252de9f04cfad5db5abdc307fe Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Wed, 9 Aug 2023 10:36:47 +0100 Subject: [PATCH 02/14] Enable deletion of dangling `document_views` and related `document_view_fields` from db (#491) * Add fk to document_view_fields with cascading DELETE * Introduce `prune_document_views` method to `DocumentStore` * Test for pruning document views * Test that pinned views don't get deleted * Update CHANGELOG * Clippy * Remove fk constraint on `operation_id` in `document_view_fields` table * Change table creation order in documents migration * Use IS NULL in SQL conditional * Don't use alias in SQL query --- CHANGELOG.md | 1 + .../20220510022755_create-documents.sql | 18 +- aquadoggo/src/db/stores/document.rs | 188 +++++++++++++++++- 3 files changed, 196 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b159c27f..b3bfef6b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Add method to store for pruning document views [#491](https://github.com/p2panda/aquadoggo/pull/491) - Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) ## [0.5.0] diff --git a/aquadoggo/migrations/20220510022755_create-documents.sql b/aquadoggo/migrations/20220510022755_create-documents.sql index e1b2d0850..cc32860c5 100644 --- a/aquadoggo/migrations/20220510022755_create-documents.sql +++ b/aquadoggo/migrations/20220510022755_create-documents.sql @@ -1,20 +1,20 @@ -- SPDX-License-Identifier: AGPL-3.0-or-later -CREATE TABLE IF NOT EXISTS document_view_fields ( - document_view_id TEXT NOT NULL, - operation_id TEXT NOT NULL, - name TEXT NOT NULL, - FOREIGN KEY(operation_id) REFERENCES operations_v1(operation_id) -); - -CREATE INDEX idx_document_view_fields ON document_view_fields (document_view_id, operation_id, name); - CREATE TABLE IF NOT EXISTS document_views ( document_view_id TEXT NOT NULL UNIQUE, schema_id TEXT NOT NULL, PRIMARY KEY 
(document_view_id) ); +CREATE TABLE IF NOT EXISTS document_view_fields ( + document_view_id TEXT NOT NULL, + operation_id TEXT NOT NULL, + name TEXT NOT NULL, + FOREIGN KEY(document_view_id) REFERENCES document_views(document_view_id) ON DELETE CASCADE +); + +CREATE INDEX idx_document_view_fields ON document_view_fields (document_view_id, operation_id, name); + CREATE TABLE IF NOT EXISTS documents ( document_id TEXT NOT NULL UNIQUE, document_view_id TEXT NOT NULL, diff --git a/aquadoggo/src/db/stores/document.rs b/aquadoggo/src/db/stores/document.rs index 463880a2d..22db8423b 100644 --- a/aquadoggo/src/db/stores/document.rs +++ b/aquadoggo/src/db/stores/document.rs @@ -365,6 +365,80 @@ impl SqlStore { .await .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string())) } + + /// Iterate over all views of a document and delete any which: + /// - are not the current view + /// - _and_ no document field exists in the database which contains a pinned relation to this view + #[allow(dead_code)] + async fn prune_document_views( + &self, + document_id: &DocumentId, + ) -> Result<(), DocumentStorageError> { + // Start a transaction, any db insertions after this point, and before the `commit()` + // will be rolled back in the event of an error. + let mut tx = self + .pool + .begin() + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Collect all views _except_ the current view for this document + let document_view_ids: Vec = query_scalar( + " + SELECT + document_views.document_view_id, + documents.document_view_id + FROM + document_views + LEFT JOIN + documents + ON + documents.document_view_id = document_views.document_view_id + WHERE + document_views.document_id = $1 + AND + documents.document_view_id IS NULL + ", + ) + .bind(document_id.as_str()) + .fetch_all(&mut tx) + .await + .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + + // Iterate over all document views and delete them if no document field exists in the + // database which contains a pinned relation to this view. + // + // Deletes on "document_views" cascade to "document_view_fields" so rows there are also removed + // from the database. + for document_view_id in document_view_ids { + query( + " + DELETE FROM + document_views + WHERE + document_views.document_view_id = $1 + AND NOT EXISTS ( + SELECT * FROM operation_fields_v1 + WHERE + operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') + AND + operation_fields_v1.value = $1 + ) + ", + ) + .bind(document_view_id) + .execute(&mut tx) + .await + .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + } + + // Commit the tx here as no errors occurred. + tx.commit() + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + Ok(()) + } } // Helper method for getting rows from the `document_view_fields` table. 
@@ -532,20 +606,24 @@ mod tests { use p2panda_rs::document::materialization::build_graph; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::{DocumentBuilder, DocumentId, DocumentViewFields, DocumentViewId}; + use p2panda_rs::identity::KeyPair; use p2panda_rs::operation::traits::AsOperation; use p2panda_rs::operation::{Operation, OperationId}; use p2panda_rs::storage_provider::traits::{DocumentStore, OperationStore}; use p2panda_rs::test_utils::constants; use p2panda_rs::test_utils::fixtures::{ - operation, random_document_id, random_document_view_id, random_operation_id, + key_pair, operation, random_document_id, random_document_view_id, random_operation_id, }; use p2panda_rs::test_utils::memory_store::helpers::{populate_store, PopulateStoreConfig}; use p2panda_rs::WithId; use rstest::rstest; use crate::db::stores::document::DocumentView; + use crate::materializer::tasks::reduce_task; + use crate::materializer::TaskInput; use crate::test_utils::{ - build_document, populate_and_materialize, populate_store_config, test_runner, TestNode, + add_schema_and_documents, build_document, populate_and_materialize, populate_store_config, + test_runner, TestNode, }; #[rstest] @@ -928,4 +1006,110 @@ mod tests { assert_eq!(schema_documents.len(), 10); }); } + + #[rstest] + fn prunes_document_views( + #[from(populate_store_config)] + #[with(2, 1, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + let first_document_view_id: DocumentViewId = document_id.as_str().parse().unwrap(); + + // Get the current document from the store. + let current_document = node.context.store.get_document(&document_id).await.unwrap(); + + // Get the current view id. + let current_document_view_id = current_document.unwrap().view_id().to_owned(); + + // Reduce a historic view of an existing document. + let _ = reduce_task( + node.context.clone(), + TaskInput::DocumentViewId(first_document_view_id.clone()), + ) + .await; + + // Get that view again to check it's in the db. + let document = node + .context + .store + .get_document_by_view_id(&first_document_view_id) + .await + .unwrap(); + assert!(document.is_some()); + + // Now prune dangling views for the document. + let result = node.context.store.prune_document_views(&document_id).await; + assert!(result.is_ok()); + + // Get the first document view again, it should no longer be there. + let document = node + .context + .store + .get_document_by_view_id(&first_document_view_id) + .await + .unwrap(); + assert!(document.is_none()); + + // Get the current view of the document to make sure that wasn't deleted too. + let document = node + .context + .store + .get_document_by_view_id(¤t_document_view_id) + .await + .unwrap(); + assert!(document.is_some()); + }); + } + + #[rstest] + fn does_not_prune_pinned_views( + #[from(populate_store_config)] + #[with(2, 1, 1)] + config: PopulateStoreConfig, + key_pair: KeyPair, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + let first_document_view_id: DocumentViewId = document_id.as_str().parse().unwrap(); + + // Reduce a historic view of an existing document. 
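+            // Materializing this older view leaves it dangling for now; pinning it from
+            // another document further below is what should protect it from pruning.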
+ let _ = reduce_task( + node.context.clone(), + TaskInput::DocumentViewId(first_document_view_id.clone()), + ) + .await; + + // Add a new document to the store which pins the first view of the above document. + add_schema_and_documents( + &mut node, + "new_schema", + vec![vec![( + "pin_document", + first_document_view_id.clone().into(), + Some(config.schema.id().to_owned()), + )]], + &key_pair, + ) + .await; + + // Now prune dangling views for the document. + let result = node.context.store.prune_document_views(&document_id).await; + assert!(result.is_ok()); + + // Get the first document view, it should still be in the store as it was pinned. + let document = node + .context + .store + .get_document_by_view_id(&first_document_view_id) + .await + .unwrap(); + assert!(document.is_some()); + }); + } } From 57c85abfdf3f9368f12dc652e740708fcabaa078 Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Wed, 9 Aug 2023 10:37:41 +0100 Subject: [PATCH 03/14] Add static file server to `http` service (#483) * Add static file server to `http` service * Create blobs directory when it doesn't exist * Export `BLOBS_DIR_NAME` const * Update CHANGELOG * Create temporary base_path dir when Configuration is ephemeral * `fmt` * Create all dirs in one step * Change CHANGELOG --- CHANGELOG.md | 1 + Cargo.lock | 45 ++++++++++++++++------ aquadoggo/Cargo.toml | 4 +- aquadoggo/src/config.rs | 9 +++++ aquadoggo/src/graphql/mutations/publish.rs | 8 ++-- aquadoggo/src/http/context.rs | 12 +++++- aquadoggo/src/http/mod.rs | 2 +- aquadoggo/src/http/service.rs | 23 ++++++++++- aquadoggo/src/test_utils/client.rs | 4 +- 9 files changed, 85 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b3bfef6b7..e367678f7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +- Serve static files from `blobs` directory [#480](https://github.com/p2panda/aquadoggo/pull/480) - Add method to store for pruning document views [#491](https://github.com/p2panda/aquadoggo/pull/491) - Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) diff --git a/Cargo.lock b/Cargo.lock index a1532b32b..e99a5095f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,6 +197,7 @@ dependencies = [ "serde_bytes", "serde_json", "sqlx", + "tempfile", "thiserror", "tokio", "tokio-stream", @@ -312,7 +313,7 @@ dependencies = [ "async-lock", "async-task", "concurrent-queue", - "fastrand", + "fastrand 1.9.0", "futures-lite", "slab", ] @@ -797,7 +798,7 @@ dependencies = [ "async-lock", "async-task", "atomic-waker", - "fastrand", + "fastrand 1.9.0", "futures-lite", "log", ] @@ -1492,6 +1493,12 @@ dependencies = [ "instant", ] +[[package]] +name = "fastrand" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" + [[package]] name = "fiat-crypto" version = "0.1.20" @@ -1605,7 +1612,7 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" dependencies = [ - "fastrand", + "fastrand 1.9.0", "futures-core", "futures-io", "memchr", @@ -2829,6 +2836,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "mime_guess" +version = "2.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "minimal-lexical" version = "0.2.1" @@ -4540,15 +4557,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" +checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" dependencies = [ - "autocfg", "cfg-if", - "fastrand", + "fastrand 2.0.0", "redox_syscall 0.3.5", - "rustix 0.37.23", + "rustix 0.38.4", "windows-sys", ] @@ -4756,20 +4772,27 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.3.5" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" +checksum = "55ae70283aba8d2a8b411c695c437fe25b8b5e44e23e780662002fc72fb47a82" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.3.3", "bytes", "futures-core", "futures-util", "http", "http-body", "http-range-header", + "httpdate", + "mime", + "mime_guess", + "percent-encoding", "pin-project-lite", + "tokio", + "tokio-util", "tower-layer", "tower-service", + "tracing", ] [[package]] diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index fcdead087..62341a73e 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -78,8 +78,9 @@ tokio = { version = "1.28.2", features = [ "time", ] } tokio-stream = { version = "0.1.14", features = ["sync"] } -tower-http = { version = "0.3.4", default-features = false, features = [ +tower-http = { version = "0.4.0", default-features = false, features = [ "cors", + "fs", ] } triggered = "0.1.2" void = "1.0.2" @@ -109,5 +110,6 @@ rstest = "0.15.0" rstest_reuse = "0.3.0" serde_bytes = "0.11.12" serde_json = "1.0.85" +tempfile = "3.7.0" tower = "0.4.13" tower-service = "0.3.2" diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 15810cf04..100f48794 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -4,6 +4,15 @@ use p2panda_rs::schema::SchemaId; use crate::network::NetworkConfiguration; +/// Data directory name. +const DATA_DIR_NAME: &str = "aquadoggo"; + +/// Filename of default sqlite database. +const DEFAULT_SQLITE_NAME: &str = "aquadoggo-node.sqlite3"; + +/// Blobs directory +pub const BLOBS_DIR_NAME: &str = "blobs"; + /// Configuration object holding all important variables throughout the application. 
#[derive(Debug, Clone)] pub struct Configuration { diff --git a/aquadoggo/src/graphql/mutations/publish.rs b/aquadoggo/src/graphql/mutations/publish.rs index 774a963ba..f9c493827 100644 --- a/aquadoggo/src/graphql/mutations/publish.rs +++ b/aquadoggo/src/graphql/mutations/publish.rs @@ -125,7 +125,7 @@ mod tests { use crate::bus::ServiceMessage; use crate::graphql::GraphQLSchemaManager; - use crate::http::HttpServiceContext; + use crate::http::{HttpServiceContext, BLOBS_ROUTE}; use crate::test_utils::{ add_schema, doggo_fields, doggo_schema, graphql_test_client, populate_and_materialize, populate_store_config, test_runner, TestNode, @@ -237,7 +237,7 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager); + let context = HttpServiceContext::new(manager, BLOBS_ROUTE.into()); let response = context.schema.execute(publish_request).await; @@ -298,7 +298,7 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager); + let context = HttpServiceContext::new(manager, BLOBS_ROUTE.into()); let response = context .schema @@ -326,7 +326,7 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager); + let context = HttpServiceContext::new(manager, BLOBS_ROUTE.into()); context.schema.execute(publish_request).await; diff --git a/aquadoggo/src/http/context.rs b/aquadoggo/src/http/context.rs index 38c40a2e5..758d07bd9 100644 --- a/aquadoggo/src/http/context.rs +++ b/aquadoggo/src/http/context.rs @@ -1,16 +1,24 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +use std::path::PathBuf; + use crate::graphql::GraphQLSchemaManager; #[derive(Clone)] pub struct HttpServiceContext { /// Dynamic GraphQL schema manager. pub schema: GraphQLSchemaManager, + + /// Path of the directory where blobs should be served from + pub blob_dir_path: PathBuf, } impl HttpServiceContext { /// Create a new HttpServiceContext. - pub fn new(schema: GraphQLSchemaManager) -> Self { - Self { schema } + pub fn new(schema: GraphQLSchemaManager, blob_dir_path: PathBuf) -> Self { + Self { + schema, + blob_dir_path, + } } } diff --git a/aquadoggo/src/http/mod.rs b/aquadoggo/src/http/mod.rs index db475580e..312cb4a5b 100644 --- a/aquadoggo/src/http/mod.rs +++ b/aquadoggo/src/http/mod.rs @@ -5,4 +5,4 @@ mod context; mod service; pub use context::HttpServiceContext; -pub use service::{build_server, http_service}; +pub use service::{build_server, http_service, BLOBS_ROUTE}; diff --git a/aquadoggo/src/http/service.rs b/aquadoggo/src/http/service.rs index 7a2f40e02..f8c6eb2f4 100644 --- a/aquadoggo/src/http/service.rs +++ b/aquadoggo/src/http/service.rs @@ -10,16 +10,22 @@ use axum::Router; use http::header::CONTENT_TYPE; use log::{debug, warn}; use tower_http::cors::{Any, CorsLayer}; +use tower_http::services::ServeDir; use crate::bus::ServiceSender; +use crate::config::BLOBS_DIR_NAME; use crate::context::Context; use crate::graphql::GraphQLSchemaManager; use crate::http::api::{handle_graphql_playground, handle_graphql_query}; use crate::http::context::HttpServiceContext; use crate::manager::{ServiceReadySender, Shutdown}; +/// Route to the GraphQL playground const GRAPHQL_ROUTE: &str = "/graphql"; +/// Route to the blobs static file server +pub const BLOBS_ROUTE: &str = "/blobs"; + /// Build HTTP server with GraphQL API. 
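+/// Blob data is additionally served as static files under the `BLOBS_ROUTE` path.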
pub fn build_server(http_context: HttpServiceContext) -> Router { // Configure CORS middleware @@ -29,7 +35,12 @@ pub fn build_server(http_context: HttpServiceContext) -> Router { .allow_credentials(false) .allow_origin(Any); + // Construct static file server + let blob_service = ServeDir::new(http_context.blob_dir_path.clone()); + Router::new() + // Add blobs static file server + .nest_service(BLOBS_ROUTE, blob_service) // Add GraphQL routes .route( GRAPHQL_ROUTE, @@ -55,8 +66,15 @@ pub async fn http_service( let graphql_schema_manager = GraphQLSchemaManager::new(context.store.clone(), tx, context.schema_provider.clone()).await; + let blob_dir_path = context + .config + .base_path + .as_ref() + .expect("Base path not set") + .join(BLOBS_DIR_NAME); + // Introduce a new context for all HTTP routes - let http_context = HttpServiceContext::new(graphql_schema_manager); + let http_context = HttpServiceContext::new(graphql_schema_manager, blob_dir_path); // Start HTTP server with given port and re-attempt with random port if it was taken already let builder = if let Ok(builder) = axum::Server::try_bind(&http_address) { @@ -95,6 +113,7 @@ mod tests { use crate::graphql::GraphQLSchemaManager; use crate::http::context::HttpServiceContext; + use crate::http::service::BLOBS_DIR_NAME; use crate::schema::SchemaProvider; use crate::test_utils::TestClient; use crate::test_utils::{test_runner, TestNode}; @@ -108,7 +127,7 @@ mod tests { let schema_provider = SchemaProvider::default(); let graphql_schema_manager = GraphQLSchemaManager::new(node.context.store.clone(), tx, schema_provider).await; - let context = HttpServiceContext::new(graphql_schema_manager); + let context = HttpServiceContext::new(graphql_schema_manager, BLOBS_DIR_NAME.into()); let client = TestClient::new(build_server(context)); let response = client diff --git a/aquadoggo/src/test_utils/client.rs b/aquadoggo/src/test_utils/client.rs index b57cd86fe..cef4e1003 100644 --- a/aquadoggo/src/test_utils/client.rs +++ b/aquadoggo/src/test_utils/client.rs @@ -13,7 +13,7 @@ use tower::make::Shared; use tower_service::Service; use crate::graphql::GraphQLSchemaManager; -use crate::http::{build_server, HttpServiceContext}; +use crate::http::{build_server, HttpServiceContext, BLOBS_ROUTE}; use crate::test_utils::TestNode; /// GraphQL client which can be used for querying a node in tests. @@ -74,7 +74,7 @@ pub async fn graphql_test_client(node: &TestNode) -> TestClient { node.context.schema_provider.clone(), ) .await; - let http_context = HttpServiceContext::new(manager); + let http_context = HttpServiceContext::new(manager, BLOBS_ROUTE.into()); TestClient::new(build_server(http_context)) } From c472f0ff5ee962968252d25b9de49a8d4533d84e Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Tue, 22 Aug 2023 14:47:54 +0100 Subject: [PATCH 04/14] "blob" materializer task (#493) * Introduce blob materializer task * fmt * Materialize blobs to filesystem in "blob" task * Issue a "blob" task when dependencies met. 
* Some errors are not Critical errors * fmt * Use tempfile for data dir during tests * Test for blob filesystem materialization * fmt --- aquadoggo/src/config.rs | 2 + aquadoggo/src/materializer/service.rs | 3 +- aquadoggo/src/materializer/tasks/blob.rs | 274 ++++++++++++++++++ .../src/materializer/tasks/dependency.rs | 9 +- aquadoggo/src/materializer/tasks/mod.rs | 2 + aquadoggo/src/test_utils/runner.rs | 10 +- 6 files changed, 296 insertions(+), 4 deletions(-) create mode 100644 aquadoggo/src/materializer/tasks/blob.rs diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 100f48794..29dfd6f4c 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -13,6 +13,8 @@ const DEFAULT_SQLITE_NAME: &str = "aquadoggo-node.sqlite3"; /// Blobs directory pub const BLOBS_DIR_NAME: &str = "blobs"; +pub const BLOBS_SYMLINK_DIR_NAME: &str = "documents"; + /// Configuration object holding all important variables throughout the application. #[derive(Debug, Clone)] pub struct Configuration { diff --git a/aquadoggo/src/materializer/service.rs b/aquadoggo/src/materializer/service.rs index 84bbb4bb3..7465ab0b0 100644 --- a/aquadoggo/src/materializer/service.rs +++ b/aquadoggo/src/materializer/service.rs @@ -8,7 +8,7 @@ use tokio::task; use crate::bus::{ServiceMessage, ServiceSender}; use crate::context::Context; use crate::manager::{ServiceReadySender, Shutdown}; -use crate::materializer::tasks::{dependency_task, reduce_task, schema_task}; +use crate::materializer::tasks::{blob_task, dependency_task, reduce_task, schema_task}; use crate::materializer::worker::{Factory, Task, TaskStatus}; use crate::materializer::TaskInput; @@ -38,6 +38,7 @@ pub async fn materializer_service( factory.register("reduce", pool_size, reduce_task); factory.register("dependency", pool_size, dependency_task); factory.register("schema", pool_size, schema_task); + factory.register("blob", pool_size, blob_task); // Get a listener for error signal from factory let on_error = factory.on_error(); diff --git a/aquadoggo/src/materializer/tasks/blob.rs b/aquadoggo/src/materializer/tasks/blob.rs new file mode 100644 index 000000000..7db88d9c7 --- /dev/null +++ b/aquadoggo/src/materializer/tasks/blob.rs @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::fs::{self, File}; +use std::io::Write; +use std::os::unix::fs::symlink; + +use log::{debug, info}; +use p2panda_rs::document::traits::AsDocument; +use p2panda_rs::document::DocumentViewId; +use p2panda_rs::operation::OperationValue; +use p2panda_rs::schema::SchemaId; +use p2panda_rs::storage_provider::traits::{DocumentStore, OperationStore}; + +use crate::config::{BLOBS_DIR_NAME, BLOBS_SYMLINK_DIR_NAME}; +use crate::context::Context; +use crate::db::types::StorageDocument; +use crate::db::SqlStore; +use crate::materializer::worker::{TaskError, TaskResult}; +use crate::materializer::TaskInput; + +/// A blob task assembles and persists blobs to the filesystem. +/// +/// Blob tasks are dispatched whenever a blob or blob piece document has all its immediate +/// dependencies available in the store. +pub async fn blob_task(context: Context, input: TaskInput) -> TaskResult { + debug!("Working on {}", input); + + let input_view_id = match input { + TaskInput::DocumentViewId(view_id) => view_id, + _ => return Err(TaskError::Critical("Invalid task input".into())), + }; + + // Determine the schema of the updated view id. 
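+    // The view is expected to be materialized already, since "blob" tasks are only
+    // dispatched once a blob document has its dependencies available in the store.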
+ let schema = context + .store + .get_schema_by_document_view(&input_view_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))? + .unwrap(); + + let updated_blobs: Vec = match schema { + // This task is about an updated blob document so we only handle that. + SchemaId::Blob(_) => { + let document = context + .store + .get_document_by_view_id(&input_view_id) + .await + .map_err(|err| TaskError::Failure(err.to_string()))? + .unwrap(); + Ok(vec![document]) + } + + // This task is about an updated blob piece document that may be used in one or more blob documents. + SchemaId::BlobPiece(_) => get_related_blobs(&input_view_id, &context).await, + _ => Err(TaskError::Critical(format!( + "Unknown system schema id: {}", + schema + ))), + }?; + + // The related blobs are not known yet to this node so we mark this task failed. + if updated_blobs.is_empty() { + return Err(TaskError::Failure( + "Related blob does not exist (yet)".into(), + )); + } + + // Materialize all updated blobs to the filesystem. + for blob_document in updated_blobs.iter() { + // Get the raw blob data. + let blob_data = context + .store + .get_blob_by_view_id(blob_document.view_id()) + .await + // We don't raise a critical error here, as it is possible that this method returns an + // error. + .map_err(|err| TaskError::Failure(err.to_string()))? + .unwrap(); + + // Compose, and when needed create, the path for the blob file. + let base_path = match &context.config.base_path { + Some(base_path) => base_path, + None => return Err(TaskError::Critical("No base path configured".to_string())), + }; + + let blob_dir = base_path + .join(BLOBS_DIR_NAME) + .join(blob_document.id().as_str()); + + fs::create_dir_all(&blob_dir).map_err(|err| TaskError::Critical(err.to_string()))?; + let blob_view_path = blob_dir.join(blob_document.view_id().to_string()); + + // Write the blob to the filesystem. + info!("Creating blob at path {blob_view_path:?}"); + + let mut file = File::create(&blob_view_path).unwrap(); + file.write_all(blob_data.as_bytes()).unwrap(); + + // create a symlink from `../documents/` -> `..//` + if is_current_view(&context.store, blob_document.view_id()).await? { + info!("Creating symlink from document id to current view"); + + let link_path = base_path + .join(BLOBS_DIR_NAME) + .join(BLOBS_SYMLINK_DIR_NAME) + .join(blob_document.id().as_str()); + + let _ = fs::remove_file(&link_path); + + symlink(blob_view_path, link_path) + .map_err(|err| TaskError::Critical(err.to_string()))?; + } + } + + Ok(None) +} + +/// Retrieve blobs that use the targeted blob piece as one of their fields. +async fn get_related_blobs( + target_blob_piece: &DocumentViewId, + context: &Context, +) -> Result, TaskError> { + // Retrieve all blob documents from the store + let blobs = context + .store + .get_documents_by_schema(&SchemaId::Blob(1)) + .await + .map_err(|err| TaskError::Critical(err.to_string())) + .unwrap(); + + // Collect all blobs that use the targeted blob piece + let mut related_blobs = vec![]; + for blob in blobs { + // We can unwrap the value here as all documents returned from the storage method above + // have a current view (they are not deleted). + let fields_value = blob.get("pieces").unwrap(); + + if let OperationValue::PinnedRelationList(fields) = fields_value { + if fields + .iter() + .any(|field_view_id| field_view_id == target_blob_piece) + { + related_blobs.push(blob) + } else { + continue; + } + } else { + // It is a critical if there are blobs in the store that don't match the blob schema. 
+ Err(TaskError::Critical( + "Blob operation does not have a 'pieces' operation field".into(), + ))? + } + } + + Ok(related_blobs) +} + +// Check if this is the current view for this blob. +async fn is_current_view( + store: &SqlStore, + document_view_id: &DocumentViewId, +) -> Result { + let blob_document_id = store + .get_document_id_by_operation_id(document_view_id.graph_tips().first().unwrap()) + .await + .map_err(|err| TaskError::Critical(err.to_string()))? + .expect("Document for blob exists"); + + let current_blob_document = store + .get_document(&blob_document_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))? + .expect("Document for blob exists"); + + Ok(current_blob_document.view_id() == document_view_id) +} + +#[cfg(test)] +mod tests { + use std::fs; + + use p2panda_rs::document::DocumentId; + use p2panda_rs::identity::KeyPair; + use p2panda_rs::schema::SchemaId; + use p2panda_rs::test_utils::fixtures::key_pair; + use rstest::rstest; + + use crate::config::{BLOBS_DIR_NAME, BLOBS_SYMLINK_DIR_NAME}; + use crate::materializer::tasks::blob_task; + use crate::materializer::TaskInput; + use crate::test_utils::{add_document, test_runner, TestNode}; + + #[rstest] + fn materializes_blob_to_filesystem(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".to_string(); + + // Publish blob pieces and blob. + let blob_piece_view_id_1 = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data[..5].into())], + &key_pair, + ) + .await; + + let blob_piece_view_id_2 = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data[5..].into())], + &key_pair, + ) + .await; + + // Publish blob. + let blob_view_id = add_document( + &mut node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![blob_piece_view_id_1, blob_piece_view_id_2].into(), + ), + ], + &key_pair, + ) + .await; + + // Run blob task. + let result = blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await; + + // It shouldn't fail. + assert!(result.is_ok(), "{:#?}", result); + // It should return no extra tasks. + assert!(result.unwrap().is_none()); + + // Convert blob view id to document id. + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Construct the expected path to the blob view file. + let base_path = node.context.config.base_path.as_ref().unwrap(); + let blob_path = base_path + .join(BLOBS_DIR_NAME) + .join(document_id.as_str()) + .join(blob_view_id.to_string()); + + // Read from this file + let retrieved_blob_data = fs::read_to_string(blob_path); + + // It should match the complete published blob data. + assert!(retrieved_blob_data.is_ok(), "{:?}", retrieved_blob_data); + assert_eq!(blob_data, retrieved_blob_data.unwrap()); + + // Construct the expected path to the blob symlink file location. + let blob_path = base_path + .join(BLOBS_DIR_NAME) + .join(BLOBS_SYMLINK_DIR_NAME) + .join(document_id.as_str()); + + // Read from this file + let retrieved_blob_data = fs::read_to_string(blob_path); + + // It should match the complete published blob data. 
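+            // This read resolves through the `blobs/documents/<document id>` symlink,
+            // which points at the file for the blob's current view.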
+ assert!(retrieved_blob_data.is_ok(), "{:?}", retrieved_blob_data); + assert_eq!(blob_data, retrieved_blob_data.unwrap()) + }) + } +} diff --git a/aquadoggo/src/materializer/tasks/dependency.rs b/aquadoggo/src/materializer/tasks/dependency.rs index 4ade3765f..02effd0c5 100644 --- a/aquadoggo/src/materializer/tasks/dependency.rs +++ b/aquadoggo/src/materializer/tasks/dependency.rs @@ -131,10 +131,17 @@ pub async fn dependency_task(context: Context, input: TaskInput) -> TaskResult { + next_tasks.push(Task::new( + "blob", + TaskInput::DocumentViewId(document_view.id().clone()), + )); + } _ => {} } } - // Now we check all the "parent" or "inverse" relations, that is _other_ documents pointing at // the one we're currently looking at let mut reverse_tasks = get_inverse_relation_tasks(&context, document.schema_id()).await?; diff --git a/aquadoggo/src/materializer/tasks/mod.rs b/aquadoggo/src/materializer/tasks/mod.rs index 7172b0cdb..674fab8f5 100644 --- a/aquadoggo/src/materializer/tasks/mod.rs +++ b/aquadoggo/src/materializer/tasks/mod.rs @@ -1,9 +1,11 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +mod blob; mod dependency; mod reduce; mod schema; +pub use blob::blob_task; pub use dependency::dependency_task; pub use reduce::reduce_task; pub use schema::schema_task; diff --git a/aquadoggo/src/test_utils/runner.rs b/aquadoggo/src/test_utils/runner.rs index 634890fba..1b8f238b2 100644 --- a/aquadoggo/src/test_utils/runner.rs +++ b/aquadoggo/src/test_utils/runner.rs @@ -65,8 +65,11 @@ impl TestNodeManager { // Initialise test store using pool. let store = SqlStore::new(pool.clone()); + // Construct tempfile directory for the test runner. + let tmp_dir = tempfile::TempDir::new().unwrap(); + // Construct node config supporting any schema. - let cfg = Configuration::default(); + let cfg = Configuration::new(Some(tmp_dir.path().to_path_buf())).unwrap(); // Construct the actual test node let test_node = TestNode { @@ -101,8 +104,11 @@ pub fn test_runner(test: F) { let (_config, pool) = initialize_db().await; let store = SqlStore::new(pool); + // Construct tempfile directory for the test runner. + let tmp_dir = tempfile::TempDir::new().unwrap(); + // Construct node config supporting any schema. 
- let cfg = Configuration::default(); + let cfg = Configuration::new(Some(tmp_dir.path().to_path_buf())).unwrap(); // Construct the actual test node let node = TestNode { From b2206008d15146478475fe3c3e895fcc8d067e03 Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Thu, 31 Aug 2023 17:50:51 +0100 Subject: [PATCH 05/14] Fix after rebase --- Cargo.lock | 9 +++++++++ aquadoggo/Cargo.toml | 1 - aquadoggo_cli/Cargo.toml | 1 - 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e99a5095f..a09973914 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4956,6 +4956,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 62341a73e..801bff3f0 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -55,7 +55,6 @@ lipmaa-link = "0.2.2" log = "0.4.19" once_cell = "1.18.0" openssl-probe = "0.1.5" -p2panda-rs = { version = "0.7.1", features = ["storage-provider"] } p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2", features = [ "storage-provider", ] } diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 8ed1b02b1..a84e1e907 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -29,7 +29,6 @@ figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" libp2p = "0.52.0" log = "0.4.20" -p2panda-rs = "0.7.1" path-clean = "1.0.1" serde = { version = "1.0.185", features = ["serde_derive"] } p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2" } From 71208cc8958ddea863d287c764b85c9adbe06dbb Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Thu, 31 Aug 2023 18:38:01 +0100 Subject: [PATCH 06/14] Refactor tmp blob dir creation after rebase --- aquadoggo/Cargo.toml | 2 +- aquadoggo/src/config.rs | 12 ++++++------ aquadoggo/src/http/service.rs | 12 +++--------- aquadoggo/src/materializer/tasks/blob.rs | 15 +++++---------- aquadoggo/src/node.rs | 14 ++++++++++++-- aquadoggo/src/test_utils/runner.rs | 13 +++++++------ aquadoggo_cli/src/config.rs | 1 + 7 files changed, 35 insertions(+), 34 deletions(-) diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 801bff3f0..b2185efba 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -67,6 +67,7 @@ sqlx = { version = "0.6.1", features = [ "sqlite", "runtime-tokio-rustls", ] } +tempfile = "3.7.0" thiserror = "1.0.39" tokio = { version = "1.28.2", features = [ "macros", @@ -109,6 +110,5 @@ rstest = "0.15.0" rstest_reuse = "0.3.0" serde_bytes = "0.11.12" serde_json = "1.0.85" -tempfile = "3.7.0" tower = "0.4.13" tower-service = "0.3.2" diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 29dfd6f4c..317e4cf90 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -1,15 +1,11 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +use std::path::PathBuf; + use p2panda_rs::schema::SchemaId; use crate::network::NetworkConfiguration; -/// Data directory name. -const DATA_DIR_NAME: &str = "aquadoggo"; - -/// Filename of default sqlite database. 
-const DEFAULT_SQLITE_NAME: &str = "aquadoggo-node.sqlite3"; - /// Blobs directory pub const BLOBS_DIR_NAME: &str = "blobs"; @@ -32,6 +28,9 @@ pub struct Configuration { /// _not_ recommended for production settings. pub allow_schema_ids: AllowList, + /// Path to blobs directory. + pub blob_dir: Option, + /// URL / connection string to PostgreSQL or SQLite database. pub database_url: String, @@ -62,6 +61,7 @@ impl Default for Configuration { fn default() -> Self { Self { allow_schema_ids: AllowList::Wildcard, + blob_dir: None, database_url: "sqlite::memory:".into(), database_max_connections: 32, http_port: 2020, diff --git a/aquadoggo/src/http/service.rs b/aquadoggo/src/http/service.rs index f8c6eb2f4..8613a0d7d 100644 --- a/aquadoggo/src/http/service.rs +++ b/aquadoggo/src/http/service.rs @@ -13,7 +13,6 @@ use tower_http::cors::{Any, CorsLayer}; use tower_http::services::ServeDir; use crate::bus::ServiceSender; -use crate::config::BLOBS_DIR_NAME; use crate::context::Context; use crate::graphql::GraphQLSchemaManager; use crate::http::api::{handle_graphql_playground, handle_graphql_query}; @@ -66,15 +65,10 @@ pub async fn http_service( let graphql_schema_manager = GraphQLSchemaManager::new(context.store.clone(), tx, context.schema_provider.clone()).await; - let blob_dir_path = context - .config - .base_path - .as_ref() - .expect("Base path not set") - .join(BLOBS_DIR_NAME); + let blob_dir_path = context.config.blob_dir.as_ref().expect("Base path not set"); // Introduce a new context for all HTTP routes - let http_context = HttpServiceContext::new(graphql_schema_manager, blob_dir_path); + let http_context = HttpServiceContext::new(graphql_schema_manager, blob_dir_path.to_owned()); // Start HTTP server with given port and re-attempt with random port if it was taken already let builder = if let Ok(builder) = axum::Server::try_bind(&http_address) { @@ -111,9 +105,9 @@ mod tests { use serde_json::json; use tokio::sync::broadcast; + use crate::config::BLOBS_DIR_NAME; use crate::graphql::GraphQLSchemaManager; use crate::http::context::HttpServiceContext; - use crate::http::service::BLOBS_DIR_NAME; use crate::schema::SchemaProvider; use crate::test_utils::TestClient; use crate::test_utils::{test_runner, TestNode}; diff --git a/aquadoggo/src/materializer/tasks/blob.rs b/aquadoggo/src/materializer/tasks/blob.rs index 7db88d9c7..456e83863 100644 --- a/aquadoggo/src/materializer/tasks/blob.rs +++ b/aquadoggo/src/materializer/tasks/blob.rs @@ -11,7 +11,7 @@ use p2panda_rs::operation::OperationValue; use p2panda_rs::schema::SchemaId; use p2panda_rs::storage_provider::traits::{DocumentStore, OperationStore}; -use crate::config::{BLOBS_DIR_NAME, BLOBS_SYMLINK_DIR_NAME}; +use crate::config::BLOBS_SYMLINK_DIR_NAME; use crate::context::Context; use crate::db::types::StorageDocument; use crate::db::SqlStore; @@ -78,14 +78,12 @@ pub async fn blob_task(context: Context, input: TaskInput) -> TaskResult base_path, None => return Err(TaskError::Critical("No base path configured".to_string())), }; - let blob_dir = base_path - .join(BLOBS_DIR_NAME) - .join(blob_document.id().as_str()); + let blob_dir = base_path.join(blob_document.id().as_str()); fs::create_dir_all(&blob_dir).map_err(|err| TaskError::Critical(err.to_string()))?; let blob_view_path = blob_dir.join(blob_document.view_id().to_string()); @@ -101,7 +99,6 @@ pub async fn blob_task(context: Context, input: TaskInput) -> TaskResult Self { + pub async fn start(key_pair: KeyPair, mut config: Configuration) -> Self { // Initialize database and get connection 
pool let pool = initialize_db(&config) .await @@ -62,6 +64,14 @@ impl Node { let schema_provider = SchemaProvider::new(application_schema, config.allow_schema_ids.clone()); + // Create tmp dirs for blob storage. + // + // @TODO: implement configuring this path for persistent storage. + let tmp_dir = tempfile::TempDir::new().unwrap(); + let blob_dir_path = tmp_dir.path().join(BLOBS_DIR_NAME); + fs::create_dir_all(blob_dir_path.join(BLOBS_SYMLINK_DIR_NAME)).unwrap(); + config.blob_dir = Some(blob_dir_path); + // Create service manager with shared data between services let context = Context::new(store, key_pair, config, schema_provider); let mut manager = diff --git a/aquadoggo/src/test_utils/runner.rs b/aquadoggo/src/test_utils/runner.rs index 1b8f238b2..b700bb695 100644 --- a/aquadoggo/src/test_utils/runner.rs +++ b/aquadoggo/src/test_utils/runner.rs @@ -1,13 +1,14 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::panic; use std::sync::Arc; +use std::{fs, panic}; use futures::Future; use p2panda_rs::identity::KeyPair; use tokio::runtime::Builder; use tokio::sync::Mutex; +use crate::config::{BLOBS_DIR_NAME, BLOBS_SYMLINK_DIR_NAME}; use crate::context::Context; use crate::db::Pool; use crate::db::SqlStore; @@ -65,11 +66,8 @@ impl TestNodeManager { // Initialise test store using pool. let store = SqlStore::new(pool.clone()); - // Construct tempfile directory for the test runner. - let tmp_dir = tempfile::TempDir::new().unwrap(); - // Construct node config supporting any schema. - let cfg = Configuration::new(Some(tmp_dir.path().to_path_buf())).unwrap(); + let cfg = Configuration::default(); // Construct the actual test node let test_node = TestNode { @@ -106,9 +104,12 @@ pub fn test_runner(test: F) { // Construct tempfile directory for the test runner. let tmp_dir = tempfile::TempDir::new().unwrap(); + let blob_dir_path = tmp_dir.path().join(BLOBS_DIR_NAME); + fs::create_dir_all(blob_dir_path.join(BLOBS_SYMLINK_DIR_NAME)).unwrap(); // Construct node config supporting any schema. 
- let cfg = Configuration::new(Some(tmp_dir.path().to_path_buf())).unwrap(); + let mut cfg = Configuration::default(); + cfg.blob_dir = Some(blob_dir_path); // Construct the actual test node let node = TestNode { diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index f9b51a77d..8b146e38b 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -340,6 +340,7 @@ impl TryFrom for NodeConfiguration { Ok(NodeConfiguration { allow_schema_ids, + blob_dir: None, database_url: value.database_url, database_max_connections: value.database_max_connections, http_port: value.http_port, From 6c5d4777f70c79c6fe4c7d9951a3b6fbbf0697a3 Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Thu, 31 Aug 2023 18:52:53 +0100 Subject: [PATCH 07/14] Task for automatic garbage collection of unused documents and views (#500) * Return removed document view ids from `prune_document_views` * Introduce "prune" task * Issue "prune" tasks from "reduce" tasks * Only issue dependency tasks in test utils * Don't issue a dependency task when reduced document is deleted * Update all tests * Clippy * More comments in prune task * Correctly process effected child relations of deleted views * Improve comments in prune task * Correct item name * Test helper method for updating documents * Pruning SQL gets a little more epic * Test for recursive pruning * fmt * Tests for "prune" task * Update CHANGELOG * Remove unused method * Improve doc string * Use transaction in prune method * SQL re-use and more comments * Change comments again * Add cascading deletes to operation_fields_v1 and document_views * Add a trigger which purges a documents logs, entries and operations from the database * Add purge method to document store w/ tests * Re-use common sql strings * Refactor prune_document_views() into separate methods * Remove prune_documents_view() method from DocumentStore * Implement pruning logic in task * Rename "prune" task "garbage_collection" * Check if all views for a document have been removed in "garbage_collection" task * fmt + clippy * Add purge_blob method to BlobStore * Use purge_blob in "garbage_collection" task * Don't remove row from `logs` table on purge * Correct method naming * Some comment improvements * Remove unused import * Move add_blob to test utils * Move assert_query method to test utils * Tests for "garbage_collection" task * Change task name to "garbage_collection" * Clippy * Update CHANGELOG entry * Move SQL trigger into purge_document method on the store * Inline all SQL query constant strings * Update CHANGELOG * Add more comments in purge_blob method * Add test for purging updated blob * Clippy * fmt * Improve comments in garbage_collection task --- CHANGELOG.md | 1 + .../20220509090252_create-operations.sql | 2 +- .../20230114140233_alter-documents.sql | 2 +- aquadoggo/src/db/errors.rs | 4 + aquadoggo/src/db/stores/blob.rs | 322 ++++++++- aquadoggo/src/db/stores/document.rs | 398 +++++++++-- aquadoggo/src/materializer/service.rs | 5 +- .../src/materializer/tasks/dependency.rs | 7 +- .../materializer/tasks/garbage_collection.rs | 650 ++++++++++++++++++ aquadoggo/src/materializer/tasks/mod.rs | 2 + aquadoggo/src/materializer/tasks/reduce.rs | 57 +- aquadoggo/src/test_utils/mod.rs | 4 +- aquadoggo/src/test_utils/node.rs | 128 +++- 13 files changed, 1465 insertions(+), 117 deletions(-) create mode 100644 aquadoggo/src/materializer/tasks/garbage_collection.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index e367678f7..83ee86c04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md 
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Serve static files from `blobs` directory [#480](https://github.com/p2panda/aquadoggo/pull/480) - Add method to store for pruning document views [#491](https://github.com/p2panda/aquadoggo/pull/491) - Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) +- Task for automatic garbage collection of unused documents and views [#500](https://github.com/p2panda/aquadoggo/pull/500) ## [0.5.0] diff --git a/aquadoggo/migrations/20220509090252_create-operations.sql b/aquadoggo/migrations/20220509090252_create-operations.sql index d33712c6d..383de95a8 100644 --- a/aquadoggo/migrations/20220509090252_create-operations.sql +++ b/aquadoggo/migrations/20220509090252_create-operations.sql @@ -16,7 +16,7 @@ CREATE TABLE IF NOT EXISTS operation_fields_v1 ( field_type TEXT NOT NULL, value TEXT NULL, list_index INT NOT NULL, - FOREIGN KEY(operation_id) REFERENCES operations_v1(operation_id) + FOREIGN KEY(operation_id) REFERENCES operations_v1(operation_id) ON DELETE CASCADE ); CREATE INDEX idx_operation_fields_v1 ON operation_fields_v1 (operation_id, name); diff --git a/aquadoggo/migrations/20230114140233_alter-documents.sql b/aquadoggo/migrations/20230114140233_alter-documents.sql index ec8e11be6..023475477 100644 --- a/aquadoggo/migrations/20230114140233_alter-documents.sql +++ b/aquadoggo/migrations/20230114140233_alter-documents.sql @@ -1,3 +1,3 @@ -- SPDX-License-Identifier: AGPL-3.0-or-later -ALTER TABLE document_views ADD COLUMN document_id TEXT NOT NULL REFERENCES documents(document_id); \ No newline at end of file +ALTER TABLE document_views ADD COLUMN document_id TEXT NOT NULL REFERENCES documents(document_id) ON DELETE CASCADE; \ No newline at end of file diff --git a/aquadoggo/src/db/errors.rs b/aquadoggo/src/db/errors.rs index ae7b1253c..d76f6b061 100644 --- a/aquadoggo/src/db/errors.rs +++ b/aquadoggo/src/db/errors.rs @@ -17,6 +17,10 @@ pub enum SqlStoreError { /// Error returned from BlobStore. #[error(transparent)] BlobStoreError(#[from] BlobStoreError), + + /// Error returned from `DocumentStore` methods. + #[error(transparent)] + DocumentStorage(#[from] DocumentStorageError), } /// `SchemaStore` errors. diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs index e413e32e8..1cc491d67 100644 --- a/aquadoggo/src/db/stores/blob.rs +++ b/aquadoggo/src/db/stores/blob.rs @@ -7,8 +7,9 @@ use p2panda_rs::document::{DocumentId, DocumentViewId}; use p2panda_rs::operation::OperationValue; use p2panda_rs::schema::{Schema, SchemaId}; use p2panda_rs::storage_provider::traits::DocumentStore; +use sqlx::{query_scalar, AnyPool}; -use crate::db::errors::BlobStoreError; +use crate::db::errors::{BlobStoreError, SqlStoreError}; use crate::db::query::{Field, Filter, Order, Pagination, Select}; use crate::db::stores::query::{Query, RelationList}; use crate::db::SqlStore; @@ -53,6 +54,109 @@ impl SqlStore { }; document_to_blob_data(self, blob_document).await } + + /// Purge blob data from the node _if_ it is not related to from another document. + pub async fn purge_blob(&self, document_id: &DocumentId) -> Result<(), SqlStoreError> { + // Collect the view id of any existing document views which contain a relation to the blob + // which is the purge target. + let blob_reverse_relations = reverse_relations(&self.pool, document_id, None).await?; + + // If there are no documents referring to the blob then we continue with the purge. 
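+        // (If any reverse relation to the blob exists, this method returns `Ok(())` without
+        // touching the blob document or any of its pieces.)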
+ if blob_reverse_relations.is_empty() { + // Collect the document view ids of all pieces this blob has ever referred to in it's + // `pieces` + let blob_piece_ids: Vec = query_scalar( + " + SELECT + operation_fields_v1.value + FROM + operation_fields_v1 + LEFT JOIN + operations_v1 + ON + operations_v1.operation_id = operation_fields_v1.operation_id + WHERE + operations_v1.document_id = $1 + AND + operation_fields_v1.name = 'pieces' + ", + ) + .bind(document_id.to_string()) + .fetch_all(&self.pool) + .await + .map_err(|e| SqlStoreError::Transaction(e.to_string()))?; + + // Purge the blob document itself. + self.purge_document(document_id).await?; + + // Now iterate over each collected blob piece in order to check if they are still + // needed by any other blob document, and if not purge them as well. + for blob_piece_id in blob_piece_ids { + let blob_piece_id: DocumentId = blob_piece_id + .parse() + .expect("Document Id's from the store are valid"); + + // Collect reverse relations for this blob piece. + let blob_piece_reverse_relations = + reverse_relations(&self.pool, &blob_piece_id, Some(SchemaId::Blob(1))).await?; + + // If there are none then purge the blob piece. + if blob_piece_reverse_relations.is_empty() { + self.purge_document(&blob_piece_id).await?; + } + } + } + + Ok(()) + } +} + +/// Helper for getting the document ids of any document which relates to the specified document. +/// +/// Optionally pass in a `SchemaId` to restrict the results to documents of a certain schema. +async fn reverse_relations( + pool: &AnyPool, + document_id: &DocumentId, + schema_id: Option, +) -> Result, SqlStoreError> { + let schema_id_condition = match schema_id { + Some(schema_id) => format!("AND document_views.schema_id = '{}'", schema_id), + None => String::new(), + }; + + query_scalar(&format!( + " + SELECT + document_view_fields.document_view_id + FROM + document_view_fields + LEFT JOIN + operation_fields_v1 + ON + document_view_fields.operation_id = operation_fields_v1.operation_id + AND + document_view_fields.name = operation_fields_v1.name + LEFT JOIN + document_views + ON + document_view_fields.document_view_id = document_views.document_view_id + WHERE + operation_fields_v1.field_type + IN + ('pinned_relation', 'pinned_relation_list', 'relation', 'relation_list') + {schema_id_condition} + AND + operation_fields_v1.value IN ( + SELECT document_views.document_view_id + FROM document_views + WHERE document_views.document_id = $1 + ) OR operation_fields_v1.value = $1 + ", + )) + .bind(document_id.to_string()) + .fetch_all(pool) + .await + .map_err(|e| SqlStoreError::Transaction(e.to_string())) } /// Helper method for validation and parsing a document into blob data. @@ -72,7 +176,7 @@ async fn document_to_blob_data( _ => panic!(), // We should never hit this as we already validated that this is a blob document. }; - // Now collect all exiting pieces for the blob. + // Now collect all existing pieces for the blob. // // We do this using the stores' query method, targeting pieces which are in the relation // list of the blob. 
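As a rough usage sketch (not part of this patch), a caller holding the node's `SqlStore` can attempt a purge optimistically, because `purge_blob` checks reverse relations itself and is a no-op while the blob is still referenced. The wrapper name below is an illustrative assumption; the import paths are assumed to match the surrounding module:

use p2panda_rs::document::DocumentId;

use crate::db::errors::SqlStoreError;
use crate::db::SqlStore;

// Illustrative sketch only: optimistically purge a blob that may have become detached.
async fn try_purge_blob(store: &SqlStore, blob_id: &DocumentId) -> Result<(), SqlStoreError> {
    // Safe to call unconditionally: the store leaves the blob and its pieces in place
    // when any other document still relates to them.
    store.purge_blob(blob_id).await
}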
@@ -129,46 +233,20 @@ mod tests { use p2panda_rs::identity::KeyPair; use p2panda_rs::schema::SchemaId; use p2panda_rs::test_utils::fixtures::{key_pair, random_document_view_id}; + use p2panda_rs::test_utils::memory_store::helpers::PopulateStoreConfig; use rstest::rstest; use crate::db::errors::BlobStoreError; - use crate::test_utils::{add_document, test_runner, TestNode}; + use crate::test_utils::{ + add_blob, add_document, add_schema_and_documents, assert_query, populate_and_materialize, + populate_store_config, test_runner, update_document, TestNode, + }; #[rstest] fn get_blob(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { let blob_data = "Hello, World!".to_string(); - - // Publish blob pieces and blob. - let blob_piece_view_id_1 = add_document( - &mut node, - &SchemaId::BlobPiece(1), - vec![("data", blob_data[..5].into())], - &key_pair, - ) - .await; - - let blob_piece_view_id_2 = add_document( - &mut node, - &SchemaId::BlobPiece(1), - vec![("data", blob_data[5..].into())], - &key_pair, - ) - .await; - let blob_view_id = add_document( - &mut node, - &SchemaId::Blob(1), - vec![ - ("length", { blob_data.len() as i64 }.into()), - ("mime_type", "text/plain".into()), - ( - "pieces", - vec![blob_piece_view_id_1, blob_piece_view_id_2].into(), - ), - ], - &key_pair, - ) - .await; + let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); @@ -294,4 +372,180 @@ mod tests { ); }) } + + #[rstest] + fn purge_blob(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".to_string(); + let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + + // There is one blob and two blob pieces in database. + // + // These are the rows we expect to exist in each table. + assert_query(&node, "SELECT entry_hash FROM entries", 3).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 3).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 6).await; + assert_query(&node, "SELECT log_id FROM logs", 3).await; + assert_query(&node, "SELECT document_id FROM documents", 3).await; + assert_query(&node, "SELECT document_id FROM document_views", 3).await; + assert_query(&node, "SELECT name FROM document_view_fields", 5).await; + + // Purge this blob from the database, we now expect all tables to be empty (except the + // logs table). 
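+            // (Rows in `logs` are kept on purpose so the author's already-used log ids are
+            // remembered and never handed out again.)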
+ let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let result = node.context.store.purge_blob(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 0).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 0).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 0).await; + assert_query(&node, "SELECT log_id FROM logs", 3).await; + assert_query(&node, "SELECT document_id FROM documents", 0).await; + assert_query(&node, "SELECT document_id FROM document_views", 0).await; + assert_query(&node, "SELECT name FROM document_view_fields", 0).await; + + let result = node.context.store.purge_blob(&document_id).await; + + assert!(result.is_ok(), "{:#?}", result) + }) + } + + #[rstest] + fn purge_blob_only_purges_blobs( + #[from(populate_store_config)] + #[with(1, 1, 1)] + config: PopulateStoreConfig, + key_pair: KeyPair, + ) { + test_runner(|mut node: TestNode| async move { + let _ = populate_and_materialize(&mut node, &config).await; + + let blob_data = "Hello, World!".to_string(); + let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + + // There is one blob and two blob pieces in database. + // + // These are the rows we expect to exist in each table. + assert_query(&node, "SELECT entry_hash FROM entries", 4).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 4).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 19).await; + assert_query(&node, "SELECT log_id FROM logs", 4).await; + assert_query(&node, "SELECT document_id FROM documents", 4).await; + assert_query(&node, "SELECT document_id FROM document_views", 4).await; + assert_query(&node, "SELECT name FROM document_view_fields", 15).await; + + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let result = node.context.store.purge_blob(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 1).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 1).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 13).await; + assert_query(&node, "SELECT log_id FROM logs", 4).await; + assert_query(&node, "SELECT document_id FROM documents", 1).await; + assert_query(&node, "SELECT document_id FROM document_views", 1).await; + assert_query(&node, "SELECT name FROM document_view_fields", 10).await; + + let result = node.context.store.purge_blob(&document_id).await; + + assert!(result.is_ok(), "{:#?}", result) + }) + } + + #[rstest] + fn does_not_purge_blob_if_still_pinned(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".to_string(); + let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + assert_query(&node, "SELECT entry_hash FROM entries", 6).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 6).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 12).await; + assert_query(&node, "SELECT log_id FROM logs", 6).await; + assert_query(&node, "SELECT document_id FROM documents", 6).await; + assert_query(&node, "SELECT document_id FROM document_views", 6).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; + + // Purge this blob 
from the database, we now expect all tables to be empty. + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let result = node.context.store.purge_blob(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 6).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 6).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 12).await; + assert_query(&node, "SELECT log_id FROM logs", 6).await; + assert_query(&node, "SELECT document_id FROM documents", 6).await; + assert_query(&node, "SELECT document_id FROM document_views", 6).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; + + let result = node.context.store.purge_blob(&document_id).await; + + assert!(result.is_ok(), "{:#?}", result) + }) + } + + #[rstest] + fn purge_all_pieces_of_updated_blob(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".to_string(); + let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + + // Create a new blob piece. + let new_blob_pieces = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", "more blob data".into())], + &key_pair, + ) + .await; + + // Update the blob document to point at the new blob piece. + let _ = update_document( + &mut node, + &SchemaId::Blob(1), + vec![("pieces", vec![new_blob_pieces].into())], + &blob_view_id, + &key_pair, + ) + .await; + + // There is one blob and three blob pieces in database. + // + // These are the rows we expect to exist in each table. + assert_query(&node, "SELECT entry_hash FROM entries", 5).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 5).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 8).await; + assert_query(&node, "SELECT log_id FROM logs", 4).await; + assert_query(&node, "SELECT document_id FROM documents", 4).await; + assert_query(&node, "SELECT document_id FROM document_views", 5).await; + assert_query(&node, "SELECT name FROM document_view_fields", 9).await; + + // Purge this blob from the database, we now expect all tables to be empty (except the + // logs table). + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let result = node.context.store.purge_blob(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 0).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 0).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 0).await; + assert_query(&node, "SELECT log_id FROM logs", 4).await; + assert_query(&node, "SELECT document_id FROM documents", 0).await; + assert_query(&node, "SELECT document_id FROM document_views", 0).await; + assert_query(&node, "SELECT name FROM document_view_fields", 0).await; + + let result = node.context.store.purge_blob(&document_id).await; + + assert!(result.is_ok(), "{:#?}", result) + }) + } } diff --git a/aquadoggo/src/db/stores/document.rs b/aquadoggo/src/db/stores/document.rs index 22db8423b..6aa16d3e0 100644 --- a/aquadoggo/src/db/stores/document.rs +++ b/aquadoggo/src/db/stores/document.rs @@ -30,6 +30,7 @@ //! retained, we use a system of "pinned relations" to identify and materialise only views we //! explicitly wish to keep. 
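To spell out the pruning rule this patch encodes: a document view may be removed only when it is neither the current view of its document nor pinned from any other view. Below is a minimal sketch combining the two store methods introduced further down; the wrapper name is an illustrative assumption and the import paths are assumed to match the surrounding module:

use p2panda_rs::document::DocumentViewId;
use p2panda_rs::storage_provider::error::DocumentStorageError;

use crate::db::SqlStore;

// Illustrative wrapper only, not part of this patch.
async fn prune_if_dangling(
    store: &SqlStore,
    view_id: &DocumentViewId,
) -> Result<bool, DocumentStorageError> {
    // A view which is the current view of its document is never pruned.
    if store.is_current_view(view_id).await? {
        return Ok(false);
    }

    // `prune_document_view` additionally refuses to delete a view which is pinned from
    // another view; it returns `true` only when a row was actually removed.
    store.prune_document_view(view_id).await
}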
use async_trait::async_trait; +use log::debug; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::{DocumentId, DocumentView, DocumentViewId}; use p2panda_rs::schema::SchemaId; @@ -366,73 +367,215 @@ impl SqlStore { .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string())) } - /// Iterate over all views of a document and delete any which: - /// - are not the current view - /// - _and_ no document field exists in the database which contains a pinned relation to this view - #[allow(dead_code)] - async fn prune_document_views( + /// Get the ids for all document views for a document which are currently materialized to the store. + pub async fn get_all_document_view_ids( &self, document_id: &DocumentId, - ) -> Result<(), DocumentStorageError> { - // Start a transaction, any db insertions after this point, and before the `commit()` - // will be rolled back in the event of an error. - let mut tx = self - .pool - .begin() - .await - .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; - - // Collect all views _except_ the current view for this document + ) -> Result, DocumentStorageError> { let document_view_ids: Vec = query_scalar( " SELECT - document_views.document_view_id, - documents.document_view_id + document_views.document_view_id FROM document_views - LEFT JOIN - documents - ON - documents.document_view_id = document_views.document_view_id WHERE document_views.document_id = $1 - AND - documents.document_view_id IS NULL ", ) .bind(document_id.as_str()) - .fetch_all(&mut tx) + .fetch_all(&self.pool) + .await + .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + + Ok(document_view_ids + .iter() + .map(|document_id_str| { + document_id_str + .parse::() + .expect("Document Id's coming from the store should be valid") + }) + .collect()) + } + + /// Get the ids of all documents which are related to from another document view. + pub async fn get_child_document_ids( + &self, + document_view_id: &DocumentViewId, + ) -> Result, DocumentStorageError> { + let document_view_ids: Vec = query_scalar( + " + SELECT DISTINCT + document_views.document_id + FROM + document_views + WHERE + document_views.document_view_id + IN ( + SELECT + operation_fields_v1.value + FROM + document_view_fields + LEFT JOIN + operation_fields_v1 + ON + document_view_fields.operation_id = operation_fields_v1.operation_id + AND + document_view_fields.name = operation_fields_v1.name + WHERE + operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') + AND + document_view_fields.document_view_id = $1 + ) + ", + ) + .bind(document_view_id.to_string()) + .fetch_all(&self.pool) .await .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; - // Iterate over all document views and delete them if no document field exists in the - // database which contains a pinned relation to this view. - // - // Deletes on "document_views" cascade to "document_view_fields" so rows there are also removed - // from the database. - for document_view_id in document_view_ids { - query( + Ok(document_view_ids + .iter() + .map(|document_id_str| { + document_id_str + .parse::() + .expect("Document Id's coming from the store should be valid") + }) + .collect()) + } + + /// Attempt to remove a document view from the store. Returns a boolean which indicates if the + /// removal took place. 
+ /// + /// This operations only succeeds if the view is "dangling", meaning no other document view + /// exists which relates to this view, AND it is not the current view of any document. + pub async fn prune_document_view( + &self, + document_view_id: &DocumentViewId, + ) -> Result { + // Attempt to delete the view. If it is pinned from an existing view, or it is the current + // view of a document, the deletion will not go ahead. + let result = query( " DELETE FROM document_views WHERE document_views.document_view_id = $1 AND NOT EXISTS ( - SELECT * FROM operation_fields_v1 + SELECT + document_view_fields.document_view_id + FROM + document_view_fields + LEFT JOIN + operation_fields_v1 + ON + document_view_fields.operation_id = operation_fields_v1.operation_id + AND + document_view_fields.name = operation_fields_v1.name WHERE operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') AND operation_fields_v1.value = $1 ) - ", + AND NOT EXISTS ( + SELECT documents.document_id FROM documents + WHERE documents.document_view_id = $1 + ) + " ) - .bind(document_view_id) - .execute(&mut tx) + .bind(document_view_id.to_string()) + .execute(&self.pool) .await .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + + // If any rows were affected the deletion went ahead. + if result.rows_affected() > 0 { + debug!("Deleted view: {}", document_view_id); + Ok(true) + } else { + debug!("Did not delete view: {}", document_view_id); + Ok(false) } + } - // Commit the tx here as no errors occurred. + /// Check if this view is the current view of it's document. + pub async fn is_current_view( + &self, + document_view_id: &DocumentViewId, + ) -> Result { + let document_view_id: Option = query_scalar( + " + SELECT documents.document_view_id FROM documents + WHERE documents.document_view_id = $1 + ", + ) + .bind(document_view_id.to_string()) + .fetch_optional(&self.pool) + .await + .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + + Ok(document_view_id.is_some()) + } + + /// Purge a document from the store by it's id. + /// + /// This removes entries, operations and any materialized documents which exist. + /// + /// The only unaffected table after deletion is the `logs` table as we still want to remember + /// which log ids an author has already used so we can continue to avoid collisions. + pub async fn purge_document( + &self, + document_id: &DocumentId, + ) -> Result<(), DocumentStorageError> { + // Start a transaction, any db insertions after this point, and before the `commit()` + // will be rolled back in the event of an error. + let mut tx = self + .pool + .begin() + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Delete rows from `documents` table, this cascades up to `document_views` and + // `document_view_fields` tables. + query( + " + DELETE FROM documents + WHERE documents.document_id = $1 + ", + ) + .bind(document_id.to_string()) + .fetch_all(&mut tx) + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Delete rows from `entries` table. + query( + " + DELETE FROM entries + WHERE entries.entry_hash IN ( + SELECT operations_v1.operation_id FROM operations_v1 + WHERE operations_v1.document_id = $1 + ) + ", + ) + .bind(document_id.to_string()) + .fetch_all(&mut tx) + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Delete rows from `operations_v1` table, this cascades up to `operation_fields_v1` table + // as well. 
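+        // (Note: rows in `entries` were deleted first above because that statement selects
+        // the matching operation ids from `operations_v1`; once the rows below are removed
+        // the lookup would match nothing.)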
+ query( + " + DELETE FROM operations_v1 + WHERE operations_v1.document_id = $1 + ", + ) + .bind(document_id.to_string()) + .fetch_all(&mut tx) + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Commit the transaction if all queries succeeded. tx.commit() .await .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; @@ -465,16 +608,18 @@ async fn get_document_view_field_rows( operation_fields_v1.list_index, operation_fields_v1.field_type, operation_fields_v1.value - FROM + FROM document_view_fields - LEFT JOIN document_views - ON - document_view_fields.document_view_id = document_views.document_view_id - LEFT JOIN operation_fields_v1 - ON - document_view_fields.operation_id = operation_fields_v1.operation_id - AND - document_view_fields.name = operation_fields_v1.name + LEFT JOIN + operation_fields_v1 + ON + document_view_fields.operation_id = operation_fields_v1.operation_id + AND + document_view_fields.name = operation_fields_v1.name + LEFT JOIN + document_views + ON + document_view_fields.document_view_id = document_views.document_view_id WHERE document_view_fields.document_view_id = $1 ORDER BY @@ -603,9 +748,11 @@ async fn insert_document( #[cfg(test)] mod tests { + use p2panda_rs::api::next_args; use p2panda_rs::document::materialization::build_graph; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::{DocumentBuilder, DocumentId, DocumentViewFields, DocumentViewId}; + use p2panda_rs::entry::{LogId, SeqNum}; use p2panda_rs::identity::KeyPair; use p2panda_rs::operation::traits::AsOperation; use p2panda_rs::operation::{Operation, OperationId}; @@ -622,8 +769,8 @@ mod tests { use crate::materializer::tasks::reduce_task; use crate::materializer::TaskInput; use crate::test_utils::{ - add_schema_and_documents, build_document, populate_and_materialize, populate_store_config, - test_runner, TestNode, + add_schema_and_documents, assert_query, build_document, populate_and_materialize, + populate_store_config, test_runner, TestNode, }; #[rstest] @@ -1008,7 +1155,7 @@ mod tests { } #[rstest] - fn prunes_document_views( + fn prunes_document_view( #[from(populate_store_config)] #[with(2, 1, 1)] config: PopulateStoreConfig, @@ -1041,9 +1188,15 @@ mod tests { .unwrap(); assert!(document.is_some()); - // Now prune dangling views for the document. - let result = node.context.store.prune_document_views(&document_id).await; + // Prune the first document view. + let result = node + .context + .store + .prune_document_view(&first_document_view_id) + .await; assert!(result.is_ok()); + // Returns `true` when pruning succeeded. + assert!(result.unwrap()); // Get the first document view again, it should no longer be there. let document = node @@ -1098,9 +1251,15 @@ mod tests { ) .await; - // Now prune dangling views for the document. - let result = node.context.store.prune_document_views(&document_id).await; + // Attempt to prune the first document view. + let result = node + .context + .store + .prune_document_view(&first_document_view_id) + .await; assert!(result.is_ok()); + // Returns `false` when pruning failed. + assert!(!result.unwrap()); // Get the first document view, it should still be in the store as it was pinned. 
let document = node @@ -1112,4 +1271,139 @@ mod tests { assert!(document.is_some()); }); } + + #[rstest] + fn does_not_prune_current_view( + #[from(populate_store_config)] + #[with(1, 1, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + let current_document_view_id: DocumentViewId = document_id.as_str().parse().unwrap(); + + // Attempt to prune the current document view. + let result = node + .context + .store + .prune_document_view(¤t_document_view_id) + .await; + assert!(result.is_ok()); + // Returns `false` when pruning failed. + assert!(!result.unwrap()); + + // Get the current document view, it should still be in the store. + let document = node + .context + .store + .get_document_by_view_id(¤t_document_view_id) + .await + .unwrap(); + assert!(document.is_some()); + }); + } + + #[rstest] + fn purge_document( + #[from(populate_store_config)] + #[with(2, 1, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + + // There is one document in the database which contains an CREATE and UPDATE operation + // which were both published by the same author. These are the number of rows we + // expect for each table. + assert_query(&node, "SELECT entry_hash FROM entries", 2).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 2).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 26).await; + assert_query(&node, "SELECT log_id FROM logs", 1).await; + assert_query(&node, "SELECT document_id FROM documents", 1).await; + assert_query(&node, "SELECT document_id FROM document_views", 1).await; + assert_query(&node, "SELECT name FROM document_view_fields", 10).await; + + // Purge this document from the database, we now expect all tables to be empty. + let result = node.context.store.purge_document(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 0).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 0).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 0).await; + assert_query(&node, "SELECT log_id FROM logs", 1).await; + assert_query(&node, "SELECT document_id FROM documents", 0).await; + assert_query(&node, "SELECT document_id FROM document_views", 0).await; + assert_query(&node, "SELECT name FROM document_view_fields", 0).await; + }); + } + + #[rstest] + fn purging_only_effects_target_document( + #[from(populate_store_config)] + #[with(1, 2, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + + // There are two documents in the database which each contain a single CREATE operation + // and they were published by the same author. These are the number of rows we expect + // for each table. 
+ assert_query(&node, "SELECT entry_hash FROM entries", 2).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 2).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 26).await; + assert_query(&node, "SELECT log_id FROM logs", 2).await; + assert_query(&node, "SELECT document_id FROM documents", 2).await; + assert_query(&node, "SELECT document_id FROM document_views", 2).await; + assert_query(&node, "SELECT name FROM document_view_fields", 20).await; + + // Purge one document from the database, we now expect half the rows to be remaining. + let result = node.context.store.purge_document(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + + assert_query(&node, "SELECT entry_hash FROM entries", 1).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 1).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 13).await; + assert_query(&node, "SELECT log_id FROM logs", 2).await; + assert_query(&node, "SELECT document_id FROM documents", 1).await; + assert_query(&node, "SELECT document_id FROM document_views", 1).await; + assert_query(&node, "SELECT name FROM document_view_fields", 10).await; + }); + } + + #[rstest] + fn next_args_after_purge( + #[from(populate_store_config)] + #[with(2, 1, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (key_pairs, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + let public_key = key_pairs[0].public_key(); + + let _ = node.context.store.purge_document(&document_id).await; + + let result = next_args( + &node.context.store, + &public_key, + Some(&document_id.as_str().parse().unwrap()), + ) + .await; + println!("{:#?}", result); + assert!(result.is_err()); + + let result = next_args(&node.context.store, &public_key, None).await; + + assert!(result.is_ok()); + let next_args = result.unwrap(); + assert_eq!(next_args, (None, None, SeqNum::default(), LogId::new(1))); + }); + } } diff --git a/aquadoggo/src/materializer/service.rs b/aquadoggo/src/materializer/service.rs index 7465ab0b0..7b1ebb45c 100644 --- a/aquadoggo/src/materializer/service.rs +++ b/aquadoggo/src/materializer/service.rs @@ -8,7 +8,9 @@ use tokio::task; use crate::bus::{ServiceMessage, ServiceSender}; use crate::context::Context; use crate::manager::{ServiceReadySender, Shutdown}; -use crate::materializer::tasks::{blob_task, dependency_task, reduce_task, schema_task}; +use crate::materializer::tasks::{ + blob_task, dependency_task, garbage_collection_task, reduce_task, schema_task, +}; use crate::materializer::worker::{Factory, Task, TaskStatus}; use crate::materializer::TaskInput; @@ -39,6 +41,7 @@ pub async fn materializer_service( factory.register("dependency", pool_size, dependency_task); factory.register("schema", pool_size, schema_task); factory.register("blob", pool_size, blob_task); + factory.register("garbage_collection", pool_size, garbage_collection_task); // Get a listener for error signal from factory let on_error = factory.on_error(); diff --git a/aquadoggo/src/materializer/tasks/dependency.rs b/aquadoggo/src/materializer/tasks/dependency.rs index 02effd0c5..83c3d58d4 100644 --- a/aquadoggo/src/materializer/tasks/dependency.rs +++ b/aquadoggo/src/materializer/tasks/dependency.rs @@ -995,8 +995,9 @@ mod tests { .await .unwrap() .expect("Should have returned new tasks"); - assert_eq!(tasks.len(), 1); - 
assert_eq!(tasks[0].worker_name(), &String::from("dependency")); + assert_eq!(tasks.len(), 2); + assert_eq!(tasks[0].worker_name(), &String::from("garbage_collection")); + assert_eq!(tasks[1].worker_name(), &String::from("dependency")); // We should have now a materialized latest post and comment document but not the // pinned historical version of the post, where the comment was pointing at! @@ -1026,7 +1027,7 @@ mod tests { // 2. The "dependency" task followed materialising the "post" found a reverse relation // to a "comment" document .. it dispatches another "dependency" task for it - let tasks = dependency_task(node_b.context.clone(), tasks[0].input().clone()) + let tasks = dependency_task(node_b.context.clone(), tasks[1].input().clone()) .await .unwrap(); assert_eq!( diff --git a/aquadoggo/src/materializer/tasks/garbage_collection.rs b/aquadoggo/src/materializer/tasks/garbage_collection.rs new file mode 100644 index 000000000..36963b3a7 --- /dev/null +++ b/aquadoggo/src/materializer/tasks/garbage_collection.rs @@ -0,0 +1,650 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use log::debug; +use p2panda_rs::document::DocumentViewId; +use p2panda_rs::operation::traits::AsOperation; +use p2panda_rs::schema::SchemaId; +use p2panda_rs::storage_provider::traits::OperationStore; +use p2panda_rs::Human; + +use crate::context::Context; +use crate::materializer::worker::{TaskError, TaskResult}; +use crate::materializer::{Task, TaskInput}; + +pub async fn garbage_collection_task(context: Context, input: TaskInput) -> TaskResult { + debug!("Working on {}", input); + + match input { + TaskInput::DocumentId(document_id) => { + // This task is concerned with a document which may now have dangling views. We want + // to check for this and delete any views which are no longer needed. + debug!( + "Prune document views for document: {}", + document_id.display() + ); + + // Collect the ids of all views for this document. + let all_document_view_ids: Vec = context + .store + .get_all_document_view_ids(&document_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))?; + + // Iterate over all document views and delete them if no document view exists which refers + // to it in a pinned relation field AND they are not the current view of a document. + // + // Deletes on "document_views" cascade to "document_view_fields" so rows there are also removed + // from the database. + let mut all_effected_child_relations = vec![]; + let mut deleted_views_count = 0; + for document_view_id in &all_document_view_ids { + // Check if this is the current view of it's document. This will still return true + // if the document in question is deleted. + let is_current_view = context + .store + .is_current_view(document_view_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))?; + + let mut effected_child_relations = vec![]; + let mut view_deleted = false; + + if !is_current_view { + // Before attempting to delete this view we need to fetch the ids of any child documents + // which might have views that could become unpinned as a result of this delete. These + // will be returned if the deletion is successful. + effected_child_relations = context + .store + .get_child_document_ids(document_view_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))?; + + // Attempt to delete the view. If it is pinned from an existing view the deletion will + // not go ahead. 
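+                    // (Both conditions are enforced inside `prune_document_view` as part of a
+                    // single SQL DELETE, so a view which is still pinned elsewhere, or is the
+                    // current view of a document, is simply left in place and `false` is
+                    // returned.)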
+ view_deleted = context + .store + .prune_document_view(document_view_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))?; + } + + // If the view was deleted then push the effected children to the return array + if view_deleted { + debug!("Deleted view: {}", document_view_id); + deleted_views_count += 1; + all_effected_child_relations.extend(effected_child_relations); + } else { + debug!("Did not delete view: {}", document_view_id); + } + } + + // If the number of deleted views equals the total existing views (minus one for the + // current view), then there is a chance this became completely detached. In this case + // we should check if this document is a blob document and then try to purge it. + if all_document_view_ids.len() - 1 == deleted_views_count { + let operation = context + .store + .get_operation(&document_id.as_str().parse().unwrap()) + .await + .map_err(|err| TaskError::Failure(err.to_string()))? + .expect("Operation exists in store"); + + if let SchemaId::Blob(_) = operation.schema_id() { + // Purge the blob and all it's pieces. This only succeeds if no document + // refers to the blob document by either a relation or pinned relation. + context + .store + .purge_blob(&document_id) + .await + .map_err(|err| TaskError::Failure(err.to_string()))?; + } + } + + // We compose some more prune tasks based on the effected documents returned above. + let next_tasks: Vec> = all_effected_child_relations + .iter() + .map(|document_id| { + debug!("Issue prune task for document: {document_id:#?}"); + Task::new( + "garbage_collection", + TaskInput::DocumentId(document_id.to_owned()), + ) + }) + .collect(); + + if next_tasks.is_empty() { + Ok(None) + } else { + Ok(Some(next_tasks)) + } + } + _ => Err(TaskError::Critical("Invalid task input".into())), + } +} + +#[cfg(test)] +mod tests { + use p2panda_rs::document::DocumentId; + use p2panda_rs::identity::KeyPair; + use p2panda_rs::schema::SchemaId; + use p2panda_rs::storage_provider::traits::DocumentStore; + use p2panda_rs::test_utils::fixtures::{key_pair, random_document_view_id}; + use rstest::rstest; + + use crate::materializer::tasks::garbage_collection_task; + use crate::materializer::{Task, TaskInput}; + use crate::test_utils::{ + add_blob, add_schema_and_documents, assert_query, test_runner, update_document, TestNode, + }; + + #[rstest] + fn e2e_pruning(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Publish some documents which we will later point relations at. + let (child_schema, child_document_view_ids) = add_schema_and_documents( + &mut node, + "schema_for_child", + vec![ + vec![("uninteresting_field", 1.into(), None)], + vec![("uninteresting_field", 2.into(), None)], + ], + &key_pair, + ) + .await; + + // Create some parent documents which contain a pinned relation list pointing to the + // children created above. + let (parent_schema, parent_document_view_ids) = add_schema_and_documents( + &mut node, + "schema_for_parent", + vec![vec![ + ("name", "parent".into(), None), + ( + "children", + child_document_view_ids.clone().into(), + Some(child_schema.id().to_owned()), + ), + ]], + &key_pair, + ) + .await; + + // Convert view id to document id. + let parent_document_id: DocumentId = parent_document_view_ids[0] + .clone() + .to_string() + .parse() + .unwrap(); + + // Update the parent document so that there are now two views stored in the db, one + // current and one dangling. 
+ let updated_parent_view_id = update_document( + &mut node, + parent_schema.id(), + vec![("name", "Parent".into())], + &parent_document_view_ids[0], + &key_pair, + ) + .await; + + // Get the historic (dangling) view to check it's actually there. + let historic_document_view = node + .context + .store + .get_document_by_view_id(&parent_document_view_ids[0].clone()) + .await + .unwrap(); + + // It is there... + assert!(historic_document_view.is_some()); + + // Create another document, which has a pinned relation to the parent document created + // above. Now the relation graph looks like this + // + // GrandParent --> Parent --> Child1 + // \ + // --> Child2 + // + let (schema_for_grand_parent, grand_parent_document_view_ids) = + add_schema_and_documents( + &mut node, + "schema_for_grand_parent", + vec![vec![ + ("name", "grand parent".into(), None), + ( + "child", + parent_document_view_ids[0].clone().into(), + Some(parent_schema.id().to_owned()), + ), + ]], + &key_pair, + ) + .await; + + // Convert view id to document id. + let grand_parent_document_id: DocumentId = grand_parent_document_view_ids[0] + .clone() + .to_string() + .parse() + .unwrap(); + + // Update the grand parent document to a new view, leaving the previous one dangling. + // + // Note: this test method _does not_ dispatch "garbage_collection" tasks. + update_document( + &mut node, + schema_for_grand_parent.id(), + vec![ + ("name", "Grand Parent".into()), + ("child", updated_parent_view_id.into()), + ], + &grand_parent_document_view_ids[0], + &key_pair, + ) + .await; + + // Get the historic (dangling) view to make sure it exists. + let historic_document_view = node + .context + .store + .get_document_by_view_id(&grand_parent_document_view_ids[0].clone()) + .await + .unwrap(); + + // It does... + assert!(historic_document_view.is_some()); + + // Now prune dangling views for the grand parent document. This method deletes any + // dangling views (not pinned, not current) from the database for this document. It + // returns the document ids of any documents which may have views which have become + // "un-pinned" as a result of this view being removed. In this case, that's the + // document id of the "parent" document. + let next_tasks = garbage_collection_task( + node.context.clone(), + TaskInput::DocumentId(grand_parent_document_id), + ) + .await + .unwrap() + .unwrap(); + + // One new prune task is issued. + assert_eq!(next_tasks.len(), 1); + // It is the parent (which this grand parent relates to) as we expect. + assert_eq!( + next_tasks[0], + Task::new( + "garbage_collection", + TaskInput::DocumentId(parent_document_id) + ) + ); + + // Check the historic view has been deleted. + let historic_document_view = node + .context + .store + .get_document_by_view_id(&grand_parent_document_view_ids[0].clone()) + .await + .unwrap(); + + // It has... + assert!(historic_document_view.is_none()); + + // Now prune dangling views for the parent document. + let next_tasks = + garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) + .await + .unwrap() + .unwrap(); + + // Two new prune tasks issued. + assert_eq!(next_tasks.len(), 2); + // These are the two final child documents. + assert_eq!( + next_tasks, + child_document_view_ids + .iter() + .rev() + .map(|document_view_id| { + let document_id: DocumentId = document_view_id.to_string().parse().unwrap(); + Task::new("garbage_collection", TaskInput::DocumentId(document_id)) + }) + .collect::>>() + ); + + // Check the historic view has been deleted. 
+ let historic_document_view = node + .context + .store + .get_document_by_view_id(&parent_document_view_ids[0].clone()) + .await + .unwrap(); + + // It has. + assert!(historic_document_view.is_none()); + + // Running the child tasks returns no new tasks. + let next_tasks = + garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) + .await + .unwrap(); + + assert!(next_tasks.is_none()); + }); + } + + #[rstest] + fn no_new_tasks_issued_when_no_views_pruned(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Create a child document. + let (child_schema, child_document_view_ids) = add_schema_and_documents( + &mut node, + "schema_for_child", + vec![vec![("uninteresting_field", 1.into(), None)]], + &key_pair, + ) + .await; + + // Create a parent document which contains a pinned relation list pointing to the + // child created above. + let (_, parent_document_view_ids) = add_schema_and_documents( + &mut node, + "schema_for_parent", + vec![vec![ + ("name", "parent".into(), None), + ( + "children", + child_document_view_ids.clone().into(), + Some(child_schema.id().to_owned()), + ), + ]], + &key_pair, + ) + .await; + + // Run a garbage collection task for the parent. + let document_id: DocumentId = parent_document_view_ids[0].to_string().parse().unwrap(); + let next_tasks = + garbage_collection_task(node.context.clone(), TaskInput::DocumentId(document_id)) + .await + .unwrap(); + + // No views were pruned so we expect no new tasks to be issued. + assert!(next_tasks.is_none()); + }) + } + + #[rstest] + fn purges_blobs(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Publish a blob. + let blob_document_view = add_blob(&mut node, "Hello World!", &key_pair).await; + let blob_document_id: DocumentId = blob_document_view.to_string().parse().unwrap(); + + // Check the blob is there. + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + assert!(blob.is_some()); + + // Run a garbage collection task for the blob document. + let next_tasks = garbage_collection_task( + node.context.clone(), + TaskInput::DocumentId(blob_document_id.clone()), + ) + .await + .unwrap(); + + // It shouldn't return any new tasks. + assert!(next_tasks.is_none()); + + // The blob should no longer be available. + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + assert!(blob.is_none()); + + // And all expected rows deleted from the database. + assert_query(&node, "SELECT entry_hash FROM entries", 0).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 0).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 0).await; + assert_query(&node, "SELECT log_id FROM logs", 3).await; + assert_query(&node, "SELECT document_id FROM documents", 0).await; + assert_query(&node, "SELECT document_id FROM document_views", 0).await; + assert_query(&node, "SELECT name FROM document_view_fields", 0).await; + }); + } + + #[rstest] + fn purges_newly_detached_blobs(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Create a blob document. + let blob_data = "Hello, World!".to_string(); + let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Relate to the blob from a new document. 
+ let (schema, documents_pinning_blob) = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + // Now update the document to relate to another blob. This means the previously + // created blob is now "dangling". + update_document( + &mut node, + schema.id(), + vec![("blob", random_document_view_id().into())], + &documents_pinning_blob[0].clone(), + &key_pair, + ) + .await; + + // Run a task for the parent document. + let document_id: DocumentId = documents_pinning_blob[0].to_string().parse().unwrap(); + let next_tasks = + garbage_collection_task(node.context.clone(), TaskInput::DocumentId(document_id)) + .await + .unwrap() + .unwrap(); + + // It issues one new task which is for the blob document. + assert_eq!(next_tasks.len(), 1); + let next_tasks = + garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) + .await + .unwrap(); + // No new tasks issued. + assert!(next_tasks.is_none()); + + // The blob has correctly been purged. + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + + assert!(blob.is_none()); + }) + } + + #[rstest] + fn other_documents_keep_blob_alive(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Create a blob document. + let blob_data = "Hello, World!".to_string(); + let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Relate to the blob from a new document. + let (schema, documents_pinning_blob) = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + // Now update the document to relate to another blob. This means the previously + // created blob is now "dangling". + update_document( + &mut node, + schema.id(), + vec![("blob", random_document_view_id().into())], + &documents_pinning_blob[0].clone(), + &key_pair, + ) + .await; + + // Another document relating to the blob (this time from in a relation field). + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_document_id.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + // Run a task for the parent document. + let document_id: DocumentId = documents_pinning_blob[0].to_string().parse().unwrap(); + let next_tasks = + garbage_collection_task(node.context.clone(), TaskInput::DocumentId(document_id)) + .await + .unwrap() + .unwrap(); + + // It issues one new task which is for the blob document. + assert_eq!(next_tasks.len(), 1); + let next_tasks = + garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) + .await + .unwrap(); + // No new tasks issued. + assert!(next_tasks.is_none()); + + // The blob should still be there as it was kept alive by a different document. + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + + assert!(blob.is_some()); + }) + } + + #[rstest] + fn all_relation_types_keep_blobs_alive(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".to_string(); + + // Any type of relation can keep a blob alive, here we create one of each and run + // garbage collection tasks for each blob. 
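+            // (The four documents below reference the blobs via `pinned_relation`,
+            // `pinned_relation_list`, `relation` and `relation_list` respectively, matching
+            // the field types checked by `reverse_relations`.)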
+ + let blob_view_id_1 = add_blob(&mut node, &blob_data, &key_pair).await; + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id_1.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + let blob_view_id_2 = add_blob(&mut node, &blob_data, &key_pair).await; + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + vec![blob_view_id_2.clone()].into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + let blob_view_id_3 = add_blob(&mut node, &blob_data, &key_pair).await; + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id_3 + .to_string() + .parse::() + .unwrap() + .into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + let blob_view_id_4 = add_blob(&mut node, &blob_data, &key_pair).await; + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + vec![blob_view_id_4.to_string().parse::().unwrap()].into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + for blob_view_id in [ + blob_view_id_1, + blob_view_id_2, + blob_view_id_3, + blob_view_id_4, + ] { + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let next_tasks = garbage_collection_task( + node.context.clone(), + TaskInput::DocumentId(blob_document_id.clone()), + ) + .await + .unwrap(); + + assert!(next_tasks.is_none()); + + // All blobs should be kept alive. + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + + assert!(blob.is_some()); + } + }) + } +} diff --git a/aquadoggo/src/materializer/tasks/mod.rs b/aquadoggo/src/materializer/tasks/mod.rs index 674fab8f5..4f53e1bec 100644 --- a/aquadoggo/src/materializer/tasks/mod.rs +++ b/aquadoggo/src/materializer/tasks/mod.rs @@ -2,10 +2,12 @@ mod blob; mod dependency; +mod garbage_collection; mod reduce; mod schema; pub use blob::blob_task; pub use dependency::dependency_task; +pub use garbage_collection::garbage_collection_task; pub use reduce::reduce_task; pub use schema::schema_task; diff --git a/aquadoggo/src/materializer/tasks/reduce.rs b/aquadoggo/src/materializer/tasks/reduce.rs index 1e3a4b408..f24aeb3c5 100644 --- a/aquadoggo/src/materializer/tasks/reduce.rs +++ b/aquadoggo/src/materializer/tasks/reduce.rs @@ -231,6 +231,8 @@ async fn reduce_document + WithPublicKey>( .await .map_err(|err| TaskError::Critical(err.to_string()))?; + let mut tasks = vec![]; + // If the document was deleted, then we return nothing if document.is_deleted() { debug!( @@ -238,7 +240,6 @@ async fn reduce_document + WithPublicKey>( document.display(), document.view_id().display() ); - return Ok(None); } if document.is_edited() { @@ -251,14 +252,31 @@ async fn reduce_document + WithPublicKey>( debug!("Created {}", document.display()); }; - debug!( - "Dispatch dependency task for view with id: {}", - document.view_id() - ); - Ok(Some(vec![Task::new( - "dependency", - TaskInput::DocumentViewId(document.view_id().to_owned()), - )])) + if document.is_deleted() || document.is_edited() { + debug!( + "Dispatch prune task for document with id: {}", + document.id() + ); + + tasks.push(Task::new( + "garbage_collection", + TaskInput::DocumentId(document.id().to_owned()), + )) + } + + if !document.is_deleted() { + debug!( + "Dispatch dependency task for view with id: {}", + document.view_id() + ); + + tasks.push(Task::new( + "dependency", + TaskInput::DocumentViewId(document.view_id().to_owned()), + )); + } + + Ok(Some(tasks)) } Err(err) => 
{ // There is not enough operations yet to materialise this view. Maybe next time! @@ -500,7 +518,7 @@ mod tests { for document_id in &document_ids { let input = TaskInput::DocumentId(document_id.clone()); let tasks = reduce_task(node.context.clone(), input).await.unwrap(); - assert!(tasks.is_none()); + assert_eq!(tasks.unwrap().len(), 1); } for document_id in &document_ids { @@ -527,16 +545,16 @@ mod tests { #[rstest] #[case( populate_store_config(3, 1, 1, false, doggo_schema(), doggo_fields(), doggo_fields()), - true + vec!["garbage_collection".to_string(), "dependency".to_string()] )] // This document is deleted, it shouldn't spawn a dependency task. #[case( populate_store_config(3, 1, 1, true, doggo_schema(), doggo_fields(), doggo_fields()), - false + vec!["garbage_collection".to_string()] )] - fn returns_dependency_task_inputs( + fn returns_correct_dependency_and_prune_tasks( #[case] config: PopulateStoreConfig, - #[case] is_next_task: bool, + #[case] expected_worker_names: Vec, ) { test_runner(move |node: TestNode| async move { // Populate the store with some entries and operations but DON'T materialise any @@ -547,9 +565,16 @@ mod tests { .expect("There should be at least one document id"); let input = TaskInput::DocumentId(document_id.clone()); - let next_task_inputs = reduce_task(node.context.clone(), input).await.unwrap(); + let next_tasks = reduce_task(node.context.clone(), input) + .await + .expect("Ok result") + .expect("Some tasks returned"); - assert_eq!(next_task_inputs.is_some(), is_next_task); + assert_eq!(next_tasks.len(), expected_worker_names.len()); + + for (index, worker_name) in expected_worker_names.iter().enumerate() { + assert_eq!(next_tasks[index].worker_name(), worker_name); + } }); } diff --git a/aquadoggo/src/test_utils/mod.rs b/aquadoggo/src/test_utils/mod.rs index 19d444074..eb0548ee3 100644 --- a/aquadoggo/src/test_utils/mod.rs +++ b/aquadoggo/src/test_utils/mod.rs @@ -12,7 +12,7 @@ pub use config::TestConfiguration; pub use db::{drop_database, initialize_db, initialize_sqlite_db}; pub use helpers::{build_document, doggo_fields, doggo_schema, schema_from_fields}; pub use node::{ - add_document, add_schema, add_schema_and_documents, populate_and_materialize, - populate_store_config, TestNode, + add_blob, add_document, add_schema, add_schema_and_documents, assert_query, + populate_and_materialize, populate_store_config, update_document, TestNode, }; pub use runner::{test_runner, test_runner_with_manager, TestNodeManager}; diff --git a/aquadoggo/src/test_utils/node.rs b/aquadoggo/src/test_utils/node.rs index 85a05cd43..5c7fae147 100644 --- a/aquadoggo/src/test_utils/node.rs +++ b/aquadoggo/src/test_utils/node.rs @@ -4,12 +4,14 @@ use log::{debug, info}; use p2panda_rs::document::{DocumentId, DocumentViewId}; use p2panda_rs::entry::traits::AsEncodedEntry; use p2panda_rs::identity::KeyPair; -use p2panda_rs::operation::{OperationBuilder, OperationValue}; +use p2panda_rs::operation::{OperationAction, OperationBuilder, OperationId, OperationValue}; use p2panda_rs::schema::{FieldType, Schema, SchemaId, SchemaName}; +use p2panda_rs::storage_provider::traits::OperationStore; use p2panda_rs::test_utils::memory_store::helpers::{ populate_store, send_to_store, PopulateStoreConfig, }; use rstest::fixture; +use sqlx::query_scalar; use crate::context::Context; use crate::db::SqlStore; @@ -97,13 +99,18 @@ pub async fn populate_and_materialize( // Create reduce task input. 
let input = TaskInput::DocumentId(document_id); // Run reduce task and collect returned dependency tasks. - let dependency_tasks = reduce_task(node.context.clone(), input.clone()) + let next_tasks = reduce_task(node.context.clone(), input.clone()) .await .expect("Reduce document"); // Run dependency tasks. - if let Some(tasks) = dependency_tasks { - for task in tasks { + if let Some(tasks) = next_tasks { + // We only want to issue dependency tasks. + let dependency_tasks = tasks + .iter() + .filter(|task| task.worker_name() == "depenedency"); + + for task in dependency_tasks { dependency_task(node.context.clone(), task.input().to_owned()) .await .expect("Run dependency task"); @@ -145,13 +152,18 @@ pub async fn add_document( .expect("Publish CREATE operation"); let input = TaskInput::DocumentId(DocumentId::from(entry_signed.hash())); - let dependency_tasks = reduce_task(node.context.clone(), input.clone()) + let next_tasks = reduce_task(node.context.clone(), input.clone()) .await .expect("Reduce document"); // Run dependency tasks - if let Some(tasks) = dependency_tasks { - for task in tasks { + if let Some(tasks) = next_tasks { + // We only want to issue dependency tasks. + let dependency_tasks = tasks + .iter() + .filter(|task| task.worker_name() == "depenedency"); + + for task in dependency_tasks { dependency_task(node.context.clone(), task.input().to_owned()) .await .expect("Run dependency task"); @@ -264,3 +276,105 @@ pub async fn add_schema_and_documents( (schema, view_ids) } + +/// Helper method for updating documents. +pub async fn update_document( + node: &mut TestNode, + schema_id: &SchemaId, + fields: Vec<(&str, OperationValue)>, + previous: &DocumentViewId, + key_pair: &KeyPair, +) -> DocumentViewId { + // Get requested schema from store. + let schema = node + .context + .schema_provider + .get(schema_id) + .await + .expect("Schema not found"); + + // Build, publish and reduce an update operation for document. + let create_op = OperationBuilder::new(schema.id()) + .action(OperationAction::Update) + .fields(&fields) + .previous(previous) + .build() + .expect("Build operation"); + + let (entry_signed, _) = send_to_store(&node.context.store, &create_op, &schema, key_pair) + .await + .expect("Publish UPDATE operation"); + + let document_id = node + .context + .store + .get_document_id_by_operation_id(&OperationId::from(entry_signed.hash())) + .await + .expect("No db errors") + .expect("Can get document id"); + + let input = TaskInput::DocumentId(document_id); + let next_tasks = reduce_task(node.context.clone(), input.clone()) + .await + .expect("Reduce document"); + + // Run dependency tasks + if let Some(tasks) = next_tasks { + // We only want to issue dependency tasks. + let dependency_tasks = tasks + .iter() + .filter(|task| task.worker_name() == "dependency"); + + for task in dependency_tasks { + dependency_task(node.context.clone(), task.input().to_owned()) + .await + .expect("Run dependency task"); + } + } + DocumentViewId::from(entry_signed.hash()) +} + +pub async fn add_blob(node: &mut TestNode, blob_data: &str, key_pair: &KeyPair) -> DocumentViewId { + // Publish blob pieces and blob. 
+ let (blob_data_a, blob_data_b) = blob_data.split_at(blob_data.len() / 2); + let blob_piece_view_id_1 = add_document( + node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data_a.into())], + &key_pair, + ) + .await; + + let blob_piece_view_id_2 = add_document( + node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data_b.into())], + &key_pair, + ) + .await; + let blob_view_id = add_document( + node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![blob_piece_view_id_1, blob_piece_view_id_2].into(), + ), + ], + &key_pair, + ) + .await; + + blob_view_id +} + +// Helper for asserting expected number of items yielded from a SQL query. +pub async fn assert_query(node: &TestNode, sql: &str, expected_len: usize) { + let result: Result, _> = + query_scalar(sql).fetch_all(&node.context.store.pool).await; + + assert!(result.is_ok(), "{:#?}", result); + assert_eq!(result.unwrap().len(), expected_len, "{:?}", sql); +} From a404d0c655a55d641eed40e18c250d2f15c89aff Mon Sep 17 00:00:00 2001 From: Andreas Dzialocha Date: Tue, 5 Sep 2023 13:17:13 +0200 Subject: [PATCH 08/14] HTTP routes to serve files with correct content type and etag headers (#544) * Remove symlink directory * Add handlers for blob http routes * Fix typo in test utils * Add routes for blob reponses * Remove redundant return statements * Add entry to CHANGELOG.md * Update test since we store files now directly in blobs folder * Remove unused dependencies and features * Clean up a little bit, add comments * Make add_blob test helper accept bytes * Correct etag format and precondition check * WIP ETag test * Add more tests to check against content type and etag * Use tokio for fs io --- CHANGELOG.md | 6 + Cargo.lock | 27 +- aquadoggo/Cargo.toml | 5 +- aquadoggo/src/config.rs | 4 +- aquadoggo/src/db/stores/blob.rs | 54 +-- aquadoggo/src/graphql/mutations/publish.rs | 34 +- aquadoggo/src/graphql/queries/collection.rs | 16 +- aquadoggo/src/graphql/queries/document.rs | 8 +- aquadoggo/src/graphql/queries/next_args.rs | 8 +- aquadoggo/src/graphql/schema.rs | 8 +- aquadoggo/src/graphql/tests.rs | 6 +- aquadoggo/src/http/api.rs | 390 +++++++++++++++++- aquadoggo/src/http/context.rs | 8 +- aquadoggo/src/http/mod.rs | 2 +- aquadoggo/src/http/service.rs | 28 +- aquadoggo/src/materializer/tasks/blob.rs | 150 ++----- .../materializer/tasks/garbage_collection.rs | 53 +-- aquadoggo/src/node.rs | 15 +- aquadoggo/src/proptests/tests.rs | 6 +- aquadoggo/src/test_utils/client.rs | 34 +- aquadoggo/src/test_utils/mod.rs | 4 +- aquadoggo/src/test_utils/node.rs | 84 +++- aquadoggo/src/test_utils/runner.rs | 8 +- 23 files changed, 662 insertions(+), 296 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 83ee86c04..c12547750 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + - Serve static files from `blobs` directory [#480](https://github.com/p2panda/aquadoggo/pull/480) - Add method to store for pruning document views [#491](https://github.com/p2panda/aquadoggo/pull/491) - Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) - Task for automatic garbage collection of unused documents and views [#500](https://github.com/p2panda/aquadoggo/pull/500) +### Changed + +- HTTP routes to serve files with correct content type headers [#544](https://github.com/p2panda/aquadoggo/pull/544) + ## [0.5.0] ### Added diff --git 
a/Cargo.lock b/Cargo.lock index a09973914..1519d5f25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -201,6 +201,7 @@ dependencies = [ "thiserror", "tokio", "tokio-stream", + "tokio-util", "tower", "tower-http", "tower-service", @@ -2836,16 +2837,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "mime_guess" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" -dependencies = [ - "mime", - "unicase", -] - [[package]] name = "minimal-lexical" version = "0.2.1" @@ -4783,16 +4774,9 @@ dependencies = [ "http", "http-body", "http-range-header", - "httpdate", - "mime", - "mime_guess", - "percent-encoding", "pin-project-lite", - "tokio", - "tokio-util", "tower-layer", "tower-service", - "tracing", ] [[package]] @@ -4956,15 +4940,6 @@ dependencies = [ "version_check", ] -[[package]] -name = "unicase" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" -dependencies = [ - "version_check", -] - [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index b2185efba..58492a563 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -24,7 +24,7 @@ async-graphql = { version = "5.0.6", features = ["dynamic-schema"] } async-graphql-axum = "5.0.6" async-trait = "0.1.64" asynchronous-codec = { version = "0.6.2", features = ["cbor"] } -axum = "0.6.10" +axum = { version = "0.6.10", features = ["headers"] } bamboo-rs-core-ed25519-yasmf = "0.1.1" bs58 = "0.4.0" deadqueue = { version = "0.2.3", default-features = false, features = [ @@ -76,11 +76,12 @@ tokio = { version = "1.28.2", features = [ "rt-multi-thread", "sync", "time", + "fs", ] } tokio-stream = { version = "0.1.14", features = ["sync"] } +tokio-util = { version = "0.7.8", features = ["io"] } tower-http = { version = "0.4.0", default-features = false, features = [ "cors", - "fs", ] } triggered = "0.1.2" void = "1.0.2" diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 317e4cf90..4614ccecf 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -6,11 +6,9 @@ use p2panda_rs::schema::SchemaId; use crate::network::NetworkConfiguration; -/// Blobs directory +/// Blobs directory name. pub const BLOBS_DIR_NAME: &str = "blobs"; -pub const BLOBS_SYMLINK_DIR_NAME: &str = "documents"; - /// Configuration object holding all important variables throughout the application. #[derive(Debug, Clone)] pub struct Configuration { diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs index 1cc491d67..f90e28c4f 100644 --- a/aquadoggo/src/db/stores/blob.rs +++ b/aquadoggo/src/db/stores/blob.rs @@ -19,12 +19,12 @@ use crate::db::SqlStore; /// p2panda-rs blob validation too. const MAX_BLOB_PIECES: u64 = 10000; -pub type BlobData = String; +pub type BlobData = Vec; impl SqlStore { /// Get the data for one blob from the store, identified by it's document id. pub async fn get_blob(&self, id: &DocumentId) -> Result, BlobStoreError> { - // Get the root blob document. + // Get the root blob document let blob_document = match self.get_document(id).await? 
{ Some(document) => { if document.schema_id != SchemaId::Blob(1) { @@ -42,7 +42,7 @@ impl SqlStore { &self, view_id: &DocumentViewId, ) -> Result, BlobStoreError> { - // Get the root blob document. + // Get the root blob document let blob_document = match self.get_document_by_view_id(view_id).await? { Some(document) => { if document.schema_id != SchemaId::Blob(1) { @@ -71,9 +71,9 @@ impl SqlStore { operation_fields_v1.value FROM operation_fields_v1 - LEFT JOIN + LEFT JOIN operations_v1 - ON + ON operations_v1.operation_id = operation_fields_v1.operation_id WHERE operations_v1.document_id = $1 @@ -126,9 +126,9 @@ async fn reverse_relations( query_scalar(&format!( " - SELECT - document_view_fields.document_view_id - FROM + SELECT + document_view_fields.document_view_id + FROM document_view_fields LEFT JOIN operation_fields_v1 @@ -136,18 +136,18 @@ async fn reverse_relations( document_view_fields.operation_id = operation_fields_v1.operation_id AND document_view_fields.name = operation_fields_v1.name - LEFT JOIN + LEFT JOIN document_views ON document_view_fields.document_view_id = document_views.document_view_id WHERE operation_fields_v1.field_type - IN + IN ('pinned_relation', 'pinned_relation_list', 'relation', 'relation_list') {schema_id_condition} - AND + AND operation_fields_v1.value IN ( - SELECT document_views.document_view_id + SELECT document_views.document_view_id FROM document_views WHERE document_views.document_id = $1 ) OR operation_fields_v1.value = $1 @@ -178,8 +178,8 @@ async fn document_to_blob_data( // Now collect all existing pieces for the blob. // - // We do this using the stores' query method, targeting pieces which are in the relation - // list of the blob. + // We do this using the stores' query method, targeting pieces which are in the relation list + // of the blob. let schema = Schema::get_system(SchemaId::BlobPiece(1)).unwrap(); let list = RelationList::new_pinned(blob.view_id(), "pieces"); let pagination = Pagination { @@ -224,7 +224,7 @@ async fn document_to_blob_data( return Err(BlobStoreError::IncorrectLength); }; - Ok(Some(blob_data)) + Ok(Some(blob_data.into_bytes())) } #[cfg(test)] @@ -245,18 +245,18 @@ mod tests { #[rstest] fn get_blob(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - let blob_data = "Hello, World!".to_string(); - let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); - // Get blob by document id. + // Get blob by document id let blob = node.context.store.get_blob(&document_id).await.unwrap(); assert!(blob.is_some()); assert_eq!(blob.unwrap(), blob_data); - // Get blob by view id. + // Get blob by view id let blob = node .context .store @@ -376,8 +376,8 @@ mod tests { #[rstest] fn purge_blob(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - let blob_data = "Hello, World!".to_string(); - let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 7, "text/plain", &key_pair).await; // There is one blob and two blob pieces in database. 
// @@ -419,8 +419,8 @@ mod tests { test_runner(|mut node: TestNode| async move { let _ = populate_and_materialize(&mut node, &config).await; - let blob_data = "Hello, World!".to_string(); - let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 7, "text/plain", &key_pair).await; // There is one blob and two blob pieces in database. // @@ -453,8 +453,8 @@ mod tests { #[rstest] fn does_not_purge_blob_if_still_pinned(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - let blob_data = "Hello, World!".to_string(); - let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 7, "text/plain", &key_pair).await; let _ = add_schema_and_documents( &mut node, @@ -497,8 +497,8 @@ mod tests { #[rstest] fn purge_all_pieces_of_updated_blob(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - let blob_data = "Hello, World!".to_string(); - let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 7, "text/plain", &key_pair).await; // Create a new blob piece. let new_blob_pieces = add_document( diff --git a/aquadoggo/src/graphql/mutations/publish.rs b/aquadoggo/src/graphql/mutations/publish.rs index f9c493827..745603ed9 100644 --- a/aquadoggo/src/graphql/mutations/publish.rs +++ b/aquadoggo/src/graphql/mutations/publish.rs @@ -125,9 +125,9 @@ mod tests { use crate::bus::ServiceMessage; use crate::graphql::GraphQLSchemaManager; - use crate::http::{HttpServiceContext, BLOBS_ROUTE}; + use crate::http::HttpServiceContext; use crate::test_utils::{ - add_schema, doggo_fields, doggo_schema, graphql_test_client, populate_and_materialize, + add_schema, doggo_fields, doggo_schema, http_test_client, populate_and_materialize, populate_store_config, test_runner, TestNode, }; @@ -237,7 +237,11 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager, BLOBS_ROUTE.into()); + let context = HttpServiceContext::new( + node.context.store.clone(), + manager, + node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + ); let response = context.schema.execute(publish_request).await; @@ -298,7 +302,11 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager, BLOBS_ROUTE.into()); + let context = HttpServiceContext::new( + node.context.store.clone(), + manager, + node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + ); let response = context .schema @@ -326,7 +334,11 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager, BLOBS_ROUTE.into()); + let context = HttpServiceContext::new( + node.context.store.clone(), + manager, + node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + ); context.schema.execute(publish_request).await; @@ -354,7 +366,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client. 
- let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let response = client .post("/graphql") @@ -573,7 +585,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Prepare the GQL publish request let publish_request = publish_request(&entry_encoded, &encoded_operation); @@ -701,7 +713,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let publish_request = publish_request(&entry_encoded, &encoded_operation); @@ -736,7 +748,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Two key pairs representing two different authors let key_pairs = vec![KeyPair::new(), KeyPair::new()]; @@ -828,7 +840,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Get the one entry from the store. let entries = node @@ -871,7 +883,7 @@ mod tests { ) { test_runner(|node: TestNode| async move { // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Prepare a publish entry request for the entry. let publish_entry = publish_request( diff --git a/aquadoggo/src/graphql/queries/collection.rs b/aquadoggo/src/graphql/queries/collection.rs index c149cabe1..44655b9d1 100644 --- a/aquadoggo/src/graphql/queries/collection.rs +++ b/aquadoggo/src/graphql/queries/collection.rs @@ -56,7 +56,7 @@ mod tests { use serde_json::{json, Value as JsonValue}; use crate::test_utils::{ - add_document, add_schema, add_schema_and_documents, graphql_test_client, test_runner, + add_document, add_schema, add_schema_and_documents, http_test_client, test_runner, TestClient, TestNode, }; @@ -594,7 +594,7 @@ mod tests { .await; // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ collection: all_{type_name}{query_args} {{ @@ -697,7 +697,7 @@ mod tests { .await; // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ collection: all_{type_name} {{ @@ -742,7 +742,7 @@ mod tests { add_document(&mut node, schema.id(), document_values, &key_pair).await; // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ collection: all_{type_name} {{ @@ -852,7 +852,7 @@ mod tests { here_be_some_karaoke_hits(&mut node, &view_ids, &lyric_schema, &key_pair).await; // Init a GraphQL client we'll use to query the node. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Perform a paginated collection query for the songs. query_songs(&client, song_schema.id(), &song_args, &lyric_args).await; @@ -870,7 +870,7 @@ mod tests { here_be_some_karaoke_hits(&mut node, &view_ids, &lyric_schema, &key_pair).await; // Init a GraphQL client we'll use to query the node. 
- let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Perform a paginated collection query for the songs on the node identified by the // schema id. We don't pass any arguments and so will get up to the default number of @@ -1103,7 +1103,7 @@ mod tests { here_be_some_karaoke_hits(&mut node, &view_ids, &lyric_schema, &key_pair).await; // Init a GraphQL client we'll use to query the node. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let data = query_songs(&client, song_schema.id(), "(first: 4)", "").await; assert_eq!(data["query"]["documents"].as_array().unwrap().len(), 3); @@ -1144,7 +1144,7 @@ mod tests { here_be_some_karaoke_hits(&mut node, &view_ids, &lyric_schema, &key_pair).await; // Init a GraphQL client we'll use to query the node. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let data = query_songs_meta_fields_only(&client, song_schema.id(), "(first: 4)").await; assert_eq!(data["query"]["documents"].as_array().unwrap().len(), 3); diff --git a/aquadoggo/src/graphql/queries/document.rs b/aquadoggo/src/graphql/queries/document.rs index c882f9f4d..0e0603b97 100644 --- a/aquadoggo/src/graphql/queries/document.rs +++ b/aquadoggo/src/graphql/queries/document.rs @@ -100,7 +100,7 @@ mod test { use rstest::rstest; use serde_json::json; - use crate::test_utils::{add_document, add_schema, graphql_test_client, test_runner, TestNode}; + use crate::test_utils::{add_document, add_schema, http_test_client, test_runner, TestNode}; #[rstest] fn single_query(#[from(random_key_pair)] key_pair: KeyPair) { @@ -137,7 +137,7 @@ mod test { let document_id = document.id(); // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ byViewId: {type_name}(viewId: "{view_id}") {{ @@ -213,7 +213,7 @@ mod test { // Test single query parameter variations. test_runner(move |node: TestNode| async move { // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ view: schema_definition_v1{params} {{ @@ -269,7 +269,7 @@ mod test { .await; // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ single: {schema_id}(id: "{view_id}") {{ diff --git a/aquadoggo/src/graphql/queries/next_args.rs b/aquadoggo/src/graphql/queries/next_args.rs index 51bc98e07..642730136 100644 --- a/aquadoggo/src/graphql/queries/next_args.rs +++ b/aquadoggo/src/graphql/queries/next_args.rs @@ -97,13 +97,13 @@ mod tests { use serde_json::json; use crate::test_utils::{ - graphql_test_client, populate_and_materialize, populate_store_config, test_runner, TestNode, + http_test_client, populate_and_materialize, populate_store_config, test_runner, TestNode, }; #[rstest] fn next_args_valid_query() { test_runner(|node: TestNode| async move { - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Selected fields need to be alphabetically sorted because that's what the `json` // macro that is used in the assert below produces. let received_entry_args = client @@ -149,7 +149,7 @@ mod tests { // Populates the store and materialises documents and schema. 
let (key_pairs, document_ids) = populate_and_materialize(&mut node, &config).await; - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let document_id = document_ids.get(0).expect("There should be a document id"); let public_key = key_pairs .get(0) @@ -201,7 +201,7 @@ mod tests { #[rstest] fn next_args_error_response() { test_runner(|node: TestNode| async move { - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let response = client .post("/graphql") .json(&json!({ diff --git a/aquadoggo/src/graphql/schema.rs b/aquadoggo/src/graphql/schema.rs index 354901501..50244d7e9 100644 --- a/aquadoggo/src/graphql/schema.rs +++ b/aquadoggo/src/graphql/schema.rs @@ -271,15 +271,15 @@ mod test { use rstest::rstest; use serde_json::{json, Value}; - use crate::test_utils::{add_schema, graphql_test_client, test_runner, TestNode}; + use crate::test_utils::{add_schema, http_test_client, test_runner, TestNode}; #[rstest] fn schema_updates() { test_runner(|mut node: TestNode| async move { // Create test client in the beginning so it is initialised with just the system - // schemas. Then we create a new application schema to test that the graphql schema - // is updated and we can query the changed schema. - let client = graphql_test_client(&node).await; + // schemas. Then we create a new application schema to test that the graphql schema is + // updated and we can query the changed schema. + let client = http_test_client(&node).await; // This test uses a fixed private key to allow us to anticipate the schema typename. let key_pair = key_pair(PRIVATE_KEY); diff --git a/aquadoggo/src/graphql/tests.rs b/aquadoggo/src/graphql/tests.rs index a40e0f900..39dac3579 100644 --- a/aquadoggo/src/graphql/tests.rs +++ b/aquadoggo/src/graphql/tests.rs @@ -9,7 +9,7 @@ use p2panda_rs::{document::DocumentId, schema::FieldType}; use rstest::rstest; use serde_json::json; -use crate::test_utils::{add_document, add_schema, graphql_test_client, test_runner, TestNode}; +use crate::test_utils::{add_document, add_schema, http_test_client, test_runner, TestNode}; // Test querying application documents with scalar fields (no relations) by document id and by view // id. 
@@ -44,7 +44,7 @@ fn scalar_fields() {
         let view_id = add_document(&mut node, schema.id(), doc_fields, &key_pair).await;
 
         // Configure and send test query
-        let client = graphql_test_client(&node).await;
+        let client = http_test_client(&node).await;
         let query = format!(
             r#"{{
                 scalarDoc: {type_name}(viewId: "{view_id}") {{
@@ -149,7 +149,7 @@ fn relation_fields() {
             add_document(&mut node, parent_schema.id(), parent_fields, &key_pair).await;
 
         // Configure and send test query
-        let client = graphql_test_client(&node).await;
+        let client = http_test_client(&node).await;
         let query = format!(
             r#"{{
                 result: {}(viewId: "{}") {{
diff --git a/aquadoggo/src/http/api.rs b/aquadoggo/src/http/api.rs
index 8ec063003..f94c8c94b 100644
--- a/aquadoggo/src/http/api.rs
+++ b/aquadoggo/src/http/api.rs
@@ -1,21 +1,403 @@
 // SPDX-License-Identifier: AGPL-3.0-or-later
 
+use std::path::PathBuf;
+use std::str::FromStr;
+
+use anyhow::{anyhow, Result};
 use async_graphql::http::{playground_source, GraphQLPlaygroundConfig};
 use async_graphql_axum::{GraphQLRequest, GraphQLResponse};
-use axum::extract::Extension;
-use axum::response::{self, IntoResponse};
+use axum::body::StreamBody;
+use axum::extract::{Extension, Path};
+use axum::headers::{ETag, IfNoneMatch};
+use axum::http::StatusCode;
+use axum::response::{self, IntoResponse, Response};
+use axum::TypedHeader;
+use http::header;
+use log::warn;
+use p2panda_rs::document::traits::AsDocument;
+use p2panda_rs::document::{DocumentId, DocumentViewId};
+use p2panda_rs::schema::SchemaId;
+use p2panda_rs::storage_provider::traits::DocumentStore;
+use p2panda_rs::Human;
+use tokio::fs::File;
+use tokio_util::io::ReaderStream;
 
 use crate::http::context::HttpServiceContext;
 
-/// Handle graphql playground requests at the given path.
+/// Handle GraphQL playground requests at the given path.
 pub async fn handle_graphql_playground(path: &str) -> impl IntoResponse {
     response::Html(playground_source(GraphQLPlaygroundConfig::new(path)))
 }
 
-/// Handle graphql requests.
+/// Handle GraphQL requests.
 pub async fn handle_graphql_query(
     Extension(context): Extension<HttpServiceContext>,
     req: GraphQLRequest,
 ) -> GraphQLResponse {
     context.schema.execute(req.into_inner()).await.into()
 }
+
+/// Handle requests for a blob document served via HTTP.
+///
+/// This method automatically returns the "latest" version of the document.
+pub async fn handle_blob_document(
+    TypedHeader(if_none_match): TypedHeader<IfNoneMatch>,
+    Extension(context): Extension<HttpServiceContext>,
+    Path(document_id): Path<String>,
+) -> Result<Response, BlobHttpError> {
+    let document_id: DocumentId = DocumentId::from_str(&document_id)
+        .map_err(|err| BlobHttpError::InvalidFormat(err.into()))?;
+
+    let document = context
+        .store
+        .get_document(&document_id)
+        .await
+        .map_err(|err| BlobHttpError::InternalError(err.into()))?
+        .ok_or_else(|| BlobHttpError::NotFound)?;
+
+    // Requested document is not a blob, treat this as a "not found" error
+    if document.schema_id() != &SchemaId::Blob(1) {
+        return Err(BlobHttpError::NotFound);
+    }
+
+    respond_with_blob(if_none_match, context.blob_dir_path, document).await
+}
+
+/// Handle requests for a blob document view served via HTTP.
+///
+/// This method returns the version which was specified by the document view id.
+pub async fn handle_blob_view(
+    TypedHeader(if_none_match): TypedHeader<IfNoneMatch>,
+    Extension(context): Extension<HttpServiceContext>,
+    Path((document_id, view_id)): Path<(String, String)>,
+) -> Result<Response, BlobHttpError> {
+    let document_id = DocumentId::from_str(&document_id)
+        .map_err(|err| BlobHttpError::InvalidFormat(err.into()))?;
+    let view_id = DocumentViewId::from_str(&view_id)
+        .map_err(|err| BlobHttpError::InvalidFormat(err.into()))?;
+
+    let document = context
+        .store
+        .get_document_by_view_id(&view_id)
+        .await
+        .map_err(|err| BlobHttpError::InternalError(err.into()))?
+        .ok_or(BlobHttpError::NotFound)?;
+
+    if document.id() != &document_id || document.schema_id() != &SchemaId::Blob(1) {
+        return Err(BlobHttpError::NotFound);
+    }
+
+    respond_with_blob(if_none_match, context.blob_dir_path, document).await
+}
+
+/// Returns HTTP response with the contents, ETag and given MIME type of a blob.
+///
+/// Supports basic caching by handling "IfNoneMatch" headers matching the latest ETag.
+async fn respond_with_blob(
+    if_none_match: IfNoneMatch,
+    blob_dir_path: PathBuf,
+    document: impl AsDocument,
+) -> Result<Response, BlobHttpError> {
+    let view_id = document.view_id();
+
+    // Convert document view id into correct ETag value (with quotation marks defined in
+    // https://datatracker.ietf.org/doc/html/rfc7232#section-2.3)
+    let to_etag_str = || format!("\"{}\"", view_id);
+
+    // Respond with 304 "not modified" if ETag still matches (document did not get updated)
+    let etag =
+        ETag::from_str(&to_etag_str()).map_err(|err| BlobHttpError::InternalError(err.into()))?;
+    if !if_none_match.precondition_passes(&etag) {
+        return Ok(StatusCode::NOT_MODIFIED.into_response());
+    }
+
+    // Get MIME type of blob
+    let mime_type_str = match document.get("mime_type") {
+        Some(p2panda_rs::operation::OperationValue::String(value)) => Ok(value),
+        _ => Err(BlobHttpError::InternalError(anyhow!(
+            "Blob document did not contain a valid 'mime_type' field"
+        ))),
+    }?;
+
+    // Get body from read-stream of stored file on file system
+    let mut file_path = blob_dir_path;
+    file_path.push(format!("{view_id}"));
+    match File::open(&file_path).await {
+        Ok(file) => {
+            let headers = [
+                // MIME type to allow browsers to correctly handle this specific blob format
+                (header::CONTENT_TYPE, mime_type_str),
+                // ETag to allow browsers handle caching
+                (header::ETAG, &to_etag_str()),
+            ];
+
+            let stream = ReaderStream::new(file);
+            let body = StreamBody::new(stream);
+
+            Ok((headers, body).into_response())
+        }
+        Err(_) => {
+            warn!(
+                "Data inconsistency detected: Blob document {} exists in database but not on file
+                system at path {}!",
+                view_id.display(),
+                file_path.display()
+            );
+
+            Err(BlobHttpError::NotFound)
+        }
+    }
+}
+
+#[derive(Debug)]
+pub enum BlobHttpError {
+    NotFound,
+    InvalidFormat(anyhow::Error),
+    InternalError(anyhow::Error),
+}
+
+impl IntoResponse for BlobHttpError {
+    fn into_response(self) -> Response {
+        match self {
+            BlobHttpError::NotFound => {
+                (StatusCode::NOT_FOUND, "Could not find document").into_response()
+            }
+            BlobHttpError::InvalidFormat(err) => (
+                StatusCode::BAD_REQUEST,
+                format!("Could not parse identifier: {}", err),
+            )
+                .into_response(),
+            BlobHttpError::InternalError(err) => (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                format!("Something went wrong: {}", err),
+            )
+                .into_response(),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use http::{header, StatusCode};
+    use p2panda_rs::document::DocumentId;
+    use p2panda_rs::identity::KeyPair;
+    use p2panda_rs::test_utils::fixtures::key_pair;
+    use rstest::rstest;
+
+    use
crate::materializer::tasks::blob_task; + use crate::materializer::TaskInput; + use crate::test_utils::{add_blob, http_test_client, test_runner, update_blob, TestNode}; + + // @TODO: Would be nice if this would come out of p2panda_rs + const MAX_BLOB_PIECE_LENGTH: usize = 256; + + #[rstest] + fn responds_with_blob_in_http_body(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await + .unwrap(); + + let client = http_test_client(&node).await; + + // "/blobs/" path + let response = client.get(&format!("/blobs/{}", document_id)).send().await; + let status_code = response.status(); + let body = response.text().await; + + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, World!"); + + // "/blobs//" path + let response = client + .get(&format!("/blobs/{}/{}", document_id, blob_view_id)) + .send() + .await; + let status_code = response.status(); + let body = response.text().await; + + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, World!"); + }) + } + + #[rstest] + fn document_route_responds_with_latest_view(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await + .unwrap(); + + // Update the blob + let blob_data = "Hello, Panda!".as_bytes(); + let blob_view_id_2 = + update_blob(&mut node, &blob_data, 6, &blob_view_id, &key_pair).await; + + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id_2.clone()), + ) + .await + .unwrap(); + + // Expect to receive latest version + let client = http_test_client(&node).await; + let response = client.get(&format!("/blobs/{}", document_id)).send().await; + let status_code = response.status(); + let body = response.text().await; + + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, Panda!"); + }) + } + + #[rstest] + fn responds_with_content_type_header(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = r#" + + + + + "# + .as_bytes(); + let blob_view_id = add_blob( + &mut node, + &blob_data, + MAX_BLOB_PIECE_LENGTH, + "image/svg+xml", + &key_pair, + ) + .await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await + .unwrap(); + + // Expect correctly set content type header and body in response + let client = http_test_client(&node).await; + let response = client.get(&format!("/blobs/{}", document_id)).send().await; + let status_code = response.status(); + let headers = response.headers(); + let body = response.bytes().await; + let content_type = headers + .get(header::CONTENT_TYPE) + .expect("ContentType to exist in header"); + + assert_eq!(content_type, "image/svg+xml"); + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, 
blob_data); + }) + } + + #[rstest] + fn handles_etag_and_if_none_match_precondition(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await + .unwrap(); + + let client = http_test_client(&node).await; + + // 1. Get blob and ETag connected to it + let response = client.get(&format!("/blobs/{}", document_id)).send().await; + let status_code = response.status(); + let headers = response.headers(); + let body = response.text().await; + let etag = headers.get(header::ETAG).expect("ETag to exist in header"); + + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, World!"); + + // 2. Send another request, including the received ETag inside a "IfNoneMatch" header + let response = client + .get(&format!("/blobs/{}", document_id)) + .header(header::IF_NONE_MATCH, etag) + .send() + .await; + let status_code = response.status(); + let body = response.text().await; + assert_eq!(status_code, StatusCode::NOT_MODIFIED); + assert_eq!(body, ""); + + // 3. Update the blob + let blob_data = "Hello, Panda!".as_bytes(); + let blob_view_id_2 = + update_blob(&mut node, &blob_data, 6, &blob_view_id, &key_pair).await; + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id_2.clone()), + ) + .await + .unwrap(); + + // 4. Send request again, including the (now outdated) ETag + let response = client + .get(&format!("/blobs/{}", document_id)) + .header(header::IF_NONE_MATCH, etag) + .send() + .await; + let status_code = response.status(); + let headers = response.headers(); + let body = response.text().await; + let etag_2 = headers.get(header::ETAG).expect("ETag to exist in header"); + + assert_ne!(etag, etag_2); + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, Panda!"); + }) + } + + #[rstest] + #[case::inexisting_document_id( + "/blobs/0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + StatusCode::NOT_FOUND + )] + #[case::inexisting_document_view_id( + "/blobs/0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/0020bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + StatusCode::NOT_FOUND + )] + #[case::invalid_document_id("/blobs/not_valid", StatusCode::BAD_REQUEST)] + #[case::invalid_document_view_id( + "/blobs/0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/not_valid", + StatusCode::BAD_REQUEST + )] + fn error_responses(#[case] path: &'static str, #[case] expected_status_code: StatusCode) { + test_runner(move |node: TestNode| async move { + let client = http_test_client(&node).await; + let response = client.get(path).send().await; + assert_eq!(response.status(), expected_status_code); + }) + } +} diff --git a/aquadoggo/src/http/context.rs b/aquadoggo/src/http/context.rs index 758d07bd9..f431ee7c9 100644 --- a/aquadoggo/src/http/context.rs +++ b/aquadoggo/src/http/context.rs @@ -2,10 +2,14 @@ use std::path::PathBuf; +use crate::db::SqlStore; use crate::graphql::GraphQLSchemaManager; #[derive(Clone)] pub struct HttpServiceContext { + /// SQL database. + pub store: SqlStore, + /// Dynamic GraphQL schema manager. 
pub schema: GraphQLSchemaManager, @@ -14,9 +18,9 @@ pub struct HttpServiceContext { } impl HttpServiceContext { - /// Create a new HttpServiceContext. - pub fn new(schema: GraphQLSchemaManager, blob_dir_path: PathBuf) -> Self { + pub fn new(store: SqlStore, schema: GraphQLSchemaManager, blob_dir_path: PathBuf) -> Self { Self { + store, schema, blob_dir_path, } diff --git a/aquadoggo/src/http/mod.rs b/aquadoggo/src/http/mod.rs index 312cb4a5b..db475580e 100644 --- a/aquadoggo/src/http/mod.rs +++ b/aquadoggo/src/http/mod.rs @@ -5,4 +5,4 @@ mod context; mod service; pub use context::HttpServiceContext; -pub use service::{build_server, http_service, BLOBS_ROUTE}; +pub use service::{build_server, http_service}; diff --git a/aquadoggo/src/http/service.rs b/aquadoggo/src/http/service.rs index 8613a0d7d..7c045e50b 100644 --- a/aquadoggo/src/http/service.rs +++ b/aquadoggo/src/http/service.rs @@ -10,21 +10,19 @@ use axum::Router; use http::header::CONTENT_TYPE; use log::{debug, warn}; use tower_http::cors::{Any, CorsLayer}; -use tower_http::services::ServeDir; use crate::bus::ServiceSender; use crate::context::Context; use crate::graphql::GraphQLSchemaManager; -use crate::http::api::{handle_graphql_playground, handle_graphql_query}; +use crate::http::api::{ + handle_blob_document, handle_blob_view, handle_graphql_playground, handle_graphql_query, +}; use crate::http::context::HttpServiceContext; use crate::manager::{ServiceReadySender, Shutdown}; /// Route to the GraphQL playground const GRAPHQL_ROUTE: &str = "/graphql"; -/// Route to the blobs static file server -pub const BLOBS_ROUTE: &str = "/blobs"; - /// Build HTTP server with GraphQL API. pub fn build_server(http_context: HttpServiceContext) -> Router { // Configure CORS middleware @@ -34,17 +32,15 @@ pub fn build_server(http_context: HttpServiceContext) -> Router { .allow_credentials(false) .allow_origin(Any); - // Construct static file server - let blob_service = ServeDir::new(http_context.blob_dir_path.clone()); - Router::new() - // Add blobs static file server - .nest_service(BLOBS_ROUTE, blob_service) // Add GraphQL routes .route( GRAPHQL_ROUTE, get(|| handle_graphql_playground(GRAPHQL_ROUTE)).post(handle_graphql_query), ) + // Add blob routes + .route("/blobs/:document_id", get(handle_blob_document)) + .route("/blobs/:document_id/:view_hash", get(handle_blob_view)) // Add middlewares .layer(cors) // Add shared context @@ -68,7 +64,11 @@ pub async fn http_service( let blob_dir_path = context.config.blob_dir.as_ref().expect("Base path not set"); // Introduce a new context for all HTTP routes - let http_context = HttpServiceContext::new(graphql_schema_manager, blob_dir_path.to_owned()); + let http_context = HttpServiceContext::new( + context.store.clone(), + graphql_schema_manager, + blob_dir_path.to_owned(), + ); // Start HTTP server with given port and re-attempt with random port if it was taken already let builder = if let Ok(builder) = axum::Server::try_bind(&http_address) { @@ -121,7 +121,11 @@ mod tests { let schema_provider = SchemaProvider::default(); let graphql_schema_manager = GraphQLSchemaManager::new(node.context.store.clone(), tx, schema_provider).await; - let context = HttpServiceContext::new(graphql_schema_manager, BLOBS_DIR_NAME.into()); + let context = HttpServiceContext::new( + node.context.store.clone(), + graphql_schema_manager, + BLOBS_DIR_NAME.into(), + ); let client = TestClient::new(build_server(context)); let response = client diff --git a/aquadoggo/src/materializer/tasks/blob.rs 
b/aquadoggo/src/materializer/tasks/blob.rs index 456e83863..8366cbba6 100644 --- a/aquadoggo/src/materializer/tasks/blob.rs +++ b/aquadoggo/src/materializer/tasks/blob.rs @@ -1,20 +1,16 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::fs::{self, File}; -use std::io::Write; -use std::os::unix::fs::symlink; - use log::{debug, info}; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::DocumentViewId; use p2panda_rs::operation::OperationValue; use p2panda_rs::schema::SchemaId; -use p2panda_rs::storage_provider::traits::{DocumentStore, OperationStore}; +use p2panda_rs::storage_provider::traits::DocumentStore; +use tokio::fs::File; +use tokio::io::AsyncWriteExt; -use crate::config::BLOBS_SYMLINK_DIR_NAME; use crate::context::Context; use crate::db::types::StorageDocument; -use crate::db::SqlStore; use crate::materializer::worker::{TaskError, TaskResult}; use crate::materializer::TaskInput; @@ -75,38 +71,33 @@ pub async fn blob_task(context: Context, input: TaskInput) -> TaskResult base_path, None => return Err(TaskError::Critical("No base path configured".to_string())), }; - - let blob_dir = base_path.join(blob_document.id().as_str()); - - fs::create_dir_all(&blob_dir).map_err(|err| TaskError::Critical(err.to_string()))?; - let blob_view_path = blob_dir.join(blob_document.view_id().to_string()); + let blob_view_path = base_path.join(blob_document.view_id().to_string()); // Write the blob to the filesystem. - info!("Creating blob at path {blob_view_path:?}"); - - let mut file = File::create(&blob_view_path).unwrap(); - file.write_all(blob_data.as_bytes()).unwrap(); - - // create a symlink from `../documents/` -> `..//` - if is_current_view(&context.store, blob_document.view_id()).await? { - info!("Creating symlink from document id to current view"); - - let link_path = base_path - .join(BLOBS_SYMLINK_DIR_NAME) - .join(blob_document.id().as_str()); - - let _ = fs::remove_file(&link_path); - - symlink(blob_view_path, link_path) - .map_err(|err| TaskError::Critical(err.to_string()))?; - } + info!("Creating blob at path {}", blob_view_path.display()); + + let mut file = File::create(&blob_view_path).await.map_err(|err| { + TaskError::Critical(format!( + "Could not create blob file @ {}: {}", + blob_view_path.display(), + err + )) + })?; + + file.write_all(&blob_data).await.map_err(|err| { + TaskError::Critical(format!( + "Could not write blob file @ {}: {}", + blob_view_path.display(), + err + )) + })?; } Ok(None) @@ -152,118 +143,47 @@ async fn get_related_blobs( Ok(related_blobs) } -// Check if this is the current view for this blob. -async fn is_current_view( - store: &SqlStore, - document_view_id: &DocumentViewId, -) -> Result { - let blob_document_id = store - .get_document_id_by_operation_id(document_view_id.graph_tips().first().unwrap()) - .await - .map_err(|err| TaskError::Critical(err.to_string()))? - .expect("Document for blob exists"); - - let current_blob_document = store - .get_document(&blob_document_id) - .await - .map_err(|err| TaskError::Critical(err.to_string()))? 
- .expect("Document for blob exists"); - - Ok(current_blob_document.view_id() == document_view_id) -} - #[cfg(test)] mod tests { - use std::fs; - - use p2panda_rs::document::DocumentId; use p2panda_rs::identity::KeyPair; - use p2panda_rs::schema::SchemaId; use p2panda_rs::test_utils::fixtures::key_pair; use rstest::rstest; + use tokio::fs; - use crate::config::BLOBS_SYMLINK_DIR_NAME; use crate::materializer::tasks::blob_task; use crate::materializer::TaskInput; - use crate::test_utils::{add_document, test_runner, TestNode}; + use crate::test_utils::{add_blob, test_runner, TestNode}; #[rstest] fn materializes_blob_to_filesystem(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - let blob_data = "Hello, World!".to_string(); - - // Publish blob pieces and blob. - let blob_piece_view_id_1 = add_document( - &mut node, - &SchemaId::BlobPiece(1), - vec![("data", blob_data[..5].into())], - &key_pair, - ) - .await; - - let blob_piece_view_id_2 = add_document( - &mut node, - &SchemaId::BlobPiece(1), - vec![("data", blob_data[5..].into())], - &key_pair, - ) - .await; - - // Publish blob. - let blob_view_id = add_document( - &mut node, - &SchemaId::Blob(1), - vec![ - ("length", { blob_data.len() as i64 }.into()), - ("mime_type", "text/plain".into()), - ( - "pieces", - vec![blob_piece_view_id_1, blob_piece_view_id_2].into(), - ), - ], - &key_pair, - ) - .await; + // Publish blob + let blob_data = "Hello, World!"; + let blob_view_id = + add_blob(&mut node, blob_data.as_bytes(), 5, "plain/text", &key_pair).await; - // Run blob task. + // Run blob task let result = blob_task( node.context.clone(), TaskInput::DocumentViewId(blob_view_id.clone()), ) .await; - // It shouldn't fail. + // It shouldn't fail assert!(result.is_ok(), "{:#?}", result); - // It should return no extra tasks. + // It should return no extra tasks assert!(result.unwrap().is_none()); - // Convert blob view id to document id. - let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); - - // Construct the expected path to the blob view file. + // Construct the expected path to the blob view file let base_path = node.context.config.blob_dir.as_ref().unwrap(); - let blob_path = base_path - .join(document_id.as_str()) - .join(blob_view_id.to_string()); + let blob_path = base_path.join(blob_view_id.to_string()); // Read from this file - let retrieved_blob_data = fs::read_to_string(blob_path); + let retrieved_blob_data = fs::read_to_string(blob_path).await; - // It should match the complete published blob data. + // It should match the complete published blob data assert!(retrieved_blob_data.is_ok(), "{:?}", retrieved_blob_data); assert_eq!(blob_data, retrieved_blob_data.unwrap()); - - // Construct the expected path to the blob symlink file location. - let blob_path = base_path - .join(BLOBS_SYMLINK_DIR_NAME) - .join(document_id.as_str()); - - // Read from this file - let retrieved_blob_data = fs::read_to_string(blob_path); - - // It should match the complete published blob data. 
- assert!(retrieved_blob_data.is_ok(), "{:?}", retrieved_blob_data); - assert_eq!(blob_data, retrieved_blob_data.unwrap()) }) } } diff --git a/aquadoggo/src/materializer/tasks/garbage_collection.rs b/aquadoggo/src/materializer/tasks/garbage_collection.rs index 36963b3a7..6138273cc 100644 --- a/aquadoggo/src/materializer/tasks/garbage_collection.rs +++ b/aquadoggo/src/materializer/tasks/garbage_collection.rs @@ -376,11 +376,18 @@ mod tests { #[rstest] fn purges_blobs(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - // Publish a blob. - let blob_document_view = add_blob(&mut node, "Hello World!", &key_pair).await; + // Publish a blob + let blob_document_view = add_blob( + &mut node, + "Hello World!".as_bytes(), + 6, + "text/plain", + &key_pair, + ) + .await; let blob_document_id: DocumentId = blob_document_view.to_string().parse().unwrap(); - // Check the blob is there. + // Check the blob is there let blob = node .context .store @@ -389,7 +396,7 @@ mod tests { .unwrap(); assert!(blob.is_some()); - // Run a garbage collection task for the blob document. + // Run a garbage collection task for the blob document let next_tasks = garbage_collection_task( node.context.clone(), TaskInput::DocumentId(blob_document_id.clone()), @@ -397,10 +404,10 @@ mod tests { .await .unwrap(); - // It shouldn't return any new tasks. + // It shouldn't return any new tasks assert!(next_tasks.is_none()); - // The blob should no longer be available. + // The blob should no longer be available let blob = node .context .store @@ -423,12 +430,12 @@ mod tests { #[rstest] fn purges_newly_detached_blobs(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - // Create a blob document. - let blob_data = "Hello, World!".to_string(); - let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + // Create a blob document + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); - // Relate to the blob from a new document. + // Relate to the blob from a new document let (schema, documents_pinning_blob) = add_schema_and_documents( &mut node, "img", @@ -441,8 +448,8 @@ mod tests { ) .await; - // Now update the document to relate to another blob. This means the previously - // created blob is now "dangling". + // Now update the document to relate to another blob. This means the previously created + // blob is now "dangling" update_document( &mut node, schema.id(), @@ -452,7 +459,7 @@ mod tests { ) .await; - // Run a task for the parent document. + // Run a task for the parent document let document_id: DocumentId = documents_pinning_blob[0].to_string().parse().unwrap(); let next_tasks = garbage_collection_task(node.context.clone(), TaskInput::DocumentId(document_id)) @@ -460,16 +467,16 @@ mod tests { .unwrap() .unwrap(); - // It issues one new task which is for the blob document. + // It issues one new task which is for the blob document assert_eq!(next_tasks.len(), 1); let next_tasks = garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) .await .unwrap(); - // No new tasks issued. + // No new tasks issued assert!(next_tasks.is_none()); - // The blob has correctly been purged. + // The blob has correctly been purged let blob = node .context .store @@ -485,8 +492,8 @@ mod tests { fn other_documents_keep_blob_alive(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { // Create a blob document. 
- let blob_data = "Hello, World!".to_string(); - let blob_view_id = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); // Relate to the blob from a new document. @@ -558,12 +565,12 @@ mod tests { #[rstest] fn all_relation_types_keep_blobs_alive(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - let blob_data = "Hello, World!".to_string(); + let blob_data = "Hello, World!".as_bytes(); // Any type of relation can keep a blob alive, here we create one of each and run // garbage collection tasks for each blob. - let blob_view_id_1 = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_view_id_1 = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; let _ = add_schema_and_documents( &mut node, "img", @@ -576,7 +583,7 @@ mod tests { ) .await; - let blob_view_id_2 = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_view_id_2 = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; let _ = add_schema_and_documents( &mut node, "img", @@ -589,7 +596,7 @@ mod tests { ) .await; - let blob_view_id_3 = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_view_id_3 = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; let _ = add_schema_and_documents( &mut node, "img", @@ -606,7 +613,7 @@ mod tests { ) .await; - let blob_view_id_4 = add_blob(&mut node, &blob_data, &key_pair).await; + let blob_view_id_4 = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; let _ = add_schema_and_documents( &mut node, "img", diff --git a/aquadoggo/src/node.rs b/aquadoggo/src/node.rs index 165357327..c92fd32cc 100644 --- a/aquadoggo/src/node.rs +++ b/aquadoggo/src/node.rs @@ -1,12 +1,12 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::fs; - use anyhow::Result; use p2panda_rs::identity::KeyPair; +use tempfile::TempDir; +use tokio::fs; use crate::bus::ServiceMessage; -use crate::config::{Configuration, BLOBS_DIR_NAME, BLOBS_SYMLINK_DIR_NAME}; +use crate::config::{Configuration, BLOBS_DIR_NAME}; use crate::context::Context; use crate::db::SqlStore; use crate::db::{connection_pool, create_database, run_pending_migrations, Pool}; @@ -64,12 +64,13 @@ impl Node { let schema_provider = SchemaProvider::new(application_schema, config.allow_schema_ids.clone()); - // Create tmp dirs for blob storage. + // Create temporary dirs for blob storage. // - // @TODO: implement configuring this path for persistent storage. 
- let tmp_dir = tempfile::TempDir::new().unwrap(); + // @TODO: Implement configuring this path for persistent storage, see related issue: + // https://github.com/p2panda/aquadoggo/issues/542 + let tmp_dir = TempDir::new().unwrap(); let blob_dir_path = tmp_dir.path().join(BLOBS_DIR_NAME); - fs::create_dir_all(blob_dir_path.join(BLOBS_SYMLINK_DIR_NAME)).unwrap(); + fs::create_dir_all(&blob_dir_path).await.unwrap(); config.blob_dir = Some(blob_dir_path); // Create service manager with shared data between services diff --git a/aquadoggo/src/proptests/tests.rs b/aquadoggo/src/proptests/tests.rs index 8aa72e367..a57ed87fe 100644 --- a/aquadoggo/src/proptests/tests.rs +++ b/aquadoggo/src/proptests/tests.rs @@ -15,7 +15,7 @@ use crate::proptests::schema_strategies::{schema_strategy, SchemaAST}; use crate::proptests::utils::{ add_documents_from_ast, add_schemas_from_ast, parse_filter, parse_selected_fields, FieldName, }; -use crate::test_utils::{graphql_test_client, test_runner, TestClient, TestNode}; +use crate::test_utils::{http_test_client, test_runner, TestClient, TestNode}; use super::filter_strategies::{ application_filters_strategy, meta_field_filter_strategy, Filter, MetaField, @@ -212,7 +212,7 @@ proptest! { sanity_checks(&node, &documents, &schemas).await; // Create a GraphQL client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Run the test for each schema and related documents that have been generated. for schema_id in schemas { @@ -249,7 +249,7 @@ proptest! { sanity_checks(&node, &documents, &schemas).await; // Create a GraphQL client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Get the root schema from the provider. let schema = node.context.schema_provider.get(&schema_ast.id).await.expect("Schema should exist on node"); diff --git a/aquadoggo/src/test_utils/client.rs b/aquadoggo/src/test_utils/client.rs index cef4e1003..b8a4b29fb 100644 --- a/aquadoggo/src/test_utils/client.rs +++ b/aquadoggo/src/test_utils/client.rs @@ -2,21 +2,22 @@ use std::convert::TryFrom; use std::net::{SocketAddr, TcpListener}; +use std::time::Duration; use axum::body::HttpBody; use axum::BoxError; use http::header::{HeaderName, HeaderValue}; -use http::{Request, StatusCode}; +use http::{HeaderMap, Request, StatusCode}; use hyper::{Body, Server}; use tokio::sync::broadcast; use tower::make::Shared; use tower_service::Service; use crate::graphql::GraphQLSchemaManager; -use crate::http::{build_server, HttpServiceContext, BLOBS_ROUTE}; +use crate::http::{build_server, HttpServiceContext}; use crate::test_utils::TestNode; -/// GraphQL client which can be used for querying a node in tests. +/// HTTP client for testing request and responses. pub struct TestClient { client: reqwest::Client, addr: SocketAddr, @@ -44,14 +45,14 @@ impl TestClient { }); let client = reqwest::Client::builder() - .redirect(reqwest::redirect::Policy::none()) + .timeout(Duration::from_secs(10)) + .redirect(reqwest::redirect::Policy::default()) .build() .unwrap(); TestClient { client, addr } } - #[allow(dead_code)] pub(crate) fn get(&self, url: &str) -> RequestBuilder { RequestBuilder { builder: self.client.get(format!("http://{}{}", self.addr, url)), @@ -65,16 +66,23 @@ impl TestClient { } } -/// Configures a test client that can be used for GraphQL testing. -pub async fn graphql_test_client(node: &TestNode) -> TestClient { +/// Configures a test client that can be used for HTTP API testing. 
+pub async fn http_test_client(node: &TestNode) -> TestClient { let (tx, _) = broadcast::channel(120); + let manager = GraphQLSchemaManager::new( node.context.store.clone(), tx, node.context.schema_provider.clone(), ) .await; - let http_context = HttpServiceContext::new(manager, BLOBS_ROUTE.into()); + + let http_context = HttpServiceContext::new( + node.context.store.clone(), + manager, + node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + ); + TestClient::new(build_server(http_context)) } @@ -103,7 +111,6 @@ impl RequestBuilder { self } - #[allow(dead_code)] pub(crate) fn header(mut self, key: K, value: V) -> Self where HeaderName: TryFrom, @@ -121,6 +128,10 @@ pub(crate) struct TestResponse { } impl TestResponse { + pub(crate) async fn bytes(self) -> Vec { + self.response.bytes().await.unwrap().to_vec() + } + pub(crate) async fn text(self) -> String { self.response.text().await.unwrap() } @@ -132,8 +143,11 @@ impl TestResponse { self.response.json().await.unwrap() } - #[allow(dead_code)] pub(crate) fn status(&self) -> StatusCode { self.response.status() } + + pub(crate) fn headers(&self) -> HeaderMap { + self.response.headers().clone() + } } diff --git a/aquadoggo/src/test_utils/mod.rs b/aquadoggo/src/test_utils/mod.rs index eb0548ee3..809354444 100644 --- a/aquadoggo/src/test_utils/mod.rs +++ b/aquadoggo/src/test_utils/mod.rs @@ -7,12 +7,12 @@ pub mod helpers; mod node; mod runner; -pub use client::{graphql_test_client, TestClient}; +pub use client::{http_test_client, TestClient}; pub use config::TestConfiguration; pub use db::{drop_database, initialize_db, initialize_sqlite_db}; pub use helpers::{build_document, doggo_fields, doggo_schema, schema_from_fields}; pub use node::{ add_blob, add_document, add_schema, add_schema_and_documents, assert_query, - populate_and_materialize, populate_store_config, update_document, TestNode, + populate_and_materialize, populate_store_config, update_blob, update_document, TestNode, }; pub use runner::{test_runner, test_runner_with_manager, TestNodeManager}; diff --git a/aquadoggo/src/test_utils/node.rs b/aquadoggo/src/test_utils/node.rs index 5c7fae147..eec2a8b48 100644 --- a/aquadoggo/src/test_utils/node.rs +++ b/aquadoggo/src/test_utils/node.rs @@ -161,7 +161,7 @@ pub async fn add_document( // We only want to issue dependency tasks. let dependency_tasks = tasks .iter() - .filter(|task| task.worker_name() == "depenedency"); + .filter(|task| task.worker_name() == "dependency"); for task in dependency_tasks { dependency_task(node.context.clone(), task.input().to_owned()) @@ -334,35 +334,77 @@ pub async fn update_document( DocumentViewId::from(entry_signed.hash()) } -pub async fn add_blob(node: &mut TestNode, blob_data: &str, key_pair: &KeyPair) -> DocumentViewId { - // Publish blob pieces and blob. - let (blob_data_a, blob_data_b) = blob_data.split_at(blob_data.len() / 2); - let blob_piece_view_id_1 = add_document( - node, - &SchemaId::BlobPiece(1), - vec![("data", blob_data_a.into())], - &key_pair, - ) - .await; +/// Splits bytes into chunks with a defined maximum length (256 bytes is the specified maximum) and +/// publishes a blob_piece_v1 document for each chunk. 
+pub async fn add_blob_pieces( + node: &mut TestNode, + body: &[u8], + max_piece_length: usize, + key_pair: &KeyPair, +) -> Vec { + let blob_pieces = body.chunks(max_piece_length); + + let mut blob_pieces_view_ids = Vec::with_capacity(blob_pieces.len()); + for piece in blob_pieces { + // @TODO: No need to convert bytes into a string when we introduced our new bytes operation + // field type. Related issue: https://github.com/p2panda/aquadoggo/issues/543 + let byte_str = std::str::from_utf8(piece).expect("Invalid UTF-8 sequence"); + + let view_id = add_document( + node, + &SchemaId::BlobPiece(1), + vec![("data", byte_str.into())], + &key_pair, + ) + .await; + + blob_pieces_view_ids.push(view_id); + } + + blob_pieces_view_ids +} - let blob_piece_view_id_2 = add_document( +pub async fn add_blob( + node: &mut TestNode, + body: &[u8], + max_piece_length: usize, + mime_type: &str, + key_pair: &KeyPair, +) -> DocumentViewId { + let blob_pieces_view_ids = add_blob_pieces(node, body, max_piece_length, key_pair).await; + + let blob_view_id = add_document( node, - &SchemaId::BlobPiece(1), - vec![("data", blob_data_b.into())], + &SchemaId::Blob(1), + vec![ + ("length", { body.len() as i64 }.into()), + ("mime_type", mime_type.into()), + ("pieces", blob_pieces_view_ids.into()), + ], &key_pair, ) .await; - let blob_view_id = add_document( + + blob_view_id +} + +pub async fn update_blob( + node: &mut TestNode, + body: &[u8], + max_piece_length: usize, + previous: &DocumentViewId, + key_pair: &KeyPair, +) -> DocumentViewId { + let blob_pieces_view_ids = add_blob_pieces(node, body, max_piece_length, key_pair).await; + + let blob_view_id = update_document( node, &SchemaId::Blob(1), vec![ - ("length", { blob_data.len() as i64 }.into()), - ("mime_type", "text/plain".into()), - ( - "pieces", - vec![blob_piece_view_id_1, blob_piece_view_id_2].into(), - ), + ("length", { body.len() as i64 }.into()), + ("pieces", blob_pieces_view_ids.into()), ], + &previous, &key_pair, ) .await; diff --git a/aquadoggo/src/test_utils/runner.rs b/aquadoggo/src/test_utils/runner.rs index b700bb695..7349a3287 100644 --- a/aquadoggo/src/test_utils/runner.rs +++ b/aquadoggo/src/test_utils/runner.rs @@ -8,7 +8,7 @@ use p2panda_rs::identity::KeyPair; use tokio::runtime::Builder; use tokio::sync::Mutex; -use crate::config::{BLOBS_DIR_NAME, BLOBS_SYMLINK_DIR_NAME}; +use crate::config::BLOBS_DIR_NAME; use crate::context::Context; use crate::db::Pool; use crate::db::SqlStore; @@ -102,12 +102,12 @@ pub fn test_runner(test: F) { let (_config, pool) = initialize_db().await; let store = SqlStore::new(pool); - // Construct tempfile directory for the test runner. + // Construct temporary directory for the test runner let tmp_dir = tempfile::TempDir::new().unwrap(); let blob_dir_path = tmp_dir.path().join(BLOBS_DIR_NAME); - fs::create_dir_all(blob_dir_path.join(BLOBS_SYMLINK_DIR_NAME)).unwrap(); + fs::create_dir_all(&blob_dir_path).unwrap(); - // Construct node config supporting any schema. 
+ // Construct node config supporting any schema let mut cfg = Configuration::default(); cfg.blob_dir = Some(blob_dir_path); From c055fbe164e2e806f0ea15e7edda321a9ac1bd5a Mon Sep 17 00:00:00 2001 From: Andreas Dzialocha Date: Tue, 5 Sep 2023 13:18:43 +0200 Subject: [PATCH 09/14] Build a byte buffer over paginated pieces when assembling blobs (#547) * Build a byte buffer over paginated blob pieces * Add entry to CHANGELOG.md * Add doc string to constant * Mark panics as unreachable --- CHANGELOG.md | 1 + Cargo.lock | 1 + aquadoggo/Cargo.toml | 1 + aquadoggo/src/db/stores/blob.rs | 87 ++++++++++++++++++--------------- 4 files changed, 50 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c12547750..0dc239da9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - HTTP routes to serve files with correct content type headers [#544](https://github.com/p2panda/aquadoggo/pull/544) +- Build a byte buffer over paginated pieces when assembling blobs [#547](https://github.com/p2panda/aquadoggo/pull/547) ## [0.5.0] diff --git a/Cargo.lock b/Cargo.lock index 1519d5f25..862c46b7c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -167,6 +167,7 @@ dependencies = [ "axum", "bamboo-rs-core-ed25519-yasmf", "bs58 0.4.0", + "bytes", "ciborium", "ctor", "deadqueue", diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 58492a563..6e16211dd 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -27,6 +27,7 @@ asynchronous-codec = { version = "0.6.2", features = ["cbor"] } axum = { version = "0.6.10", features = ["headers"] } bamboo-rs-core-ed25519-yasmf = "0.1.1" bs58 = "0.4.0" +bytes = "1.4.0" deadqueue = { version = "0.2.3", default-features = false, features = [ "unlimited", ] } diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs index f90e28c4f..73f1153a3 100644 --- a/aquadoggo/src/db/stores/blob.rs +++ b/aquadoggo/src/db/stores/blob.rs @@ -2,6 +2,7 @@ use std::num::NonZeroU64; +use bytes::{BufMut, BytesMut}; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::{DocumentId, DocumentViewId}; use p2panda_rs::operation::OperationValue; @@ -10,14 +11,12 @@ use p2panda_rs::storage_provider::traits::DocumentStore; use sqlx::{query_scalar, AnyPool}; use crate::db::errors::{BlobStoreError, SqlStoreError}; -use crate::db::query::{Field, Filter, Order, Pagination, Select}; +use crate::db::query::{Filter, Order, Pagination, PaginationField, Select}; use crate::db::stores::query::{Query, RelationList}; use crate::db::SqlStore; -/// The maximum allowed number of blob pieces per blob. -/// @TODO: do we want this? If so, what value should it be and we should add this to -/// p2panda-rs blob validation too. -const MAX_BLOB_PIECES: u64 = 10000; +/// Number of blob pieces requested per database query iteration. +const BLOB_QUERY_PAGE_SIZE: u64 = 10; pub type BlobData = Vec; @@ -164,16 +163,16 @@ async fn document_to_blob_data( store: &SqlStore, blob: impl AsDocument, ) -> Result, BlobStoreError> { - // Get the length of the blob. - let length = match blob.get("length").unwrap() { - OperationValue::Integer(length) => length, - _ => panic!(), // We should never hit this as we already validated that this is a blob document. 
+ // Get the length of the blob + let expected_length = match blob.get("length").unwrap() { + OperationValue::Integer(length) => *length as usize, + _ => unreachable!(), // We already validated that this is a blob document }; - // Get the number of pieces in the blob. - let num_pieces = match blob.get("pieces").unwrap() { + // Get the number of pieces in the blob + let expected_num_pieces = match blob.get("pieces").unwrap() { OperationValue::PinnedRelationList(list) => list.len(), - _ => panic!(), // We should never hit this as we already validated that this is a blob document. + _ => unreachable!(), // We already validated that this is a blob document }; // Now collect all existing pieces for the blob. @@ -182,49 +181,57 @@ async fn document_to_blob_data( // of the blob. let schema = Schema::get_system(SchemaId::BlobPiece(1)).unwrap(); let list = RelationList::new_pinned(blob.view_id(), "pieces"); - let pagination = Pagination { - first: NonZeroU64::new(MAX_BLOB_PIECES).unwrap(), - ..Default::default() - }; - let args = Query::new( - &pagination, - &Select::new(&[Field::new("data")]), + let mut has_next_page = true; + let mut args = Query::new( + &Pagination::new( + &NonZeroU64::new(BLOB_QUERY_PAGE_SIZE).unwrap(), + None, + &vec![PaginationField::EndCursor, PaginationField::HasNextPage], + ), + &Select::new(&["data".into()]), &Filter::default(), &Order::default(), ); - let (_, results) = store.query(schema, &args, Some(&list)).await?; + let mut buf = BytesMut::with_capacity(expected_length); + let mut num_pieces = 0; + + while has_next_page { + let (pagination_data, documents) = store.query(schema, &args, Some(&list)).await?; + has_next_page = pagination_data.has_next_page; + args.pagination.after = pagination_data.end_cursor; + num_pieces += documents.len(); + + for (_, blob_piece_document) in documents { + match blob_piece_document + .get("data") + .expect("Blob piece document without \"data\" field") + { + // @TODO: Use bytes here instead, see related issue: + // https://github.com/p2panda/aquadoggo/issues/543 + OperationValue::String(data_str) => buf.put(data_str.as_bytes()), + _ => unreachable!(), // We only queried for blob piece documents + } + } + } - // No pieces were found. - if results.is_empty() { + // No pieces were found + if buf.is_empty() { return Err(BlobStoreError::NoBlobPiecesFound); }; - // Not all pieces were found. - if results.len() != num_pieces { + // Not all pieces were found + if expected_num_pieces != num_pieces { return Err(BlobStoreError::MissingPieces); } - // Now we construct the blob data. - let mut blob_data = "".to_string(); - - for (_, blob_piece_document) in results { - match blob_piece_document - .get("data") - .expect("Blob piece document without \"data\" field") - { - OperationValue::String(data_str) => blob_data += data_str, - _ => panic!(), // We should never hit this as we only queried for blob piece documents. - } - } - - // Combined blob data length doesn't match the claimed length. 
- if blob_data.len() != *length as usize { + // Combined blob data length doesn't match the claimed length + if expected_length != buf.len() { return Err(BlobStoreError::IncorrectLength); }; - Ok(Some(blob_data.into_bytes())) + Ok(Some(buf.into())) } #[cfg(test)] From 7cbf5f5a48681f5ee75e3d7baa2f3823eaf628f3 Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 6 Sep 2023 11:10:29 +0200 Subject: [PATCH 10/14] Use correct MAX_BLOB_PIECE_LENGTH from p2panda_rs --- Cargo.lock | 2 +- aquadoggo/Cargo.toml | 4 ++-- aquadoggo/src/http/api.rs | 4 +--- aquadoggo_cli/Cargo.toml | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 862c46b7c..6114c450e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3117,7 +3117,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "p2panda-rs" version = "0.7.1" -source = "git+https://github.com/p2panda/p2panda?rev=17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2#17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2" +source = "git+https://github.com/p2panda/p2panda?rev=8377056617b64e898e9980e8ad84d258ca0442a1#8377056617b64e898e9980e8ad84d258ca0442a1" dependencies = [ "arrayvec 0.5.2", "async-trait", diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 6e16211dd..9b1f72b13 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -56,7 +56,7 @@ lipmaa-link = "0.2.2" log = "0.4.19" once_cell = "1.18.0" openssl-probe = "0.1.5" -p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2", features = [ +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1", features = [ "storage-provider", ] } rand = "0.8.5" @@ -97,7 +97,7 @@ http = "0.2.9" hyper = "0.14.19" libp2p-swarm-test = "0.2.0" once_cell = "1.17.0" -p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2", features = [ +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1", features = [ "test-utils", "storage-provider", ] } diff --git a/aquadoggo/src/http/api.rs b/aquadoggo/src/http/api.rs index f94c8c94b..cab488964 100644 --- a/aquadoggo/src/http/api.rs +++ b/aquadoggo/src/http/api.rs @@ -181,6 +181,7 @@ mod tests { use http::{header, StatusCode}; use p2panda_rs::document::DocumentId; use p2panda_rs::identity::KeyPair; + use p2panda_rs::schema::validate::MAX_BLOB_PIECE_LENGTH; use p2panda_rs::test_utils::fixtures::key_pair; use rstest::rstest; @@ -188,9 +189,6 @@ mod tests { use crate::materializer::TaskInput; use crate::test_utils::{add_blob, http_test_client, test_runner, update_blob, TestNode}; - // @TODO: Would be nice if this would come out of p2panda_rs - const MAX_BLOB_PIECE_LENGTH: usize = 256; - #[rstest] fn responds_with_blob_in_http_body(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index a84e1e907..c60207e19 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -31,7 +31,7 @@ libp2p = "0.52.0" log = "0.4.20" path-clean = "1.0.1" serde = { version = "1.0.185", features = ["serde_derive"] } -p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "17f4fcb1dcf7cebabd6d9b5a824399e9384d96b2" } +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1" } tokio = { version = "1.28.2", features = ["full"] } toml = "0.7.6" From 3140f688210b7c2606232102e44d266ee785ea8b 
Mon Sep 17 00:00:00 2001 From: Andreas Dzialocha Date: Wed, 6 Sep 2023 18:00:50 +0200 Subject: [PATCH 11/14] Blobs directory configuration (#549) * Move configuration of blobs base path into client * Rename variable in all places * Update READMEs and fix 'its' spelling * Add entry to CHANGELOG.md * Add warnings when database is persisted but blobs are not --- CHANGELOG.md | 1 + README.md | 2 +- aquadoggo/Cargo.toml | 2 +- aquadoggo/src/config.rs | 14 +-- aquadoggo/src/db/stores/blob.rs | 6 +- aquadoggo/src/db/stores/document.rs | 111 +++++++++--------- aquadoggo/src/db/stores/entry.rs | 10 +- aquadoggo/src/db/stores/operation.rs | 10 +- aquadoggo/src/db/stores/schema.rs | 6 +- aquadoggo/src/db/types/entry.rs | 6 +- aquadoggo/src/db/types/operation.rs | 2 +- aquadoggo/src/graphql/constants.rs | 4 +- aquadoggo/src/graphql/mutations/publish.rs | 6 +- aquadoggo/src/graphql/objects/document.rs | 6 +- aquadoggo/src/graphql/queries/collection.rs | 2 +- .../scalars/document_view_id_scalar.rs | 2 +- aquadoggo/src/graphql/schema.rs | 2 +- aquadoggo/src/http/api.rs | 8 +- aquadoggo/src/http/context.rs | 8 +- aquadoggo/src/http/service.rs | 7 +- aquadoggo/src/materializer/tasks/blob.rs | 17 ++- .../src/materializer/tasks/dependency.rs | 2 +- .../materializer/tasks/garbage_collection.rs | 6 +- aquadoggo/src/materializer/tasks/reduce.rs | 8 +- aquadoggo/src/network/service.rs | 2 +- aquadoggo/src/node.rs | 15 +-- .../src/proptests/document_strategies.rs | 2 +- aquadoggo/src/replication/manager.rs | 4 +- aquadoggo/src/replication/strategies/diff.rs | 4 +- aquadoggo/src/schema/schema_provider.rs | 4 +- aquadoggo/src/test_utils/client.rs | 2 +- aquadoggo/src/test_utils/runner.rs | 21 ++-- aquadoggo/src/tests.rs | 4 +- aquadoggo_cli/Cargo.toml | 6 +- aquadoggo_cli/README.md | 16 ++- aquadoggo_cli/config.toml | 16 +++ aquadoggo_cli/src/config.rs | 23 +++- aquadoggo_cli/src/key_pair.rs | 8 +- aquadoggo_cli/src/main.rs | 31 ++++- 39 files changed, 225 insertions(+), 181 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0dc239da9..bee3a970a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Add method to store for pruning document views [#491](https://github.com/p2panda/aquadoggo/pull/491) - Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) - Task for automatic garbage collection of unused documents and views [#500](https://github.com/p2panda/aquadoggo/pull/500) +- Blobs directory configuration [#549](https://github.com/p2panda/aquadoggo/pull/549) ### Changed diff --git a/README.md b/README.md index d65e45fcb..9ae74aac3 100644 --- a/README.md +++ b/README.md @@ -213,7 +213,7 @@ If you are not working with Rust you can create FFI bindings from the `aquadoggo As an application developer the interface to `aquadoggo` you are likely to use the most is the GraphQL query API. For whichever schema your node supports a custom query api is generated, you use this to fetch data into your app. Results from a collection query can be paginated, filtered. 
-Fetch one "mushroom" by it's id, returning values for only the selected fields: +Fetch one "mushroom" by its id, returning values for only the selected fields: ```graphql { mushroom: mushroom_0020c3accb0b0c8822ecc0309190e23de5f7f6c82f660ce08023a1d74e055a3d7c4d( diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 9b1f72b13..0cf5acaec 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -68,7 +68,6 @@ sqlx = { version = "0.6.1", features = [ "sqlite", "runtime-tokio-rustls", ] } -tempfile = "3.7.0" thiserror = "1.0.39" tokio = { version = "1.28.2", features = [ "macros", @@ -112,5 +111,6 @@ rstest = "0.15.0" rstest_reuse = "0.3.0" serde_bytes = "0.11.12" serde_json = "1.0.85" +tempfile = "3.7.0" tower = "0.4.13" tower-service = "0.3.2" diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 4614ccecf..eba85da0a 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -6,9 +6,6 @@ use p2panda_rs::schema::SchemaId; use crate::network::NetworkConfiguration; -/// Blobs directory name. -pub const BLOBS_DIR_NAME: &str = "blobs"; - /// Configuration object holding all important variables throughout the application. #[derive(Debug, Clone)] pub struct Configuration { @@ -26,9 +23,6 @@ pub struct Configuration { /// _not_ recommended for production settings. pub allow_schema_ids: AllowList, - /// Path to blobs directory. - pub blob_dir: Option, - /// URL / connection string to PostgreSQL or SQLite database. pub database_url: String, @@ -44,6 +38,12 @@ pub struct Configuration { /// 2020. pub http_port: u16, + /// Path to folder where blobs (binary files) are kept and served from. + /// + /// **Warning**: When set to a temporary directory, make sure that also the database itself is + /// not persisted, otherwise you will run into data inconsistencies. + pub blobs_base_path: PathBuf, + /// Number of concurrent workers which defines the maximum of materialization tasks which can /// be worked on simultaneously. /// @@ -59,10 +59,10 @@ impl Default for Configuration { fn default() -> Self { Self { allow_schema_ids: AllowList::Wildcard, - blob_dir: None, database_url: "sqlite::memory:".into(), database_max_connections: 32, http_port: 2020, + blobs_base_path: PathBuf::new(), worker_pool_size: 16, network: NetworkConfiguration::default(), } diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs index 73f1153a3..a33b33081 100644 --- a/aquadoggo/src/db/stores/blob.rs +++ b/aquadoggo/src/db/stores/blob.rs @@ -21,7 +21,7 @@ const BLOB_QUERY_PAGE_SIZE: u64 = 10; pub type BlobData = Vec; impl SqlStore { - /// Get the data for one blob from the store, identified by it's document id. + /// Get the data for one blob from the store, identified by its document id. pub async fn get_blob(&self, id: &DocumentId) -> Result, BlobStoreError> { // Get the root blob document let blob_document = match self.get_document(id).await? { @@ -36,7 +36,7 @@ impl SqlStore { document_to_blob_data(self, blob_document).await } - /// Get the data for one blob from the store, identified by it's document view id. + /// Get the data for one blob from the store, identified by its document view id. pub async fn get_blob_by_view_id( &self, view_id: &DocumentViewId, @@ -62,7 +62,7 @@ impl SqlStore { // If there are no documents referring to the blob then we continue with the purge. 
if blob_reverse_relations.is_empty() { - // Collect the document view ids of all pieces this blob has ever referred to in it's + // Collect the document view ids of all pieces this blob has ever referred to in its // `pieces` let blob_piece_ids: Vec = query_scalar( " diff --git a/aquadoggo/src/db/stores/document.rs b/aquadoggo/src/db/stores/document.rs index 6aa16d3e0..6df98d61d 100644 --- a/aquadoggo/src/db/stores/document.rs +++ b/aquadoggo/src/db/stores/document.rs @@ -9,7 +9,7 @@ //! themselves. On completion, the resultant documents are stored and can be retrieved using the //! methods defined here. //! -//! The whole document store can be seen as a live cache. All it's content is derived from +//! The whole document store can be seen as a live cache. All its content is derived from //! operations already stored on the node. It allows easy and quick access to current or pinned //! values. //! @@ -23,12 +23,11 @@ //! state, we call these states document views. When a document is updated it gets a new state, or //! view, which can be referred to by a globally unique document view id. //! -//! The getter methods allow retrieving a document by it's `DocumentId` or it's -//! `DocumentViewId`. The former always returns the most current document state, the latter -//! returns the specific document view if it has already been materialised and stored. Although it -//! is possible to construct a document at any point in it's history if all operations are -//! retained, we use a system of "pinned relations" to identify and materialise only views we -//! explicitly wish to keep. +//! The getter methods allow retrieving a document by its `DocumentId` or its `DocumentViewId`. The +//! former always returns the most current document state, the latter returns the specific document +//! view if it has already been materialised and stored. Although it is possible to construct a +//! document at any point in its history if all operations are retained, we use a system of "pinned +//! relations" to identify and materialise only views we explicitly wish to keep. use async_trait::async_trait; use log::debug; use p2panda_rs::document::traits::AsDocument; @@ -49,9 +48,9 @@ use crate::db::SqlStore; impl DocumentStore for SqlStore { type Document = StorageDocument; - /// Get a document from the store by it's `DocumentId`. + /// Get a document from the store by its `DocumentId`. /// - /// Retrieves a document in it's most current state from the store. Ignores documents which + /// Retrieves a document in its most current state from the store. Ignores documents which /// contain a DELETE operation. /// /// An error is returned only if a fatal database error occurs. @@ -113,7 +112,7 @@ impl DocumentStore for SqlStore { /// Get a document from the database by `DocumentViewId`. /// - /// Get's a document at a specific point in it's history. Only returns views that have already + /// Get's a document at a specific point in its history. Only returns views that have already /// been materialised and persisted in the store. These are likely to be "pinned views" which /// are relations from other documents, in which case the materialiser service will have /// identified and materialised them ready for querying. @@ -276,7 +275,7 @@ impl SqlStore { /// current view and field values into the `document_views` and `document_view_fields` tables /// respectively. 
/// - /// If the document already existed in the store then it's current view and view id will be + /// If the document already existed in the store then its current view and view id will be /// updated with those contained on the passed document. /// /// If any of the operations fail all insertions are rolled back. @@ -374,11 +373,11 @@ impl SqlStore { ) -> Result, DocumentStorageError> { let document_view_ids: Vec = query_scalar( " - SELECT + SELECT document_views.document_view_id - FROM + FROM document_views - WHERE + WHERE document_views.document_id = $1 ", ) @@ -404,18 +403,18 @@ impl SqlStore { ) -> Result, DocumentStorageError> { let document_view_ids: Vec = query_scalar( " - SELECT DISTINCT + SELECT DISTINCT document_views.document_id FROM document_views - WHERE - document_views.document_view_id + WHERE + document_views.document_view_id IN ( SELECT operation_fields_v1.value - FROM + FROM document_view_fields - LEFT JOIN + LEFT JOIN operation_fields_v1 ON document_view_fields.operation_id = operation_fields_v1.operation_id @@ -423,7 +422,7 @@ impl SqlStore { document_view_fields.name = operation_fields_v1.name WHERE operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') - AND + AND document_view_fields.document_view_id = $1 ) ", @@ -456,14 +455,14 @@ impl SqlStore { // view of a document, the deletion will not go ahead. let result = query( " - DELETE FROM + DELETE FROM document_views WHERE document_views.document_view_id = $1 AND NOT EXISTS ( - SELECT - document_view_fields.document_view_id - FROM + SELECT + document_view_fields.document_view_id + FROM document_view_fields LEFT JOIN operation_fields_v1 @@ -473,12 +472,12 @@ impl SqlStore { document_view_fields.name = operation_fields_v1.name WHERE operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') - AND + AND operation_fields_v1.value = $1 ) AND NOT EXISTS ( SELECT documents.document_id FROM documents - WHERE documents.document_view_id = $1 + WHERE documents.document_view_id = $1 ) " ) @@ -497,7 +496,7 @@ impl SqlStore { } } - /// Check if this view is the current view of it's document. + /// Check if this view is the current view of its document. pub async fn is_current_view( &self, document_view_id: &DocumentViewId, @@ -516,7 +515,7 @@ impl SqlStore { Ok(document_view_id.is_some()) } - /// Purge a document from the store by it's id. + /// Purge a document from the store by its id. /// /// This removes entries, operations and any materialized documents which exist. /// @@ -550,7 +549,7 @@ impl SqlStore { // Delete rows from `entries` table. query( " - DELETE FROM entries + DELETE FROM entries WHERE entries.entry_hash IN ( SELECT operations_v1.operation_id FROM operations_v1 WHERE operations_v1.document_id = $1 @@ -593,7 +592,7 @@ async fn get_document_view_field_rows( // // This query performs a join against the `operation_fields_v1` table as this is where the // actual field values live. The `document_view_fields` table defines relations between a - // document view and the operation values which hold it's field values. + // document view and the operation values which hold its field values. // // Each field has one row, or in the case of list values (pinned relations, or relation lists) // then one row exists for every item in the list. 
The `list_index` column is used for @@ -608,7 +607,7 @@ async fn get_document_view_field_rows( operation_fields_v1.list_index, operation_fields_v1.field_type, operation_fields_v1.value - FROM + FROM document_view_fields LEFT JOIN operation_fields_v1 @@ -616,7 +615,7 @@ async fn get_document_view_field_rows( document_view_fields.operation_id = operation_fields_v1.operation_id AND document_view_fields.name = operation_fields_v1.name - LEFT JOIN + LEFT JOIN document_views ON document_view_fields.document_view_id = document_views.document_view_id @@ -724,7 +723,7 @@ async fn insert_document( .await .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; - // If the document is not deleted, then we also want to insert it's view and fields. + // If the document is not deleted, then we also want to insert its view and fields. if !document.is_deleted() && document.view().is_some() { // Construct the view, unwrapping the document view fields as we checked they exist above. let document_view = @@ -800,7 +799,7 @@ mod tests { let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // Find the "CREATE" operation and get it's id. + // Find the "CREATE" operation and get its id. let create_operation = WithId::::id( operations .iter() @@ -826,9 +825,9 @@ mod tests { .await; assert!(result.is_ok()); - // We should be able to retrieve the document at either of it's views now. + // We should be able to retrieve the document at either of its views now. - // Here we request the document with it's initial state. + // Here we request the document with its initial state. let retrieved_document = node .context .store @@ -842,7 +841,7 @@ mod tests { assert_eq!(retrieved_document.view_id(), document_at_view_1.view_id()); assert_eq!(retrieved_document.fields(), document_at_view_1.fields()); - // Here we request it at it's current state. + // Here we request it at its current state. let retrieved_document = node .context .store @@ -856,7 +855,7 @@ mod tests { assert_eq!(retrieved_document.view_id(), document.view_id()); assert_eq!(retrieved_document.fields(), document.fields()); - // If we retrieve the document by it's id, we expect the current state. + // If we retrieve the document by its id, we expect the current state. let retrieved_document = node .context .store @@ -875,8 +874,7 @@ mod tests { #[rstest] fn document_view_does_not_exist(random_document_view_id: DocumentViewId) { test_runner(|node: TestNode| async move { - // We try to retrieve a document view by it's id but no view - // with that id exists. + // We try to retrieve a document view by its id but no view with that id exists. let view_does_not_exist = node .context .store @@ -925,20 +923,20 @@ mod tests { config: PopulateStoreConfig, ) { test_runner(|node: TestNode| async move { - // Populate the store with some entries and operations but DON'T materialise any resulting documents. + // Populate the store with some entries and operations but DON'T materialise any + // resulting documents. let (_, document_ids) = populate_store(&node.context.store, &config).await; let document_id = document_ids.get(0).expect("At least one document id"); // Build the document. let document = build_document(&node.context.store, &document_id).await; - // The document is successfully inserted into the database, this - // relies on the operations already being present and would fail - // if they were not. 
+ // The document is successfully inserted into the database, this relies on the + // operations already being present and would fail if they were not. let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // We can retrieve the most recent document view for this document by it's id. + // We can retrieve the most recent document view for this document by its id. let retrieved_document = node .context .store @@ -947,8 +945,8 @@ mod tests { .unwrap() .unwrap(); - // We can retrieve a specific document view for this document by it's view_id. - // In this case, that should be the same as the view retrieved above. + // We can retrieve a specific document view for this document by its view_id. In this + // case, that should be the same as the view retrieved above. let specific_document = node .context .store @@ -985,7 +983,8 @@ mod tests { config: PopulateStoreConfig, ) { test_runner(|node: TestNode| async move { - // Populate the store with some entries and operations but DON'T materialise any resulting documents. + // Populate the store with some entries and operations but DON'T materialise any + // resulting documents. let (_, document_ids) = populate_store(&node.context.store, &config).await; let document_id = document_ids.get(0).expect("At least one document id"); @@ -997,12 +996,12 @@ mod tests { // As it has been deleted, there should be no view. assert!(document.view().is_none()); - // Here we insert the document. This action also sets it's most recent view. + // Here we insert the document. This action also sets its most recent view. let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // We retrieve the most recent view for this document by it's document id, - // but as the document is deleted, we should get a none value back. + // We retrieve the most recent view for this document by its document id, but as the + // document is deleted, we should get a none value back. let document = node .context .store @@ -1011,8 +1010,8 @@ mod tests { .unwrap(); assert!(document.is_none()); - // We also try to retrieve the specific document view by it's view id. - // This should also return none as it is deleted. + // We also try to retrieve the specific document view by its view id. This should also + // return none as it is deleted. let document = node .context .store @@ -1090,14 +1089,14 @@ mod tests { .build() .expect("Build document"); - // Insert it to the database, this should also update it's view. + // Insert it to the database, this should also update its view. node.context .store .insert_document(&document) .await .expect("Insert document"); - // We can retrieve the document by it's document id. + // We can retrieve the document by its document id. let retrieved_document = node .context .store @@ -1106,7 +1105,7 @@ mod tests { .expect("Get document") .expect("Unwrap document"); - // And also directly by it's document view id. + // And also directly by its document view id. let specific_document = node .context .store diff --git a/aquadoggo/src/db/stores/entry.rs b/aquadoggo/src/db/stores/entry.rs index e9c3d7978..ebdf47fd3 100644 --- a/aquadoggo/src/db/stores/entry.rs +++ b/aquadoggo/src/db/stores/entry.rs @@ -74,7 +74,7 @@ impl EntryStore for SqlStore { Ok(()) } - /// Get an entry from storage by it's hash id. + /// Get an entry from storage by its hash id. /// /// Returns a result containing the entry wrapped in an option if it was found successfully. /// Returns `None` if the entry was not found in storage. 
Errors when a fatal storage error @@ -186,7 +186,7 @@ impl EntryStore for SqlStore { /// Get all entries of a given schema /// /// Returns a result containing a vector of all entries which follow the passed schema - /// (identified by it's `SchemaId`). If no entries exist, or the schema is not known by this + /// (identified by its `SchemaId`). If no entries exist, or the schema is not known by this /// node, then an empty vector is returned. async fn get_entries_by_schema( &self, @@ -222,7 +222,7 @@ impl EntryStore for SqlStore { /// Get all entries of a given schema. /// /// Returns a result containing a vector of all entries which follow the passed schema - /// (identified by it's `SchemaId`). If no entries exist, or the schema is not known by this + /// (identified by its `SchemaId`). If no entries exist, or the schema is not known by this /// node, then an empty vector is returned. async fn get_paginated_log_entries( &self, @@ -444,7 +444,7 @@ mod tests { .await; assert!(result.is_ok()); - // Retrieve the entry again by it's hash. + // Retrieve the entry again by its hash let retrieved_entry = node .context .store @@ -622,7 +622,7 @@ mod tests { .expect("At least one key pair") .public_key(); - // We should be able to get each entry by it's public_key, log_id and seq_num. + // We should be able to get each entry by its public_key, log_id and seq_num. for seq_num in 1..10 { let seq_num = SeqNum::new(seq_num).unwrap(); diff --git a/aquadoggo/src/db/stores/operation.rs b/aquadoggo/src/db/stores/operation.rs index d70e438b1..73550759f 100644 --- a/aquadoggo/src/db/stores/operation.rs +++ b/aquadoggo/src/db/stores/operation.rs @@ -25,8 +25,8 @@ use crate::db::SqlStore; /// the required `AsVerifiedOperation` trait. /// /// There are several intermediary structs defined in `db/models/` which represent rows from tables -/// in the database where this entry, it's fields and opreation relations are stored. These are -/// used in conjunction with the `sqlx` library to coerce raw values into structs when querying the +/// in the database where this entry, its fields and operation relations are stored. These are used +/// in conjunction with the `sqlx` library to coerce raw values into structs when querying the /// database. #[async_trait] impl OperationStore for SqlStore { @@ -77,7 +77,7 @@ impl OperationStore for SqlStore { .await } - /// Get an operation identified by it's `OperationId`. + /// Get an operation identified by its `OperationId`. /// /// Returns a result containing an `VerifiedOperation` wrapped in an option, if no operation /// with this id was found, returns none. Errors if a fatal storage error occured. @@ -269,7 +269,7 @@ impl SqlStore { Ok(()) } - /// Insert an operation as well as the index for it's position in the document after + /// Insert an operation as well as the index for its position in the document after /// materialization has occurred. async fn insert_operation_with_index( &self, @@ -467,7 +467,7 @@ mod tests { .await; assert!(result.is_ok()); - // Request the previously inserted operation by it's id. + // Request the previously inserted operation by its id. let returned_operation = node .context .store diff --git a/aquadoggo/src/db/stores/schema.rs b/aquadoggo/src/db/stores/schema.rs index 59a17077c..e00f99f67 100644 --- a/aquadoggo/src/db/stores/schema.rs +++ b/aquadoggo/src/db/stores/schema.rs @@ -14,7 +14,7 @@ use crate::db/errors.rs no; use crate::db::errors::SchemaStoreError; use crate::db::SqlStore; impl SqlStore { - /// Get a Schema from the database by it's document view id.
+ /// Get a Schema from the database by its document view id. /// /// Internally, this method performs three steps: /// - fetch the document view for the schema definition @@ -52,7 +52,7 @@ impl SqlStore { // We silently ignore errors as we are assuming views we retrieve from the database // themselves are valid, meaning any error in constructing the schema must be because some - // of it's fields are simply missing from our database. + // of its fields are simply missing from our database. let schema = Schema::from_views(schema_view, schema_fields).ok(); Ok(schema) @@ -205,7 +205,7 @@ mod tests { ) .await; - // Retrieve the schema by it's document view id. We unwrap here as we expect an `Ok` + // Retrieve the schema by its document view id. We unwrap here as we expect an `Ok` // result for the succeeding db query, even though the schema could not be built. let schema = node .context diff --git a/aquadoggo/src/db/types/entry.rs b/aquadoggo/src/db/types/entry.rs index bda3ad030..5d093c3f1 100644 --- a/aquadoggo/src/db/types/entry.rs +++ b/aquadoggo/src/db/types/entry.rs @@ -9,9 +9,9 @@ use p2panda_rs::operation::EncodedOperation; use crate::db::models::EntryRow; -/// A signed entry and it's encoded operation. Entries are the lowest level data type on the -/// p2panda network, they are signed by authors and form bamboo append only logs. The operation is -/// an entries' payload, it contains the data mutations which authors publish. +/// A signed entry and its encoded operation. Entries are the lowest level data type on the p2panda +/// network, they are signed by authors and form bamboo append only logs. The operation is an +/// entries' payload, it contains the data mutations which authors publish. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct StorageEntry { /// PublicKey of this entry. diff --git a/aquadoggo/src/db/types/operation.rs b/aquadoggo/src/db/types/operation.rs index 2af007136..6142cb2a0 100644 --- a/aquadoggo/src/db/types/operation.rs +++ b/aquadoggo/src/db/types/operation.rs @@ -36,7 +36,7 @@ pub struct StorageOperation { /// Index for the position of this operation once topological sorting of the operation graph /// has been performed. /// - /// Is `None` when the operation has not been materialized into it's document yet. + /// Is `None` when the operation has not been materialized into its document yet. pub(crate) sorted_index: Option, } diff --git a/aquadoggo/src/graphql/constants.rs b/aquadoggo/src/graphql/constants.rs index 9fb0f4cf7..22250bad4 100644 --- a/aquadoggo/src/graphql/constants.rs +++ b/aquadoggo/src/graphql/constants.rs @@ -58,10 +58,10 @@ pub const ORDER_DIRECTION_ARG: &str = "orderDirection"; /// Name of field where a collection of documents can be accessed. pub const DOCUMENTS_FIELD: &str = "documents"; -/// Name of field on a document where it's fields can be accessed. +/// Name of field on a document where its fields can be accessed. pub const FIELDS_FIELD: &str = "fields"; -/// Name of field on a document where it's meta data can be accessed. +/// Name of field on a document where its meta data can be accessed. pub const META_FIELD: &str = "meta"; /// Name of field on a document where pagination cursor can be accessed. 
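For orientation, a minimal sketch of fetching a blob from a running node over HTTP, as enabled by the blob route handlers in http/api.rs further below; this is illustrative only and not code from the patches. The `/blobs/<blob document id>` path is an assumption (the actual route constant is not shown in this excerpt), while port 2020 is the documented default `http_port`.

use reqwest::header::CONTENT_TYPE;

// Illustrative only: request a blob served by the node's HTTP service.
// The route path below is an assumed example, not a constant taken from the code.
#[tokio::main]
async fn main() -> Result<(), reqwest::Error> {
    let url = "http://localhost:2020/blobs/<blob document id>";

    let response = reqwest::get(url).await?;
    println!("status: {}", response.status());

    // The handler responds with the MIME type stored in the blob document's `mime_type` field.
    if let Some(content_type) = response.headers().get(CONTENT_TYPE) {
        println!("content type: {:?}", content_type);
    }

    // The body contains the raw blob bytes assembled from its `blob_piece_v1` documents.
    let bytes = response.bytes().await?;
    println!("received {} bytes", bytes.len());

    Ok(())
}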
diff --git a/aquadoggo/src/graphql/mutations/publish.rs b/aquadoggo/src/graphql/mutations/publish.rs index 745603ed9..d2e47caa3 100644 --- a/aquadoggo/src/graphql/mutations/publish.rs +++ b/aquadoggo/src/graphql/mutations/publish.rs @@ -240,7 +240,7 @@ mod tests { let context = HttpServiceContext::new( node.context.store.clone(), manager, - node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + node.context.config.blobs_base_path.to_path_buf(), ); let response = context.schema.execute(publish_request).await; @@ -305,7 +305,7 @@ mod tests { let context = HttpServiceContext::new( node.context.store.clone(), manager, - node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + node.context.config.blobs_base_path.to_path_buf(), ); let response = context @@ -337,7 +337,7 @@ mod tests { let context = HttpServiceContext::new( node.context.store.clone(), manager, - node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + node.context.config.blobs_base_path.to_path_buf(), ); context.schema.execute(publish_request).await; diff --git a/aquadoggo/src/graphql/objects/document.rs b/aquadoggo/src/graphql/objects/document.rs index 0a86feb44..6f894daad 100644 --- a/aquadoggo/src/graphql/objects/document.rs +++ b/aquadoggo/src/graphql/objects/document.rs @@ -13,7 +13,7 @@ use crate::graphql::utils::{collection_item_name, fields_name}; /// schema. /// /// Constructs resolvers for both `fields` and `meta` fields. The former simply passes up the query -/// arguments to it's children query fields. The latter calls the `resolve` method defined on +/// arguments to its children query fields. The latter calls the `resolve` method defined on /// `DocumentMeta` type. pub fn build_document_object(schema: &Schema) -> Object { let fields = Object::new(schema.id().to_string()); @@ -24,7 +24,7 @@ pub fn build_document_object(schema: &Schema) -> Object { /// schema and are contained in a paginated collection. /// /// Contains resolvers for `cursor`, `fields` and `meta`. `fields` simply passes up the query -/// arguments to it's children query fields. `meta` calls the `resolve` method defined on +/// arguments to its children query fields. `meta` calls the `resolve` method defined on /// `DocumentMeta` type. pub fn build_paginated_document_object(schema: &Schema) -> Object { let fields = Object::new(collection_item_name(schema.id())); @@ -56,7 +56,7 @@ pub fn build_paginated_document_object(schema: &Schema) -> Object { /// Add application `fields` and `meta` fields to a GraphQL object. fn with_document_fields(fields: Object, schema: &Schema) -> Object { fields - // The `fields` field passes down the parent value to it's children + // The `fields` field passes down the parent value to its children .field( Field::new( constants::FIELDS_FIELD, diff --git a/aquadoggo/src/graphql/queries/collection.rs b/aquadoggo/src/graphql/queries/collection.rs index 44655b9d1..6d7d6ff73 100644 --- a/aquadoggo/src/graphql/queries/collection.rs +++ b/aquadoggo/src/graphql/queries/collection.rs @@ -921,7 +921,7 @@ mod tests { // That's more my style, so let's get the lyrics for this song. But there are a lot, // so I'll just get the first 2 lines. - // We can identify the song by it's id and then paginate the lyrics field which is a + // We can identify the song by its id and then paginate the lyrics field which is a // relation list of song lyric lines. 
let oh_bondage_up_yours_id = oh_bondage_up_yours["meta"]["documentId"].as_str().unwrap(); diff --git a/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs b/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs index ef522b908..4cdeb0a57 100644 --- a/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs +++ b/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs @@ -7,7 +7,7 @@ use dynamic_graphql::{Error, Result, Scalar, ScalarValue, Value}; use p2panda_rs::document::DocumentViewId; /// The document view id of a p2panda document. Refers to a specific point in a documents history -/// and can be used to deterministically reconstruct it's state at that time. +/// and can be used to deterministically reconstruct its state at that time. #[derive(Scalar, Clone, Debug, Eq, PartialEq)] #[graphql(name = "DocumentViewId", validator(validate))] pub struct DocumentViewIdScalar(DocumentViewId); diff --git a/aquadoggo/src/graphql/schema.rs b/aquadoggo/src/graphql/schema.rs index 50244d7e9..e16aee61d 100644 --- a/aquadoggo/src/graphql/schema.rs +++ b/aquadoggo/src/graphql/schema.rs @@ -111,7 +111,7 @@ pub async fn build_root_schema( .register(filter_input); // Add a query for each schema. It offers an interface to retrieve a single document of - // this schema by it's document id or view id. Its resolver parses and validates the passed + // this schema by its document id or view id. Its resolver parses and validates the passed // parameters, then forwards them up to the children query fields root_query = build_document_query(root_query, &schema); diff --git a/aquadoggo/src/http/api.rs b/aquadoggo/src/http/api.rs index cab488964..c29a47d6f 100644 --- a/aquadoggo/src/http/api.rs +++ b/aquadoggo/src/http/api.rs @@ -60,7 +60,7 @@ pub async fn handle_blob_document( return Err(BlobHttpError::NotFound); } - respond_with_blob(if_none_match, context.blob_dir_path, document).await + respond_with_blob(if_none_match, context.blobs_base_path, document).await } /// Handle requests for a blob document view served via HTTP. @@ -87,7 +87,7 @@ pub async fn handle_blob_view( return Err(BlobHttpError::NotFound); } - respond_with_blob(if_none_match, context.blob_dir_path, document).await + respond_with_blob(if_none_match, context.blobs_base_path, document).await } /// Returns HTTP response with the contents, ETag and given MIME type of a blob. @@ -95,7 +95,7 @@ pub async fn handle_blob_view( /// Supports basic caching by handling "IfNoneMatch" headers matching the latest ETag. async fn respond_with_blob( if_none_match: IfNoneMatch, - blob_dir_path: PathBuf, + blobs_base_path: PathBuf, document: impl AsDocument, ) -> Result { let view_id = document.view_id(); @@ -120,7 +120,7 @@ async fn respond_with_blob( }?; // Get body from read-stream of stored file on file system - let mut file_path = blob_dir_path; + let mut file_path = blobs_base_path; file_path.push(format!("{view_id}")); match File::open(&file_path).await { Ok(file) => { diff --git a/aquadoggo/src/http/context.rs b/aquadoggo/src/http/context.rs index f431ee7c9..01015bf5c 100644 --- a/aquadoggo/src/http/context.rs +++ b/aquadoggo/src/http/context.rs @@ -13,16 +13,16 @@ pub struct HttpServiceContext { /// Dynamic GraphQL schema manager. pub schema: GraphQLSchemaManager, - /// Path of the directory where blobs should be served from - pub blob_dir_path: PathBuf, + /// Path of the directory where blobs should be served from. 
+ pub blobs_base_path: PathBuf, } impl HttpServiceContext { - pub fn new(store: SqlStore, schema: GraphQLSchemaManager, blob_dir_path: PathBuf) -> Self { + pub fn new(store: SqlStore, schema: GraphQLSchemaManager, blobs_base_path: PathBuf) -> Self { Self { store, schema, - blob_dir_path, + blobs_base_path, } } } diff --git a/aquadoggo/src/http/service.rs b/aquadoggo/src/http/service.rs index 7c045e50b..855d61193 100644 --- a/aquadoggo/src/http/service.rs +++ b/aquadoggo/src/http/service.rs @@ -61,13 +61,13 @@ pub async fn http_service( let graphql_schema_manager = GraphQLSchemaManager::new(context.store.clone(), tx, context.schema_provider.clone()).await; - let blob_dir_path = context.config.blob_dir.as_ref().expect("Base path not set"); + let blobs_base_path = &context.config.blobs_base_path; // Introduce a new context for all HTTP routes let http_context = HttpServiceContext::new( context.store.clone(), graphql_schema_manager, - blob_dir_path.to_owned(), + blobs_base_path.to_owned(), ); // Start HTTP server with given port and re-attempt with random port if it was taken already @@ -105,7 +105,6 @@ mod tests { use serde_json::json; use tokio::sync::broadcast; - use crate::config::BLOBS_DIR_NAME; use crate::graphql::GraphQLSchemaManager; use crate::http::context::HttpServiceContext; use crate::schema::SchemaProvider; @@ -124,7 +123,7 @@ mod tests { let context = HttpServiceContext::new( node.context.store.clone(), graphql_schema_manager, - BLOBS_DIR_NAME.into(), + node.context.config.blobs_base_path.clone(), ); let client = TestClient::new(build_server(context)); diff --git a/aquadoggo/src/materializer/tasks/blob.rs b/aquadoggo/src/materializer/tasks/blob.rs index 8366cbba6..133ea795c 100644 --- a/aquadoggo/src/materializer/tasks/blob.rs +++ b/aquadoggo/src/materializer/tasks/blob.rs @@ -63,7 +63,7 @@ pub async fn blob_task(context: Context, input: TaskInput) -> TaskResult TaskResult base_path, - None => return Err(TaskError::Critical("No base path configured".to_string())), - }; - let blob_view_path = base_path.join(blob_document.view_id().to_string()); + // Compose, and when needed create, the path for the blob file + let blob_view_path = context + .config + .blobs_base_path + .join(blob_document.view_id().to_string()); - // Write the blob to the filesystem. + // Write the blob to the filesystem info!("Creating blob at path {}", blob_view_path.display()); let mut file = File::create(&blob_view_path).await.map_err(|err| { @@ -175,7 +174,7 @@ mod tests { assert!(result.unwrap().is_none()); // Construct the expected path to the blob view file - let base_path = node.context.config.blob_dir.as_ref().unwrap(); + let base_path = &node.context.config.blobs_base_path; let blob_path = base_path.join(blob_view_id.to_string()); // Read from this file diff --git a/aquadoggo/src/materializer/tasks/dependency.rs b/aquadoggo/src/materializer/tasks/dependency.rs index 83c3d58d4..e9d1de9c3 100644 --- a/aquadoggo/src/materializer/tasks/dependency.rs +++ b/aquadoggo/src/materializer/tasks/dependency.rs @@ -894,7 +894,7 @@ mod tests { assert_eq!(tasks[0].worker_name(), &String::from("dependency")); // 2. 
The "dependency" task will try to resolve the pinned document view pointing at - // the "post" document in it's version 2 + // the "post" document in its version 2 let tasks = dependency_task(node_b.context.clone(), tasks[0].input().clone()) .await .unwrap(); diff --git a/aquadoggo/src/materializer/tasks/garbage_collection.rs b/aquadoggo/src/materializer/tasks/garbage_collection.rs index 6138273cc..7e6901e8e 100644 --- a/aquadoggo/src/materializer/tasks/garbage_collection.rs +++ b/aquadoggo/src/materializer/tasks/garbage_collection.rs @@ -38,7 +38,7 @@ pub async fn garbage_collection_task(context: Context, input: TaskInput) -> Task let mut all_effected_child_relations = vec![]; let mut deleted_views_count = 0; for document_view_id in &all_document_view_ids { - // Check if this is the current view of it's document. This will still return true + // Check if this is the current view of its document. This will still return true // if the document in question is deleted. let is_current_view = context .store @@ -90,8 +90,8 @@ pub async fn garbage_collection_task(context: Context, input: TaskInput) -> Task .expect("Operation exists in store"); if let SchemaId::Blob(_) = operation.schema_id() { - // Purge the blob and all it's pieces. This only succeeds if no document - // refers to the blob document by either a relation or pinned relation. + // Purge the blob and all its pieces. This only succeeds if no document refers + // to the blob document by either a relation or pinned relation. context .store .purge_blob(&document_id) diff --git a/aquadoggo/src/materializer/tasks/reduce.rs b/aquadoggo/src/materializer/tasks/reduce.rs index f24aeb3c5..ac12d2a51 100644 --- a/aquadoggo/src/materializer/tasks/reduce.rs +++ b/aquadoggo/src/materializer/tasks/reduce.rs @@ -223,7 +223,7 @@ async fn reduce_document + WithPublicKey>( .map_err(|err| TaskError::Critical(err.to_string()))?; } - // Insert this document into storage. If it already existed, this will update it's + // Insert this document into storage. If it already existed, this will update its // current view context .store @@ -453,7 +453,7 @@ mod tests { .unwrap() .sorted(); - // Reduce document to it's current view and insert into database + // Reduce document to its current view and insert into database let input = TaskInput::DocumentId(document_id.clone()); assert!(reduce_task(node.context.clone(), input).await.is_ok()); @@ -585,11 +585,11 @@ mod tests { ) { // Prepare empty database. test_runner(move |node: TestNode| async move { - // Dispatch a reduce task for a document which doesn't exist by it's document id. + // Dispatch a reduce task for a document which doesn't exist by its document id let input = TaskInput::DocumentId(document_id); assert!(reduce_task(node.context.clone(), input).await.is_ok()); - // Dispatch a reduce task for a document which doesn't exist by it's document view id. + // Dispatch a reduce task for a document which doesn't exist by its document view id let input = TaskInput::DocumentViewId(document_view_id); assert!(reduce_task(node.context.clone(), input).await.is_ok()); }); diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index 200523262..720d8e36f 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -200,7 +200,7 @@ pub async fn connect_to_relay( // our local public address and (b) enable a freshly started relay to learn its public address. 
swarm.dial(relay_address.clone())?; - // Wait to get confirmation that we told the relay node it's public address and that they told + // Wait to get confirmation that we told the relay node its public address and that they told // us ours. let mut learned_observed_addr = false; let mut told_relay_observed_addr = false; diff --git a/aquadoggo/src/node.rs b/aquadoggo/src/node.rs index c92fd32cc..3bd70c151 100644 --- a/aquadoggo/src/node.rs +++ b/aquadoggo/src/node.rs @@ -2,11 +2,9 @@ use anyhow::Result; use p2panda_rs::identity::KeyPair; -use tempfile::TempDir; -use tokio::fs; use crate::bus::ServiceMessage; -use crate::config::{Configuration, BLOBS_DIR_NAME}; +use crate::config::Configuration; use crate::context::Context; use crate::db::SqlStore; use crate::db::{connection_pool, create_database, run_pending_migrations, Pool}; @@ -47,7 +45,7 @@ pub struct Node { impl Node { /// Start p2panda node with your configuration. This method can be used to run the node within /// other applications. - pub async fn start(key_pair: KeyPair, mut config: Configuration) -> Self { + pub async fn start(key_pair: KeyPair, config: Configuration) -> Self { // Initialize database and get connection pool let pool = initialize_db(&config) .await @@ -64,15 +62,6 @@ impl Node { let schema_provider = SchemaProvider::new(application_schema, config.allow_schema_ids.clone()); - // Create temporary dirs for blob storage. - // - // @TODO: Implement configuring this path for persistent storage, see related issue: - // https://github.com/p2panda/aquadoggo/issues/542 - let tmp_dir = TempDir::new().unwrap(); - let blob_dir_path = tmp_dir.path().join(BLOBS_DIR_NAME); - fs::create_dir_all(&blob_dir_path).await.unwrap(); - config.blob_dir = Some(blob_dir_path); - // Create service manager with shared data between services let context = Context::new(store, key_pair, config, schema_provider); let mut manager = diff --git a/aquadoggo/src/proptests/document_strategies.rs b/aquadoggo/src/proptests/document_strategies.rs index eb15d455f..ef7c86550 100644 --- a/aquadoggo/src/proptests/document_strategies.rs +++ b/aquadoggo/src/proptests/document_strategies.rs @@ -15,7 +15,7 @@ const MAX_DOCUMENTS_PER_ROOT_SCHEMA: usize = 15; /// Same as above, this is a shrinking value. const MAX_DOCUMENTS_PER_RELATION_LIST: usize = 2; -/// AST representing a document and it's relations. +/// AST representing a document and its relations. #[derive(Debug, Clone)] pub struct DocumentAST { pub schema_id: SchemaId, diff --git a/aquadoggo/src/replication/manager.rs b/aquadoggo/src/replication/manager.rs index 964d6fdaa..a0f9092ef 100644 --- a/aquadoggo/src/replication/manager.rs +++ b/aquadoggo/src/replication/manager.rs @@ -743,7 +743,7 @@ mod tests { .unwrap(); // We expect Peer B to drop the incoming request from Peer A and simply wait for a - // response from it's original request. + // response from its original request assert_eq!(result.messages.len(), 0); // Peer A has two sessions running: The one initiated by Peer B and the one it @@ -943,7 +943,7 @@ mod tests { let response = result.unwrap(); // We expect Peer B to drop the incoming request from Peer A and simply wait - // for a response from it's original request. + // for a response from its original request assert_eq!(response.messages.len(), 0); // Both peers have exactly one session running. 
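
The next hunk adjusts `remote_requires_entries` in `replication/strategies/diff.rs`, which compares a local log height against the heights a remote peer reported. A minimal sketch of that comparison, using plain `u64` values in place of the real `LogId`/`SeqNum` types (an assumption made only for brevity here), illustrates the inclusive lower bound the updated comments describe:

```rust
use std::collections::HashMap;

/// Simplified sketch: decide whether the remote peer needs entries from one of
/// our local logs and, if so, from which sequence number onwards.
fn remote_requires_entries(
    log_id: u64,
    local_seq_num: u64,
    remote_log_heights: &HashMap<u64, u64>,
) -> Option<(u64, u64)> {
    match remote_log_heights.get(&log_id) {
        // Remote is behind: incrementing its height makes the returned
        // sequence number an inclusive lower bound for the entries to send
        Some(remote_seq_num) if *remote_seq_num < local_seq_num => {
            Some((log_id, *remote_seq_num + 1))
        }
        // Remote is up-to-date (or ahead), nothing to send
        Some(_) => None,
        // Remote doesn't know this log at all, send it from the first entry
        None => Some((log_id, 1)),
    }
}

fn main() {
    let remote_heights = HashMap::from([(0, 4)]);

    // Local log 0 is at seq num 7, the remote reported 4: it needs 5..=7
    assert_eq!(remote_requires_entries(0, 7, &remote_heights), Some((0, 5)));

    // The remote never saw log 1: it needs everything from seq num 1 onwards
    assert_eq!(remote_requires_entries(1, 3, &remote_heights), Some((1, 1)));
}
```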
diff --git a/aquadoggo/src/replication/strategies/diff.rs b/aquadoggo/src/replication/strategies/diff.rs index f3c7a39cc..7d46f0312 100644 --- a/aquadoggo/src/replication/strategies/diff.rs +++ b/aquadoggo/src/replication/strategies/diff.rs @@ -16,7 +16,7 @@ fn remote_requires_entries( remote_log_heights: &HashMap, ) -> Option<(LogId, SeqNum)> { trace!("Local log height: {:?} {:?}", log_id, local_seq_num); - // Get height of the remote log by it's id. + // Get height of the remote log by its id let remote_log_height = remote_log_heights.get(log_id); match remote_log_height { @@ -30,7 +30,7 @@ fn remote_requires_entries( // We increment the seq num as we want it to represent an inclusive lower // bound. // - // We can unwrap as we are incrementing the lower remote seq num which means it's + // We can unwrap as we are incrementing the lower remote seq num which means it // will not reach max seq number. let from_seq_num = remote_seq_num.clone().next().unwrap(); diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 14262249c..d1edfd4fb 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -81,7 +81,7 @@ impl SchemaProvider { /// Inserts or updates the given schema in this provider. /// - /// Returns `true` if a schema was updated or it already existed in it's current state, and + /// Returns `true` if a schema was updated or it already existed in its current state, and /// `false` if it was inserted. pub async fn update(&self, schema: Schema) -> Result { if let AllowList::Set(allow_schema_ids) = &self.allow_schema_ids { @@ -94,7 +94,7 @@ impl SchemaProvider { let schema_exists = schemas.get(schema.id()).is_some(); if schema_exists { - // Return true here as the schema already exists in it's current state so we don't need + // Return true here as the schema already exists in its current state so we don't need // to mutate the schema store or announce any change. 
return Ok(true); } diff --git a/aquadoggo/src/test_utils/client.rs b/aquadoggo/src/test_utils/client.rs index b8a4b29fb..005efcbbb 100644 --- a/aquadoggo/src/test_utils/client.rs +++ b/aquadoggo/src/test_utils/client.rs @@ -80,7 +80,7 @@ pub async fn http_test_client(node: &TestNode) -> TestClient { let http_context = HttpServiceContext::new( node.context.store.clone(), manager, - node.context.config.blob_dir.as_ref().unwrap().to_path_buf(), + node.context.config.blobs_base_path.to_path_buf(), ); TestClient::new(build_server(http_context)) diff --git a/aquadoggo/src/test_utils/runner.rs b/aquadoggo/src/test_utils/runner.rs index 7349a3287..9f1080e71 100644 --- a/aquadoggo/src/test_utils/runner.rs +++ b/aquadoggo/src/test_utils/runner.rs @@ -1,14 +1,12 @@ // SPDX-License-Identifier: AGPL-3.0-or-later use std::sync::Arc; -use std::{fs, panic}; use futures::Future; use p2panda_rs::identity::KeyPair; use tokio::runtime::Builder; use tokio::sync::Mutex; -use crate::config::BLOBS_DIR_NAME; use crate::context::Context; use crate::db::Pool; use crate::db::SqlStore; @@ -102,21 +100,20 @@ pub fn test_runner(test: F) { let (_config, pool) = initialize_db().await; let store = SqlStore::new(pool); - // Construct temporary directory for the test runner - let tmp_dir = tempfile::TempDir::new().unwrap(); - let blob_dir_path = tmp_dir.path().join(BLOBS_DIR_NAME); - fs::create_dir_all(&blob_dir_path).unwrap(); + // Construct temporary blobs directory for the test runner + let temp_dir = tempfile::TempDir::new() + .expect("Could not create temporary test directory for blobs storage"); // Construct node config supporting any schema - let mut cfg = Configuration::default(); - cfg.blob_dir = Some(blob_dir_path); + let mut config = Configuration::default(); + config.blobs_base_path = temp_dir.path().to_path_buf(); // Construct the actual test node let node = TestNode { context: Context::new( store.clone(), KeyPair::new(), - cfg, + config, SchemaProvider::default(), ), }; @@ -142,7 +139,7 @@ pub fn test_runner(test: F) { // there, we need to propagate it further to inform the test runtime about the result match result { Ok(_) => (), - Err(err) => panic::resume_unwind(err.into_panic()), + Err(err) => std::panic::resume_unwind(err.into_panic()), }; }); } @@ -163,7 +160,7 @@ pub fn test_runner_with_manager (), - Err(err) => panic::resume_unwind(err.into_panic()), + Err(err) => std::panic::resume_unwind(err.into_panic()), }; }); } diff --git a/aquadoggo/src/tests.rs b/aquadoggo/src/tests.rs index 2aea479f5..40a3b9b3d 100644 --- a/aquadoggo/src/tests.rs +++ b/aquadoggo/src/tests.rs @@ -156,7 +156,7 @@ async fn e2e() { // Query a document. // // Now that the cafe has been created and updated we can query it from the client. We do can do - // this using it's schema id and document or view id. + // this using its schema id and document or view id. let panda_cafe = query(&client, &panda_cafe_view_id, &cafe_schema_id).await; @@ -175,7 +175,7 @@ async fn e2e() { aquadoggo.shutdown().await; } -/// Publish an entry and it's operation to a node. +/// Publish an entry and its operation to a node. async fn publish(client: &Client, key_pair: &KeyPair, operation: &Operation) -> DocumentViewId { // Publishing operations. 
// diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index c60207e19..327417209 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -29,15 +29,13 @@ figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" libp2p = "0.52.0" log = "0.4.20" +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1" } path-clean = "1.0.1" serde = { version = "1.0.185", features = ["serde_derive"] } -p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1" } +tempfile = "3.7.0" tokio = { version = "1.28.2", features = ["full"] } toml = "0.7.6" [dependencies.aquadoggo] version = "~0.5.0" path = "../aquadoggo" - -[dev-dependencies] -tempfile = "3.4.0" diff --git a/aquadoggo_cli/README.md b/aquadoggo_cli/README.md index 8317da499..394fdf68f 100644 --- a/aquadoggo_cli/README.md +++ b/aquadoggo_cli/README.md @@ -85,7 +85,7 @@ depending on your needs. #### Support only certain schemas > "I want to run a node which only replicates and serves data from a limited -> set of schemas. In this case it's schemas required by a mushroom sighting +> set of schemas. In this case its schemas required by a mushroom sighting > app." ```toml @@ -156,8 +156,8 @@ direct_node_addresses = [ #### Persist node identity and database -> "I want my node to persist it's identity and database on the filesystem and -> retreive them whenever it runs again." +> "I want my node to persist its identity, uploaded files and database on the +> filesystem and retreive them whenever it runs again." ```toml # Persist node private key at given location (using Linux XDG paths as an example) @@ -165,6 +165,9 @@ private_key = "$HOME/.local/share/aquadoggo/private-key.txt" # Persist SQLite database at given location database_url = "sqlite:$HOME/.local/share/aquadoggo/db.sqlite3" + +# Persist blobs (large binary files) at given location +blobs_base_path = "$HOME/.local/share/aquadoggo/blobs" ``` ### Configuration @@ -219,6 +222,13 @@ Options: QUIC port for node-node communication and data replication. Defaults to 2022 + -f, --blobs-base-path + Path to folder where blobs (large binary files) are persisted. + Defaults to a temporary directory. + + WARNING: By default your node will not persist any blobs after + shutdown. Set a path for production settings to not loose data. + -k, --private-key Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this current session. diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 20a9d0055..26ce05b03 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -88,6 +88,22 @@ http_port = 2020 # quic_port = 2022 +# ゚・。+☆ +# BLOBS +# ゚・。+☆ + +# Path to folder where blobs (large binary files) are persisted. Defaults to a +# temporary directory. +# +# WARNING: By default your node will not persist any blobs after shutdown. Set +# a path for production settings to not loose data. +# +# WARNING: This setting should reflect the `database_url` configuration. If the +# database is set to be stored somewhere permamently, you should do the same +# for blob files to not run into data inconsistencies. 
+# +# blobs_base_path = "$HOME/.local/share/aquadoggo/blobs" + # ゚・。+☆+。・ # IDENTITY # ゚・。+☆+。・ diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 8b146e38b..9d1e32b52 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -121,6 +121,15 @@ struct Cli { #[serde(skip_serializing_if = "Option::is_none")] quic_port: Option, + /// Path to folder where blobs (large binary files) are persisted. Defaults to a temporary + /// directory. + /// + /// WARNING: By default your node will not persist any blobs after shutdown. Set a path for + /// production settings to not loose data. + #[arg(short = 'f', long, value_name = "PATH")] + #[serde(skip_serializing_if = "Option::is_none")] + blobs_base_path: Option, + /// Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this /// current session. /// @@ -267,6 +276,7 @@ pub struct Configuration { pub database_max_connections: u32, pub http_port: u16, pub quic_port: u16, + pub blobs_base_path: Option, pub private_key: Option, pub mdns: bool, pub direct_node_addresses: Vec, @@ -286,6 +296,7 @@ impl Default for Configuration { database_max_connections: 32, http_port: 2020, quic_port: 2022, + blobs_base_path: None, mdns: true, private_key: None, direct_node_addresses: vec![], @@ -338,12 +349,22 @@ impl TryFrom for NodeConfiguration { } }; + // Create a temporary blobs directory when none was given + let blobs_base_path = match value.blobs_base_path { + Some(path) => path, + None => { + let tmp_dir = tempfile::TempDir::new() + .map_err(|_| anyhow!("Could not create temporary directory to store blobs"))?; + tmp_dir.path().to_path_buf() + } + }; + Ok(NodeConfiguration { allow_schema_ids, - blob_dir: None, database_url: value.database_url, database_max_connections: value.database_max_connections, http_port: value.http_port, + blobs_base_path, worker_pool_size: value.worker_pool_size, network: NetworkConfiguration { quic_port: value.quic_port, diff --git a/aquadoggo_cli/src/key_pair.rs b/aquadoggo_cli/src/key_pair.rs index 4635d2b83..9585c5ca6 100644 --- a/aquadoggo_cli/src/key_pair.rs +++ b/aquadoggo_cli/src/key_pair.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::fs::{self, File}; +use std::fs::File; use std::io::{Read, Write}; #[cfg(target_os = "unix")] use std::os::unix::fs::PermissionsExt; @@ -39,8 +39,6 @@ pub fn generate_ephemeral_key_pair() -> KeyPair { fn save_key_pair_to_file(key_pair: &KeyPair, path: PathBuf) -> Result<()> { let private_key_hex = hex::encode(key_pair.private_key().as_bytes()); - // Make sure that directories exist and write file into it - fs::create_dir_all(path.parent().unwrap())?; let mut file = File::create(&path)?; file.write_all(private_key_hex.as_bytes())?; file.sync_all()?; @@ -57,9 +55,7 @@ fn save_key_pair_to_file(key_pair: &KeyPair, path: PathBuf) -> Result<()> { fn save_key_pair_to_file(key_pair: &KeyPair, path: PathBuf) -> Result<()> { let private_key_hex = hex::encode(key_pair.private_key().as_bytes()); - // Make sure that directories exist and write file into it - fs::create_dir_all(path.parent().unwrap())?; - let mut file = File::create(&path)?; + let mut file = File::create(path)?; file.write_all(private_key_hex.as_bytes())?; file.sync_all()?; diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index d02d22735..41d1e9717 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -20,7 +20,11 @@ async fn main() -> anyhow::Result<()> { // Load configuration from command line arguments, 
environment variables and .toml file let (config_file_path, config) = load_config().context("Could not load configuration")?; - // Set log verbosity based on config. By default scope it always to the "aquadoggo" module. + // Remember if user did not set a blobs directory path, which means that it will default to a + // temporary one + let is_temporary_blobs_path = config.blobs_base_path.is_none(); + + // Set log verbosity based on config. By default scope it always to the "aquadoggo" module let mut builder = env_logger::Builder::new(); let builder = match LevelFilter::from_str(&config.log_level) { Ok(log_level) => builder.filter(Some("aquadoggo"), log_level), @@ -50,7 +54,7 @@ async fn main() -> anyhow::Result<()> { "{}", print_config(key_pair_path, config_file_path, &node_config) ); - show_warnings(&node_config); + show_warnings(&node_config, is_temporary_blobs_path); // Start p2panda node in async runtime let node = Node::start(key_pair, node_config).await; @@ -68,19 +72,34 @@ async fn main() -> anyhow::Result<()> { } /// Show some hopefully helpful warnings around common configuration issues. -fn show_warnings(config: &Configuration) { +fn show_warnings(config: &Configuration, is_temporary_blobs_path: bool) { match &config.allow_schema_ids { AllowList::Set(values) => { if values.is_empty() && !config.network.relay_mode { - warn!("Your node was set to not allow any schema ids which is only useful in combination with enabling relay mode. With this setting you will not be able to interact with any client or node."); + warn!( + "Your node was set to not allow any schema ids which is only useful in + combination with enabling relay mode. With this setting you will not be able to + interact with any client or node." + ); } } AllowList::Wildcard => { - warn!("Allowed schema ids is set to wildcard. Your node will support _any_ schemas it will encounter on the network. This is useful for experimentation and local development but _not_ recommended for production settings."); + warn!( + "Allowed schema ids is set to wildcard. Your node will support _any_ schemas it + will encounter on the network. This is useful for experimentation and local + development but _not_ recommended for production settings." + ); } } if !config.network.relay_addresses.is_empty() && config.network.relay_mode { - warn!("Will not connect to given relay addresses when relay mode is enabled"); + warn!("Will not connect to given relay addresses when relay mode is enabled."); + } + + if config.database_url != "sqlite::memory:" && is_temporary_blobs_path { + warn!("Your database is persisted but blobs _are not_ which might result in unrecoverable + data inconsistency (blob operations are stored but the files themselves are _not_). 
It is + recommended to either set both values (`database_url` and `blobs_base_path`) to an + temporary value or set both to persist all data."); } } From 3760e2031becd99740b0df1bd2165ec43c8dba81 Mon Sep 17 00:00:00 2001 From: Andreas Dzialocha Date: Thu, 7 Sep 2023 19:42:36 +0200 Subject: [PATCH 12/14] Stream blob data in chunks to files to not occupy too much memory (#551) * Stream data from database into file * Fix tests * Run cargo fmt * Use OpenOptions to create file * Add comments and doc strings * Add entry to CHANGELOG.md * Update doc-string --- CHANGELOG.md | 1 + Cargo.lock | 1 + aquadoggo/Cargo.toml | 1 + aquadoggo/src/db/stores/blob.rs | 341 ++++++++++++++--------- aquadoggo/src/materializer/tasks/blob.rs | 58 ++-- aquadoggo_cli/src/main.rs | 6 +- 6 files changed, 257 insertions(+), 151 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bee3a970a..c4a799b01 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - HTTP routes to serve files with correct content type headers [#544](https://github.com/p2panda/aquadoggo/pull/544) - Build a byte buffer over paginated pieces when assembling blobs [#547](https://github.com/p2panda/aquadoggo/pull/547) +- Stream blob data in chunks to files to not occupy too much memory [#551](https://github.com/p2panda/aquadoggo/pull/551) ## [0.5.0] diff --git a/Cargo.lock b/Cargo.lock index 6114c450e..6a4118d57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -162,6 +162,7 @@ dependencies = [ "async-graphql", "async-graphql-axum", "async-recursion", + "async-stream", "async-trait", "asynchronous-codec", "axum", diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 0cf5acaec..66befc7b2 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -22,6 +22,7 @@ proptests = [] anyhow = "1.0.62" async-graphql = { version = "5.0.6", features = ["dynamic-schema"] } async-graphql-axum = "5.0.6" +async-stream = "0.3.5" async-trait = "0.1.64" asynchronous-codec = { version = "0.6.2", features = ["cbor"] } axum = { version = "0.6.10", features = ["headers"] } diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs index a33b33081..20ae2f7d8 100644 --- a/aquadoggo/src/db/stores/blob.rs +++ b/aquadoggo/src/db/stores/blob.rs @@ -2,17 +2,20 @@ use std::num::NonZeroU64; +use async_stream::try_stream; use bytes::{BufMut, BytesMut}; +use futures::Stream; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::{DocumentId, DocumentViewId}; use p2panda_rs::operation::OperationValue; +use p2panda_rs::schema::validate::MAX_BLOB_PIECE_LENGTH; use p2panda_rs::schema::{Schema, SchemaId}; use p2panda_rs::storage_provider::traits::DocumentStore; use sqlx::{query_scalar, AnyPool}; use crate::db::errors::{BlobStoreError, SqlStoreError}; use crate::db::query::{Filter, Order, Pagination, PaginationField, Select}; -use crate::db::stores::query::{Query, RelationList}; +use crate::db::stores::query::{PaginationCursor, Query, RelationList}; use crate::db::SqlStore; /// Number of blob pieces requested per database query iteration. @@ -20,38 +23,161 @@ const BLOB_QUERY_PAGE_SIZE: u64 = 10; pub type BlobData = Vec; -impl SqlStore { - /// Get the data for one blob from the store, identified by its document id. - pub async fn get_blob(&self, id: &DocumentId) -> Result, BlobStoreError> { - // Get the root blob document - let blob_document = match self.get_document(id).await? 
{ - Some(document) => { - if document.schema_id != SchemaId::Blob(1) { - return Err(BlobStoreError::NotBlobDocument); - } - document +/// Gets blob data from the database in chunks (via pagination) and populates a readable stream +/// with it. +/// +/// This stream can further be used to write data into a file etc. This helps dealing with large +/// blobs as only little system memory is occupied per reading and writing step. We only move small +/// chunks at a time and keep the memory-footprint managable. +/// +/// Currently the BLOB_QUERY_PAGE_SIZE is set to 10 which is the multiplier of the +/// MAX_BLOB_PIECE_LENGTH. With 10 * 256kb we occupy an approximate maximum of 2.56mb memory at a +/// time. If these values make sense needs to be re-visited, but it is a start! +#[derive(Debug)] +pub struct BlobStream { + store: SqlStore, + pagination_cursor: Option, + document_view_id: DocumentViewId, + num_pieces: usize, + length: usize, + expected_num_pieces: usize, + expected_length: usize, +} + +impl BlobStream { + pub fn new(store: &SqlStore, document: impl AsDocument) -> Result { + if document.schema_id() != &SchemaId::Blob(1) { + return Err(BlobStoreError::NotBlobDocument); + } + + // Get the length of the blob + let expected_length = match document.get("length").unwrap() { + OperationValue::Integer(length) => *length as usize, + _ => unreachable!(), // We already validated that this is a blob document + }; + + // Get the number of pieces in the blob + let expected_num_pieces = match document.get("pieces").unwrap() { + OperationValue::PinnedRelationList(list) => list.len(), + _ => unreachable!(), // We already validated that this is a blob document + }; + + Ok(Self { + store: store.to_owned(), + pagination_cursor: None, + document_view_id: document.view_id().to_owned(), + num_pieces: 0, + length: 0, + expected_length, + expected_num_pieces, + }) + } + + async fn next_chunk(&mut self) -> Result { + let schema = Schema::get_system(SchemaId::BlobPiece(1)).expect("System schema is given"); + let list = RelationList::new_pinned(&self.document_view_id, "pieces"); + + let args = Query::new( + &Pagination::new( + &NonZeroU64::new(BLOB_QUERY_PAGE_SIZE).unwrap(), + self.pagination_cursor.as_ref(), + &vec![PaginationField::EndCursor, PaginationField::HasNextPage], + ), + &Select::new(&["data".into()]), + &Filter::default(), + &Order::default(), + ); + + let mut buf = + BytesMut::with_capacity(BLOB_QUERY_PAGE_SIZE as usize * MAX_BLOB_PIECE_LENGTH); + + let (pagination_data, documents) = self.store.query(schema, &args, Some(&list)).await?; + self.pagination_cursor = pagination_data.end_cursor; + self.num_pieces += documents.len(); + + for (_, blob_piece_document) in documents { + match blob_piece_document + .get("data") + .expect("Blob piece document without \"data\" field") + { + // @TODO: Use bytes here instead, see related issue: + // https://github.com/p2panda/aquadoggo/issues/543 + OperationValue::String(data_str) => buf.put(data_str.as_bytes()), + _ => unreachable!(), // We only queried for blob piece documents } - None => return Ok(None), + } + + self.length += buf.len(); + + Ok(buf.to_vec()) + } + + /// This method is called _after_ the stream has ended. We compare the values with what we've + /// expected and find inconsistencies and invalid blobs. 
+ fn validate(&self) -> Result<(), BlobStoreError> { + // No pieces were found + if self.length == 0 { + return Err(BlobStoreError::NoBlobPiecesFound); }; - document_to_blob_data(self, blob_document).await + + // Not all pieces were found + if self.expected_num_pieces != self.num_pieces { + return Err(BlobStoreError::MissingPieces); + } + + // Combined blob data length doesn't match the claimed length + if self.expected_length != self.length { + return Err(BlobStoreError::IncorrectLength); + }; + + Ok(()) } - /// Get the data for one blob from the store, identified by its document view id. + /// Establishes a data stream of blob data. + /// + /// The stream ends when all data has been written, at the end the blob data gets validated + /// against the expected blob length. + /// + /// To consume this stream in form of an iterator it is required to use the `pin_mut` macro. + // NOTE: Clippy does not understand that this macro generates code which asks for an explicit + // lifetime. + #[allow(clippy::needless_lifetimes)] + pub fn read_all<'a>(&'a mut self) -> impl Stream> + 'a { + try_stream! { + loop { + let blob_data = self.next_chunk().await?; + + if blob_data.is_empty() { + self.validate()?; + break; + } + + yield blob_data; + } + } + } +} + +impl SqlStore { + /// Get data stream for one blob from the store, identified by it's document id. + pub async fn get_blob(&self, id: &DocumentId) -> Result, BlobStoreError> { + if let Some(document) = self.get_document(id).await? { + Ok(Some(BlobStream::new(self, document)?)) + } else { + Ok(None) + } + } + + /// Get data stream for one blob from the store, identified by its document view id. pub async fn get_blob_by_view_id( &self, view_id: &DocumentViewId, - ) -> Result, BlobStoreError> { - // Get the root blob document - let blob_document = match self.get_document_by_view_id(view_id).await? { - Some(document) => { - if document.schema_id != SchemaId::Blob(1) { - return Err(BlobStoreError::NotBlobDocument); - } - document - } - None => return Ok(None), - }; - document_to_blob_data(self, blob_document).await + ) -> Result, BlobStoreError> { + if let Some(document) = self.get_document_by_view_id(view_id).await? { + Ok(Some(BlobStream::new(self, document)?)) + } else { + Ok(None) + } } /// Purge blob data from the node _if_ it is not related to from another document. @@ -158,84 +284,10 @@ async fn reverse_relations( .map_err(|e| SqlStoreError::Transaction(e.to_string())) } -/// Helper method for validation and parsing a document into blob data. -async fn document_to_blob_data( - store: &SqlStore, - blob: impl AsDocument, -) -> Result, BlobStoreError> { - // Get the length of the blob - let expected_length = match blob.get("length").unwrap() { - OperationValue::Integer(length) => *length as usize, - _ => unreachable!(), // We already validated that this is a blob document - }; - - // Get the number of pieces in the blob - let expected_num_pieces = match blob.get("pieces").unwrap() { - OperationValue::PinnedRelationList(list) => list.len(), - _ => unreachable!(), // We already validated that this is a blob document - }; - - // Now collect all existing pieces for the blob. - // - // We do this using the stores' query method, targeting pieces which are in the relation list - // of the blob. 
- let schema = Schema::get_system(SchemaId::BlobPiece(1)).unwrap(); - let list = RelationList::new_pinned(blob.view_id(), "pieces"); - - let mut has_next_page = true; - let mut args = Query::new( - &Pagination::new( - &NonZeroU64::new(BLOB_QUERY_PAGE_SIZE).unwrap(), - None, - &vec![PaginationField::EndCursor, PaginationField::HasNextPage], - ), - &Select::new(&["data".into()]), - &Filter::default(), - &Order::default(), - ); - - let mut buf = BytesMut::with_capacity(expected_length); - let mut num_pieces = 0; - - while has_next_page { - let (pagination_data, documents) = store.query(schema, &args, Some(&list)).await?; - has_next_page = pagination_data.has_next_page; - args.pagination.after = pagination_data.end_cursor; - num_pieces += documents.len(); - - for (_, blob_piece_document) in documents { - match blob_piece_document - .get("data") - .expect("Blob piece document without \"data\" field") - { - // @TODO: Use bytes here instead, see related issue: - // https://github.com/p2panda/aquadoggo/issues/543 - OperationValue::String(data_str) => buf.put(data_str.as_bytes()), - _ => unreachable!(), // We only queried for blob piece documents - } - } - } - - // No pieces were found - if buf.is_empty() { - return Err(BlobStoreError::NoBlobPiecesFound); - }; - - // Not all pieces were found - if expected_num_pieces != num_pieces { - return Err(BlobStoreError::MissingPieces); - } - - // Combined blob data length doesn't match the claimed length - if expected_length != buf.len() { - return Err(BlobStoreError::IncorrectLength); - }; - - Ok(Some(buf.into())) -} - #[cfg(test)] mod tests { + use bytes::{BufMut, BytesMut}; + use futures::{pin_mut, StreamExt}; use p2panda_rs::document::DocumentId; use p2panda_rs::identity::KeyPair; use p2panda_rs::schema::SchemaId; @@ -249,30 +301,49 @@ mod tests { populate_store_config, test_runner, update_document, TestNode, }; + use super::BlobStream; + + async fn read_data_from_stream(mut blob_stream: BlobStream) -> Result, BlobStoreError> { + let stream = blob_stream.read_all(); + pin_mut!(stream); + + let mut buf = BytesMut::new(); + + while let Some(value) = stream.next().await { + match value { + Ok(blob_data) => { + buf.put(blob_data.as_slice()); + } + Err(err) => return Err(err), + } + } + + Ok(buf.to_vec()) + } + #[rstest] fn get_blob(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { let blob_data = "Hello, World!".as_bytes(); let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; - let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); // Get blob by document id - let blob = node.context.store.get_blob(&document_id).await.unwrap(); - - assert!(blob.is_some()); - assert_eq!(blob.unwrap(), blob_data); + let blob_stream = node.context.store.get_blob(&document_id).await.unwrap(); + assert!(blob_stream.is_some()); + let collected_data = read_data_from_stream(blob_stream.unwrap()).await; + assert_eq!(blob_data, collected_data.unwrap()); // Get blob by view id - let blob = node + let blob_stream_view = node .context .store .get_blob_by_view_id(&blob_view_id) .await .unwrap(); - - assert!(blob.is_some()); - assert_eq!(blob.unwrap(), blob_data) + assert!(blob_stream_view.is_some()); + let collected_data = read_data_from_stream(blob_stream_view.unwrap()).await; + assert_eq!(blob_data, collected_data.unwrap()); }) } @@ -300,12 +371,17 @@ mod tests { let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); // We get the correct `NoBlobPiecesFound` error. 
- let result = node.context.store.get_blob(&blob_document_id).await; - assert!( - matches!(result, Err(BlobStoreError::NoBlobPiecesFound)), - "{:?}", - result - ); + let stream = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + let collected_data = read_data_from_stream(stream.unwrap()).await; + assert!(matches!( + collected_data, + Err(BlobStoreError::NoBlobPiecesFound) + ),); // Publish one blob piece. let blob_piece_view_id_1 = add_document( @@ -335,12 +411,14 @@ mod tests { let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); // We should get the correct `MissingBlobPieces` error. - let result = node.context.store.get_blob(&blob_document_id).await; - assert!( - matches!(result, Err(BlobStoreError::MissingPieces)), - "{:?}", - result - ); + let stream = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + let collected_data = read_data_from_stream(stream.unwrap()).await; + assert!(matches!(collected_data, Err(BlobStoreError::MissingPieces)),); // Publish one more blob piece, but it doesn't contain the correct number of bytes. let blob_piece_view_id_2 = add_document( @@ -371,12 +449,17 @@ mod tests { let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); // We get the correct `IncorrectLength` error. - let result = node.context.store.get_blob(&blob_document_id).await; - assert!( - matches!(result, Err(BlobStoreError::IncorrectLength)), - "{:?}", - result - ); + let stream = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + let collected_data = read_data_from_stream(stream.unwrap()).await; + assert!(matches!( + collected_data, + Err(BlobStoreError::IncorrectLength) + ),); }) } diff --git a/aquadoggo/src/materializer/tasks/blob.rs b/aquadoggo/src/materializer/tasks/blob.rs index 133ea795c..b9c1cc28f 100644 --- a/aquadoggo/src/materializer/tasks/blob.rs +++ b/aquadoggo/src/materializer/tasks/blob.rs @@ -1,12 +1,14 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +use anyhow::anyhow; +use futures::{pin_mut, StreamExt}; use log::{debug, info}; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::DocumentViewId; use p2panda_rs::operation::OperationValue; use p2panda_rs::schema::SchemaId; use p2panda_rs::storage_provider::traits::DocumentStore; -use tokio::fs::File; +use tokio::fs::OpenOptions; use tokio::io::AsyncWriteExt; use crate::context::Context; @@ -63,17 +65,17 @@ pub async fn blob_task(context: Context, input: TaskInput) -> TaskResult TaskResult file.write(&buf).await.map_err(|err| anyhow!(err)), + Err(err) => Err(anyhow!(err)), + } + .map_err(|err| { + TaskError::Critical(format!( + "Could not write blob file @ {}: {}", + blob_view_path.display(), + err + )) + })?; + } } Ok(None) diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 41d1e9717..7bd87b836 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -97,9 +97,11 @@ fn show_warnings(config: &Configuration, is_temporary_blobs_path: bool) { } if config.database_url != "sqlite::memory:" && is_temporary_blobs_path { - warn!("Your database is persisted but blobs _are not_ which might result in unrecoverable + warn!( + "Your database is persisted but blobs _are not_ which might result in unrecoverable data inconsistency (blob operations are stored but the files themselves are _not_). 
It is recommended to either set both values (`database_url` and `blobs_base_path`) to an - temporary value or set both to persist all data."); + temporary value or set both to persist all data." + ); } } From 9bbcd08d0fdb179ee4708ab40ca89ab85bb37a59 Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Thu, 7 Sep 2023 20:17:09 +0100 Subject: [PATCH 13/14] Integrate `Bytes` value (#554) * Bump p2panda-rs version * Bump p2panda-rs again * Bump p2panda-rs in cli as well * Update test method * Handle bytes field in GraphQL api * Add bytes value to test fields * fmt * Bump p2panda-rs * Collection query tests including byte fields * Include data field in document insert/get test * Add bytes field in GraphQL scalar * Update proptests for bytes field * Encode bytes as hex strings in graphql type conversion * Fixes after rebase * Revert proptest config changes * Clippy & fmt * Update CHANGELOG --- CHANGELOG.md | 1 + Cargo.lock | 197 ++++++--------- aquadoggo/Cargo.toml | 9 +- aquadoggo/src/db/models/utils.rs | 230 +++++++++--------- aquadoggo/src/db/stores/blob.rs | 17 +- aquadoggo/src/db/stores/document.rs | 15 +- aquadoggo/src/db/stores/query.rs | 1 + .../src/graphql/input_values/fields_filter.rs | 21 +- aquadoggo/src/graphql/input_values/mod.rs | 4 +- aquadoggo/src/graphql/queries/collection.rs | 82 +++++-- aquadoggo/src/graphql/queries/next_args.rs | 2 +- .../src/graphql/scalars/hex_bytes_scalar.rs | 61 +++++ aquadoggo/src/graphql/scalars/mod.rs | 2 + aquadoggo/src/graphql/schema.rs | 11 +- aquadoggo/src/graphql/tests.rs | 6 +- aquadoggo/src/graphql/utils.rs | 10 + .../src/proptests/document_strategies.rs | 12 + aquadoggo/src/proptests/filter_strategies.rs | 16 +- aquadoggo/src/proptests/schema_strategies.rs | 8 + aquadoggo/src/proptests/tests.rs | 1 - aquadoggo/src/proptests/utils.rs | 18 ++ aquadoggo/src/test_utils/helpers.rs | 1 + aquadoggo/src/test_utils/node.rs | 6 +- aquadoggo_cli/Cargo.toml | 2 +- 24 files changed, 446 insertions(+), 287 deletions(-) create mode 100644 aquadoggo/src/graphql/scalars/hex_bytes_scalar.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index c4a799b01..508669816 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) - Task for automatic garbage collection of unused documents and views [#500](https://github.com/p2panda/aquadoggo/pull/500) - Blobs directory configuration [#549](https://github.com/p2panda/aquadoggo/pull/549) +- Integrate `Bytes` operation value [554](https://github.com/p2panda/aquadoggo/pull/554/) ### Changed diff --git a/Cargo.lock b/Cargo.lock index 6a4118d57..d1dde2c07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -279,8 +279,8 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", "synstructure", ] @@ -291,8 +291,8 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", ] @@ -408,8 +408,8 @@ dependencies = [ "async-graphql-parser", "darling", "proc-macro-crate", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", "thiserror", ] @@ -503,8 +503,8 
@@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -525,8 +525,8 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -542,8 +542,8 @@ version = "0.1.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -946,8 +946,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" dependencies = [ "heck", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -1129,7 +1129,7 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" dependencies = [ - "quote 1.0.31", + "quote", "syn 1.0.109", ] @@ -1187,8 +1187,8 @@ checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" dependencies = [ "fnv", "ident_case", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "strsim", "syn 1.0.109", ] @@ -1200,7 +1200,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ "darling_core", - "quote 1.0.31", + "quote", "syn 1.0.109", ] @@ -1321,8 +1321,8 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -1364,8 +1364,8 @@ dependencies = [ "Inflector", "darling", "proc-macro-crate", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", "thiserror", ] @@ -1418,8 +1418,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" dependencies = [ "heck", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", ] @@ -1630,8 +1630,8 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -2630,8 +2630,8 @@ checksum = "c4d5ec2a3df00c7836d7696c136274c9c59705bac69133253696a6c932cd1d74" dependencies = [ "heck", "proc-macro-warning", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -3118,7 +3118,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "p2panda-rs" version = "0.7.1" -source = "git+https://github.com/p2panda/p2panda?rev=8377056617b64e898e9980e8ad84d258ca0442a1#8377056617b64e898e9980e8ad84d258ca0442a1" +source = "git+https://github.com/p2panda/p2panda?rev=be84d7c4e39c1b67125d80468ccf412cf25ae1d7#be84d7c4e39c1b67125d80468ccf412cf25ae1d7" dependencies = [ "arrayvec 
0.5.2", "async-trait", @@ -3238,9 +3238,9 @@ version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da9f0f13dac8069c139e8300a6510e3f4143ecf5259c60b116a9b271b4ca0d54" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2", "proc-macro2-diagnostics", - "quote 1.0.31", + "quote", "syn 2.0.29", ] @@ -3287,8 +3287,8 @@ checksum = "99d490fe7e8556575ff6911e45567ab95e71617f43781e5c05490dc8d75c965c" dependencies = [ "pest", "pest_meta", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -3318,8 +3318,8 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -3408,20 +3408,11 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70550716265d1ec349c41f70dd4f964b4fd88394efe4405f0c1da679c4799a07" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] -[[package]] -name = "proc-macro2" -version = "0.4.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" -dependencies = [ - "unicode-xid 0.1.0", -] - [[package]] name = "proc-macro2" version = "1.0.66" @@ -3437,8 +3428,8 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", "version_check", "yansi", @@ -3462,8 +3453,8 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b6a5217beb0ad503ee7fa752d451c905113d70721b937126158f3106a48cc1" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", ] @@ -3489,13 +3480,13 @@ dependencies = [ [[package]] name = "proptest-derive" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90b46295382dc76166cb7cf2bb4a97952464e4b7ed5a43e6cd34e1fec3349ddc" +checksum = "9cf16337405ca084e9c78985114633b6827711d22b9e6ef6c6c0d665eb3f0b6e" dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "syn 0.15.44", + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] @@ -3574,22 +3565,13 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "quote" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" -dependencies = [ - "proc-macro2 0.4.30", -] - [[package]] name = "quote" version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2", ] [[package]] @@ -3862,8 +3844,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5015e68a0685a95ade3eee617ff7101ab6a3fc689203101ca16ebc16f2b89c66" dependencies = [ "cfg-if", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "rustc_version", "syn 1.0.109", ] @@ -3875,8 +3857,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7229b505ae0706e64f37ffc54a9c163e11022a6636d58fe1f3f52018257ff9f7" dependencies = [ "cfg-if", - 
"proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "rustc_version", "syn 1.0.109", "unicode-ident", @@ -3888,7 +3870,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b29d3117bce27ea307d1fb7ce12c64ba11b3fd04311a42d32bc5f0072e6e3d4d" dependencies = [ - "quote 1.0.31", + "quote", "rustc_version", "syn 1.0.109", ] @@ -3899,7 +3881,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45f80dcc84beab3a327bbe161f77db25f336a1452428176787c8c79ac79d7073" dependencies = [ - "quote 1.0.31", + "quote", "rand 0.8.5", "rustc_version", "syn 1.0.109", @@ -4089,9 +4071,9 @@ dependencies = [ [[package]] name = "serde-wasm-bindgen" -version = "0.4.5" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b4c031cd0d9014307d82b8abf653c0290fbdaeb4c02d00c63cf52f728628bf" +checksum = "f3b143e2833c57ab9ad3ea280d21fd34e285a42837aeb0ee301f4f41890fa00e" dependencies = [ "js-sys", "serde", @@ -4123,8 +4105,8 @@ version = "1.0.185" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -4278,8 +4260,8 @@ version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", ] @@ -4422,8 +4404,8 @@ dependencies = [ "either", "heck", "once_cell", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "sha2 0.10.7", "sqlx-core", "sqlx-rt", @@ -4476,25 +4458,14 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" -[[package]] -name = "syn" -version = "0.15.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" -dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "unicode-xid 0.1.0", -] - [[package]] name = "syn" version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "unicode-ident", ] @@ -4504,8 +4475,8 @@ version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "unicode-ident", ] @@ -4521,10 +4492,10 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", - "unicode-xid 0.2.4", + "unicode-xid", ] [[package]] @@ -4585,8 +4556,8 @@ version = "1.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1728216d3244de4f14f14f8c15c79be1a7c67867d28d69b719690e2a19fb445" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -4658,8 +4629,8 @@ version = "2.1.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -4812,8 +4783,8 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -4969,12 +4940,6 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" -[[package]] -name = "unicode-xid" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" - [[package]] name = "unicode-xid" version = "0.2.4" @@ -5124,8 +5089,8 @@ dependencies = [ "bumpalo", "log", "once_cell", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", "wasm-bindgen-shared", ] @@ -5148,7 +5113,7 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ - "quote 1.0.31", + "quote", "wasm-bindgen-macro-support", ] @@ -5158,8 +5123,8 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", "wasm-bindgen-backend", "wasm-bindgen-shared", @@ -5486,7 +5451,7 @@ version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 66befc7b2..e1ad02e9c 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -57,12 +57,13 @@ lipmaa-link = "0.2.2" log = "0.4.19" once_cell = "1.18.0" openssl-probe = "0.1.5" -p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1", features = [ +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "be84d7c4e39c1b67125d80468ccf412cf25ae1d7", features = [ "storage-provider", ] } rand = "0.8.5" regex = "1.9.3" serde = { version = "1.0.152", features = ["derive"] } +serde_bytes = "0.11.12" sqlx = { version = "0.6.1", features = [ "any", "postgres", @@ -97,12 +98,12 @@ http = "0.2.9" hyper = "0.14.19" libp2p-swarm-test = "0.2.0" once_cell = "1.17.0" -p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1", features = [ +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "be84d7c4e39c1b67125d80468ccf412cf25ae1d7", features = [ "test-utils", "storage-provider", ] } -proptest = "1.1.0" -proptest-derive = "0.3.0" +proptest = "1.2.0" +proptest-derive = "0.4.0" rand = "0.8.5" reqwest = { version = "0.11.11", default-features = false, features = [ "json", diff --git a/aquadoggo/src/db/models/utils.rs b/aquadoggo/src/db/models/utils.rs index f35dc9c54..2a3b773e1 100644 --- a/aquadoggo/src/db/models/utils.rs +++ b/aquadoggo/src/db/models/utils.rs @@ -81,6 +81,14 @@ pub fn parse_operation_rows( 
OperationValue::String(field_value.unwrap().clone()), )); } + "bytes" => { + operation_fields.push(( + field_name.to_string(), + OperationValue::Bytes(hex::decode(field_value.unwrap()).expect( + "bytes coming from the store are encoded in valid hex strings", + )), + )); + } "relation" => { operation_fields.push(( field_name.to_string(), @@ -235,6 +243,10 @@ pub fn parse_value_to_string_vec(value: &OperationValue) -> Vec> } db_values } + OperationValue::Bytes(bytes) => { + // bytes are stored in the db as hex strings + vec![Some(hex::encode(bytes))] + } } } @@ -300,6 +312,18 @@ pub fn parse_document_view_field_rows( ), ); } + "bytes" => { + document_view_fields.insert( + &row.name, + DocumentViewValue::new( + &row.operation_id.parse::().unwrap(), + &OperationValue::Bytes( + hex::decode(row.value.as_ref().unwrap()) + .expect("bytes coming from the db to be hex encoded"), + ), + ), + ); + } "relation" => { document_view_fields.insert( &row.name, @@ -435,6 +459,25 @@ mod tests { list_index: Some(0), sorted_index: None, }, + OperationFieldsJoinedRow { + public_key: "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96" + .to_string(), + document_id: "0020b177ec1bf26dfb3b7010d473e6d44713b29b765b99c6e60ecbfae742de496543" + .to_string(), + operation_id: + "0020b177ec1bf26dfb3b7010d473e6d44713b29b765b99c6e60ecbfae742de496543" + .to_string(), + action: "create".to_string(), + schema_id: + "venue_0020c65567ae37efea293e34a9c7d13f8f2bf23dbdc3b5c7b9ab46293111c48fc78b" + .to_string(), + previous: None, + name: Some("data".to_string()), + field_type: Some("bytes".to_string()), + value: Some("00010203".to_string()), + list_index: Some(0), + sorted_index: None, + }, OperationFieldsJoinedRow { public_key: "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96" .to_string(), @@ -695,6 +738,10 @@ mod tests { operation.fields().unwrap().get("username").unwrap(), &OperationValue::String("bubu".to_string()) ); + assert_eq!( + operation.fields().unwrap().get("data").unwrap(), + &OperationValue::Bytes(vec![0, 1, 2, 3]) + ); assert_eq!( operation.fields().unwrap().get("age").unwrap(), &OperationValue::Integer(28) @@ -786,35 +833,20 @@ mod tests { #[rstest] fn operation_values_to_string_vec(schema_id: SchemaId) { let expected_list = vec![ - Some("28".to_string()), - None, - Some( - "0020abababababababababababababababababababababababababababababababab".to_string(), - ), - Some( - "0020cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd".to_string(), - ), - Some("3.5".to_string()), - Some("false".to_string()), - Some( - "0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(), - ), - Some( - "0020bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb".to_string(), - ), - Some( - "0020cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc".to_string(), - ), - Some( - "0020dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd".to_string(), - ), - Some( - "0020eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee".to_string(), - ), - Some( - "0020ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff".to_string(), - ), - Some("bubu".to_string()), + Some("28".into()), + None, // This is an empty relation list + Some("0020abababababababababababababababababababababababababababababababab".into()), + Some("0020cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd".into()), + Some("00010203".into()), + Some("3.5".into()), + Some("false".into()), + 
Some("0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".into()), + Some("0020bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb".into()), + Some("0020cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc".into()), + Some("0020dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd".into()), + Some("0020eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee".into()), + Some("0020ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff".into()), + Some("bubu".into()), ]; let operation = create_operation(doggo_fields(), schema_id); @@ -853,58 +885,44 @@ mod tests { #[test] fn parses_document_field_rows() { + let document_id = + "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0".to_string(); + let operation_id = + "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770".to_string(); + let document_view_id = operation_id.clone(); + let document_field_rows = vec![ DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "age".to_string(), list_index: 0, field_type: "int".to_string(), value: Some("28".to_string()), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "height".to_string(), list_index: 0, field_type: "float".to_string(), value: Some("3.5".to_string()), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "is_admin".to_string(), list_index: 0, field_type: "bool".to_string(), value: Some("false".to_string()), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "many_profile_pictures".to_string(), list_index: 0, field_type: "relation_list".to_string(), @@ -914,14 +932,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - 
"0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "many_profile_pictures".to_string(), list_index: 1, field_type: "relation_list".to_string(), @@ -931,14 +944,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "many_special_profile_pictures".to_string(), list_index: 0, field_type: "pinned_relation_list".to_string(), @@ -948,14 +956,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "many_special_profile_pictures".to_string(), list_index: 1, field_type: "pinned_relation_list".to_string(), @@ -965,14 +968,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "profile_picture".to_string(), list_index: 0, field_type: "relation".to_string(), @@ -982,14 +980,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "special_profile_picture".to_string(), list_index: 0, field_type: "pinned_relation".to_string(), @@ -999,28 +992,27 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "username".to_string(), list_index: 0, field_type: "str".to_string(), value: Some("bubu".to_string()), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - 
.to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), + name: "data".to_string(), + list_index: 0, + field_type: "bytes".to_string(), + value: Some("00010203".to_string()), + }, + DocumentViewFieldRow { + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "an_empty_relation_list".to_string(), list_index: 0, field_type: "pinned_relation_list".to_string(), @@ -1038,6 +1030,10 @@ mod tests { document_fields.get("username").unwrap(), &DocumentViewValue::new(&operation_id, &OperationValue::String("bubu".to_string())) ); + assert_eq!( + document_fields.get("data").unwrap(), + &DocumentViewValue::new(&operation_id, &OperationValue::Bytes(vec![0, 1, 2, 3])) + ); assert_eq!( document_fields.get("age").unwrap(), &DocumentViewValue::new(&operation_id, &OperationValue::Integer(28)) diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs index 20ae2f7d8..2b7ad75c4 100644 --- a/aquadoggo/src/db/stores/blob.rs +++ b/aquadoggo/src/db/stores/blob.rs @@ -100,9 +100,7 @@ impl BlobStream { .get("data") .expect("Blob piece document without \"data\" field") { - // @TODO: Use bytes here instead, see related issue: - // https://github.com/p2panda/aquadoggo/issues/543 - OperationValue::String(data_str) => buf.put(data_str.as_bytes()), + OperationValue::Bytes(data_str) => buf.put(&data_str[..]), _ => unreachable!(), // We only queried for blob piece documents } } @@ -292,6 +290,7 @@ mod tests { use p2panda_rs::identity::KeyPair; use p2panda_rs::schema::SchemaId; use p2panda_rs::test_utils::fixtures::{key_pair, random_document_view_id}; + use p2panda_rs::test_utils::generate_random_bytes; use p2panda_rs::test_utils::memory_store::helpers::PopulateStoreConfig; use rstest::rstest; @@ -350,7 +349,7 @@ mod tests { #[rstest] fn get_blob_errors(key_pair: KeyPair) { test_runner(|mut node: TestNode| async move { - let blob_data = "Hello, World!".to_string(); + let blob_data = generate_random_bytes(12); // Publish a blob containing pieces which aren't in the store. let blob_view_id = add_document( @@ -517,22 +516,22 @@ mod tests { // These are the rows we expect to exist in each table. 
assert_query(&node, "SELECT entry_hash FROM entries", 4).await; assert_query(&node, "SELECT operation_id FROM operations_v1", 4).await; - assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 19).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 20).await; assert_query(&node, "SELECT log_id FROM logs", 4).await; assert_query(&node, "SELECT document_id FROM documents", 4).await; assert_query(&node, "SELECT document_id FROM document_views", 4).await; - assert_query(&node, "SELECT name FROM document_view_fields", 15).await; + assert_query(&node, "SELECT name FROM document_view_fields", 16).await; let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); let result = node.context.store.purge_blob(&document_id).await; assert!(result.is_ok(), "{:#?}", result); assert_query(&node, "SELECT entry_hash FROM entries", 1).await; assert_query(&node, "SELECT operation_id FROM operations_v1", 1).await; - assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 13).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 14).await; assert_query(&node, "SELECT log_id FROM logs", 4).await; assert_query(&node, "SELECT document_id FROM documents", 1).await; assert_query(&node, "SELECT document_id FROM document_views", 1).await; - assert_query(&node, "SELECT name FROM document_view_fields", 10).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; let result = node.context.store.purge_blob(&document_id).await; @@ -594,7 +593,7 @@ mod tests { let new_blob_pieces = add_document( &mut node, &SchemaId::BlobPiece(1), - vec![("data", "more blob data".into())], + vec![("data", "more blob data".as_bytes().into())], &key_pair, ) .await; diff --git a/aquadoggo/src/db/stores/document.rs b/aquadoggo/src/db/stores/document.rs index 6df98d61d..981cbdd05 100644 --- a/aquadoggo/src/db/stores/document.rs +++ b/aquadoggo/src/db/stores/document.rs @@ -960,6 +960,7 @@ mod tests { "age", "height", "is_admin", + "data", "profile_picture", "many_profile_pictures", "special_profile_picture", @@ -1320,11 +1321,11 @@ mod tests { // expect for each table. assert_query(&node, "SELECT entry_hash FROM entries", 2).await; assert_query(&node, "SELECT operation_id FROM operations_v1", 2).await; - assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 26).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 28).await; assert_query(&node, "SELECT log_id FROM logs", 1).await; assert_query(&node, "SELECT document_id FROM documents", 1).await; assert_query(&node, "SELECT document_id FROM document_views", 1).await; - assert_query(&node, "SELECT name FROM document_view_fields", 10).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; // Purge this document from the database, we now expect all tables to be empty. let result = node.context.store.purge_document(&document_id).await; @@ -1355,11 +1356,11 @@ mod tests { // for each table. 
assert_query(&node, "SELECT entry_hash FROM entries", 2).await; assert_query(&node, "SELECT operation_id FROM operations_v1", 2).await; - assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 26).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 28).await; assert_query(&node, "SELECT log_id FROM logs", 2).await; assert_query(&node, "SELECT document_id FROM documents", 2).await; assert_query(&node, "SELECT document_id FROM document_views", 2).await; - assert_query(&node, "SELECT name FROM document_view_fields", 20).await; + assert_query(&node, "SELECT name FROM document_view_fields", 22).await; // Purge one document from the database, we now expect half the rows to be remaining. let result = node.context.store.purge_document(&document_id).await; @@ -1367,11 +1368,11 @@ mod tests { assert_query(&node, "SELECT entry_hash FROM entries", 1).await; assert_query(&node, "SELECT operation_id FROM operations_v1", 1).await; - assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 13).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 14).await; assert_query(&node, "SELECT log_id FROM logs", 2).await; assert_query(&node, "SELECT document_id FROM documents", 1).await; assert_query(&node, "SELECT document_id FROM document_views", 1).await; - assert_query(&node, "SELECT name FROM document_view_fields", 10).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; }); } @@ -1395,7 +1396,7 @@ mod tests { Some(&document_id.as_str().parse().unwrap()), ) .await; - println!("{:#?}", result); + assert!(result.is_err()); let result = next_args(&node.context.store, &public_key, None).await; diff --git a/aquadoggo/src/db/stores/query.rs b/aquadoggo/src/db/stores/query.rs index e8ff37483..c441e41e7 100644 --- a/aquadoggo/src/db/stores/query.rs +++ b/aquadoggo/src/db/stores/query.rs @@ -300,6 +300,7 @@ fn bind_arg(value: &OperationValue) -> Vec { .iter() .map(|view_id| BindArgument::String(view_id.to_string())) .collect(), + OperationValue::Bytes(value) => vec![BindArgument::String(hex::encode(value))], } } diff --git a/aquadoggo/src/graphql/input_values/fields_filter.rs b/aquadoggo/src/graphql/input_values/fields_filter.rs index 79f765f3a..f0eeb8116 100644 --- a/aquadoggo/src/graphql/input_values/fields_filter.rs +++ b/aquadoggo/src/graphql/input_values/fields_filter.rs @@ -10,7 +10,9 @@ use async_graphql::dynamic::{InputObject, InputValue, TypeRef}; use dynamic_graphql::InputObject; use p2panda_rs::schema::{FieldType, Schema}; -use crate::graphql::scalars::{DocumentIdScalar, DocumentViewIdScalar, PublicKeyScalar}; +use crate::graphql::scalars::{ + DocumentIdScalar, DocumentViewIdScalar, HexBytesScalar, PublicKeyScalar, +}; use crate::graphql::utils::filter_name; /// Build a filter input object for a p2panda schema. It can be used to filter collection queries @@ -42,6 +44,10 @@ pub fn build_filter_input_object(schema: &Schema) -> InputObject { filter_input = filter_input.field(InputValue::new(name, TypeRef::named("StringFilter"))); } + FieldType::Bytes => { + filter_input = + filter_input.field(InputValue::new(name, TypeRef::named("HexBytesFilter"))); + } FieldType::Relation(_) => { filter_input = filter_input.field(InputValue::new(name, TypeRef::named("RelationFilter"))); @@ -171,6 +177,19 @@ pub struct StringFilter { not_contains: Option, } +/// A filter input type for bytes field values. +#[derive(InputObject)] +#[allow(dead_code)] +pub struct HexBytesFilter { + /// Filter by equal to. 
+ #[graphql(name = "eq")] + eq: Option, + + /// Filter by not equal to. + #[graphql(name = "notEq")] + not_eq: Option, +} + /// A filter input type for integer field values. #[derive(InputObject)] #[allow(dead_code)] diff --git a/aquadoggo/src/graphql/input_values/mod.rs b/aquadoggo/src/graphql/input_values/mod.rs index 22492a39b..c42f2fd1f 100644 --- a/aquadoggo/src/graphql/input_values/mod.rs +++ b/aquadoggo/src/graphql/input_values/mod.rs @@ -6,8 +6,8 @@ mod order; pub use fields_filter::{ build_filter_input_object, BooleanFilter, DocumentIdFilter, DocumentViewIdFilter, FloatFilter, - IntegerFilter, OwnerFilter, PinnedRelationFilter, PinnedRelationListFilter, RelationFilter, - RelationListFilter, StringFilter, + HexBytesFilter, IntegerFilter, OwnerFilter, PinnedRelationFilter, PinnedRelationListFilter, + RelationFilter, RelationListFilter, StringFilter, }; pub use meta_filter::MetaFilterInputObject; pub use order::{build_order_enum_value, OrderDirection}; diff --git a/aquadoggo/src/graphql/queries/collection.rs b/aquadoggo/src/graphql/queries/collection.rs index 6d7d6ff73..86317d0ec 100644 --- a/aquadoggo/src/graphql/queries/collection.rs +++ b/aquadoggo/src/graphql/queries/collection.rs @@ -284,6 +284,7 @@ mod tests { ("artist", "X-ray Spex".into(), None), ("title", "Oh Bondage Up Yours!".into(), None), ("release_year", 1977.into(), None), + ("audio", vec![0, 1, 2, 3][..].into(), None), ( "lyrics", vec![ @@ -328,6 +329,7 @@ mod tests { ("artist", "Gang Of Four".into(), None), ("title", "Natural's Not In".into(), None), ("release_year", 1979.into(), None), + ("audio", vec![4, 5, 6, 7][..].into(), None), ( "lyrics", vec![ @@ -385,6 +387,7 @@ mod tests { ("artist", "David Bowie".into(), None), ("title", "Speed Of Life".into(), None), ("release_year", 1977.into(), None), + ("audio", vec![8, 9, 10, 11][..].into(), None), ( "lyrics", OperationValue::RelationList(RelationList::new(vec![])), @@ -404,24 +407,30 @@ mod tests { "collection": value!({ "hasNextPage": false, "totalCount": 2, - "endCursor": "31Ch6qa4mdKcxpWJG4X9Wf5iMvSSxmSGg8cyg9teNR6yKmLncZCmyVUaPFjRNoWcxpeASGqrRiJGR8HSqjWBz5HE", + "endCursor": "24gc7iHafVKTcfRZfVVV8etkSoJMJVsqs1iYJAuHb8oNp32Vi1PcYw6S5GJ8hNhPmHHbP1weVbACYRctHVz4jXjQ", "documents": [ { - "cursor": "273AmFQTk7w6134GhzKUS5tY8qDuaMYBPgbaftZ43G7saiKa73MPapFvjNDixbNjCr5ucNqzNsx2fYdRqRod9U2W", - "fields": { "bool": true, }, + "cursor": "24gZVnL75RPvxMVAiuGT2SgCrHneGZgsvEaiCh5g8qgxGBhcunAffueCUTiyuLDamP1G48KYPmRDBBFG43dh3XJ2", + "fields": { + "bool": true, + "data": "00010203", + }, "meta": { "owner": "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96", - "documentId": "00200436216389856afb3f3a7d8cb2d2981be85787aebed02031c72eb9c216406c57", - "viewId": "00200436216389856afb3f3a7d8cb2d2981be85787aebed02031c72eb9c216406c57", + "documentId": "0020223f123be0f9025c591fba1a5800ca64084e837315521d5b65a870e874ed8b4e", + "viewId": "0020223f123be0f9025c591fba1a5800ca64084e837315521d5b65a870e874ed8b4e", } }, { - "cursor": "31Ch6qa4mdKcxpWJG4X9Wf5iMvSSxmSGg8cyg9teNR6yKmLncZCmyVUaPFjRNoWcxpeASGqrRiJGR8HSqjWBz5HE", - "fields": { "bool": false, }, + "cursor": "24gc7iHafVKTcfRZfVVV8etkSoJMJVsqs1iYJAuHb8oNp32Vi1PcYw6S5GJ8hNhPmHHbP1weVbACYRctHVz4jXjQ", + "fields": { + "bool": false, + "data": "04050607" + }, "meta": { "owner": "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96", - "documentId": "0020de552d81948f220d09127dc42963071d086a142c9547e701674d4cac83f29872", - "viewId": "0020de552d81948f220d09127dc42963071d086a142c9547e701674d4cac83f29872", + "documentId": 
"0020c7dbed85159bbea8f1c44f1d4d7dfbdded6cd43c09ab1a292089e9530964cab9", + "viewId": "0020c7dbed85159bbea8f1c44f1d4d7dfbdded6cd43c09ab1a292089e9530964cab9", } } ] @@ -433,7 +442,7 @@ mod tests { r#" ( first: 1, - after: "31Ch6qa4mdKcxpWJG4X9Wf5iMvSSxmSGg8cyg9teNR6yKmLncZCmyVUaPFjRNoWcxpeASGqrRiJGR8HSqjWBz5HE", + after: "24gc7iHafVKTcfRZfVVV8etkSoJMJVsqs1iYJAuHb8oNp32Vi1PcYw6S5GJ8hNhPmHHbP1weVbACYRctHVz4jXjQ", orderBy: DOCUMENT_ID, orderDirection: ASC, filter: { @@ -453,6 +462,38 @@ mod tests { }), vec![] )] + #[case( + r#"( + first: 2, + filter: { + data: { + eq: "00010203" + } + } + )"#.to_string(), + value!({ + "collection": value!({ + "hasNextPage": false, + "totalCount": 1, + "endCursor": "24gZVnL75RPvxMVAiuGT2SgCrHneGZgsvEaiCh5g8qgxGBhcunAffueCUTiyuLDamP1G48KYPmRDBBFG43dh3XJ2", + "documents": [ + { + "cursor": "24gZVnL75RPvxMVAiuGT2SgCrHneGZgsvEaiCh5g8qgxGBhcunAffueCUTiyuLDamP1G48KYPmRDBBFG43dh3XJ2", + "fields": { + "bool": true, + "data": "00010203", + }, + "meta": { + "owner": "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96", + "documentId": "0020223f123be0f9025c591fba1a5800ca64084e837315521d5b65a870e874ed8b4e", + "viewId": "0020223f123be0f9025c591fba1a5800ca64084e837315521d5b65a870e874ed8b4e", + } + } + ] + }), + }), + vec![] + )] #[case( r#"(first: 0)"#.to_string(), Value::Null, @@ -469,7 +510,7 @@ mod tests { vec!["Invalid value for argument \"after\", expected type \"Cursor\"".to_string()] )] #[case( - r#"(after: "00205406410aefce40c5cbbb04488f50714b7d5657b9f17eed7358da35379bc20331")"#.to_string(), + r#"(after: "0020d384b69386867b61acebe6b23d4fac8c1425d5dce339bb3ef7c2218c155b3f9a")"#.to_string(), Value::Null, vec!["Invalid value for argument \"after\", expected type \"Cursor\"".to_string()] )] @@ -481,12 +522,12 @@ mod tests { #[case( r#"(orderBy: HELLO)"#.to_string(), Value::Null, - vec!["Invalid value for argument \"orderBy\", enumeration type \"schema_name_00205406410aefce40c5cbbb04488f50714b7d5657b9f17eed7358da35379bc20331OrderBy\" does not contain the value \"HELLO\"".to_string()] + vec!["Invalid value for argument \"orderBy\", enumeration type \"schema_name_0020d384b69386867b61acebe6b23d4fac8c1425d5dce339bb3ef7c2218c155b3f9aOrderBy\" does not contain the value \"HELLO\"".to_string()] )] #[case( r#"(orderBy: "hello")"#.to_string(), Value::Null, - vec!["Invalid value for argument \"orderBy\", enumeration type \"schema_name_00205406410aefce40c5cbbb04488f50714b7d5657b9f17eed7358da35379bc20331OrderBy\" does not contain the value \"hello\"".to_string()] + vec!["Invalid value for argument \"orderBy\", enumeration type \"schema_name_0020d384b69386867b61acebe6b23d4fac8c1425d5dce339bb3ef7c2218c155b3f9aOrderBy\" does not contain the value \"hello\"".to_string()] )] #[case( r#"(orderDirection: HELLO)"#.to_string(), @@ -511,7 +552,7 @@ mod tests { #[case( r#"(filter: { hello: { eq: true }})"#.to_string(), Value::Null, - vec!["Invalid value for argument \"filter\", unknown field \"hello\" of type \"schema_name_00205406410aefce40c5cbbb04488f50714b7d5657b9f17eed7358da35379bc20331Filter\"".to_string()] + vec!["Invalid value for argument \"filter\", unknown field \"hello\" of type \"schema_name_0020d384b69386867b61acebe6b23d4fac8c1425d5dce339bb3ef7c2218c155b3f9aFilter\"".to_string()] )] #[case( r#"(filter: { bool: { contains: "hello" }})"#.to_string(), @@ -570,7 +611,7 @@ mod tests { let schema = add_schema( &mut node, "schema_name", - vec![("bool", FieldType::Boolean)], + vec![("bool", FieldType::Boolean), ("data", FieldType::Bytes)], &key_pair, ) .await; @@ -579,7 +620,7 
@@ mod tests {
     add_document(
         &mut node,
         schema.id(),
-        vec![("bool", true.into())],
+        vec![("bool", true.into()), ("data", vec![0, 1, 2, 3][..].into())],
         &key_pair,
     )
     .await;
@@ -588,7 +629,10 @@ mod tests {
     add_document(
         &mut node,
         schema.id(),
-        vec![("bool", false.into())],
+        vec![
+            ("bool", false.into()),
+            ("data", vec![4, 5, 6, 7][..].into()),
+        ],
         &key_pair,
     )
     .await;
@@ -605,6 +649,7 @@ mod tests {
                 cursor
                 fields {{
                     bool
+                    data
                 }}
                 meta {{
                     owner
@@ -813,6 +858,9 @@ mod tests {
     #[case("(filter: { title: { eq: \"Natural's Not In\" } })", "")]
     #[case("(filter: { title: { notEq: \"Natural's Not In\", in: [ \"Oh Bondage Up Yours!\", \"Speed Of Life\" ] } })", "")]
     #[case("(filter: { title: { notEq: \"Natural's Not In\" }, release_year: { gt: 1978 }, artist: { in: [ \"X-ray Spex\"] } })", "")]
+    #[case("(filter: { audio: { notEq: \"aa\" } })", "")]
+    #[case("(filter: { audio: { eq: \"E8\" } })", "")]
+    #[case("(filter: { audio: { eq: \"\" } })", "")]
     #[case(
         "(orderDirection: DESC, orderBy: title)",
         "(orderDirection: ASC, orderBy: line)"
diff --git a/aquadoggo/src/graphql/queries/next_args.rs b/aquadoggo/src/graphql/queries/next_args.rs
index 642730136..8ce545c39 100644
--- a/aquadoggo/src/graphql/queries/next_args.rs
+++ b/aquadoggo/src/graphql/queries/next_args.rs
@@ -190,7 +190,7 @@ mod tests {
                 "nextArgs": {
                     "logId": "0",
                     "seqNum": "2",
-                    "backlink": "0020597040e2b85b4eaf3955f7aaca8f8fd60f00f77549a5554c8dd4081657f0d231",
+                    "backlink": "002098e61a9d946a1f046bd68414bfcc8fec09ddb3954dccaf184eaf7a7f4eb9cd26",
                     "skiplink": null,
                 }
             })
diff --git a/aquadoggo/src/graphql/scalars/hex_bytes_scalar.rs b/aquadoggo/src/graphql/scalars/hex_bytes_scalar.rs
new file mode 100644
index 000000000..a77c72d44
--- /dev/null
+++ b/aquadoggo/src/graphql/scalars/hex_bytes_scalar.rs
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: AGPL-3.0-or-later
+
+use std::fmt::Display;
+
+use dynamic_graphql::{Error, Result, Scalar, ScalarValue, Value};
+use serde::Serialize;
+
+/// Bytes encoded as a hexadecimal string.
+#[derive(Scalar, Clone, Debug, Eq, PartialEq, Serialize)]
+#[graphql(name = "HexBytes", validator(validate))]
+pub struct HexBytesScalar(String);
+
+impl ScalarValue for HexBytesScalar {
+    fn from_value(value: Value) -> Result<Self>
+    where
+        Self: Sized,
+    {
+        match &value {
+            Value::String(value) => {
+                hex::decode(value)?;
+                Ok(HexBytesScalar(value.to_string()))
+            }
+            _ => Err(Error::new(format!("Expected hex string, found: {value}"))),
+        }
+    }
+
+    fn to_value(&self) -> Value {
+        Value::Binary(self.0.clone().into())
+    }
+}
+
+impl From<HexBytesScalar> for String {
+    fn from(hash: HexBytesScalar) -> Self {
+        hash.0
+    }
+}
+
+impl From<String> for HexBytesScalar {
+    fn from(vec: String) -> Self {
+        Self(vec)
+    }
+}
+
+impl From<HexBytesScalar> for Value {
+    fn from(entry: HexBytesScalar) -> Self {
+        ScalarValue::to_value(&entry)
+    }
+}
+
+impl Display for HexBytesScalar {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let hex = hex::encode(&self.0);
+        write!(f, "{}", hex)
+    }
+}
+
+/// Validation method used internally in `async-graphql` to check scalar values passed into the
+/// public api.
+fn validate(value: &Value) -> bool { + HexBytesScalar::from_value(value.to_owned()).is_ok() +} diff --git a/aquadoggo/src/graphql/scalars/mod.rs b/aquadoggo/src/graphql/scalars/mod.rs index 0b94149ad..4a887426d 100644 --- a/aquadoggo/src/graphql/scalars/mod.rs +++ b/aquadoggo/src/graphql/scalars/mod.rs @@ -13,6 +13,7 @@ mod document_view_id_scalar; mod encoded_entry_scalar; mod encoded_operation_scalar; mod entry_hash_scalar; +mod hex_bytes_scalar; mod log_id_scalar; mod public_key_scalar; mod seq_num_scalar; @@ -23,6 +24,7 @@ pub use document_view_id_scalar::DocumentViewIdScalar; pub use encoded_entry_scalar::EncodedEntryScalar; pub use encoded_operation_scalar::EncodedOperationScalar; pub use entry_hash_scalar::EntryHashScalar; +pub use hex_bytes_scalar::HexBytesScalar; pub use log_id_scalar::LogIdScalar; pub use public_key_scalar::PublicKeyScalar; pub use seq_num_scalar::SeqNumScalar; diff --git a/aquadoggo/src/graphql/schema.rs b/aquadoggo/src/graphql/schema.rs index e16aee61d..d82589405 100644 --- a/aquadoggo/src/graphql/schema.rs +++ b/aquadoggo/src/graphql/schema.rs @@ -13,9 +13,9 @@ use tokio::sync::Mutex; use crate::bus::ServiceSender; use crate::db::SqlStore; use crate::graphql::input_values::{ - build_filter_input_object, build_order_enum_value, BooleanFilter, FloatFilter, IntegerFilter, - MetaFilterInputObject, OrderDirection, PinnedRelationFilter, PinnedRelationListFilter, - RelationFilter, RelationListFilter, StringFilter, + build_filter_input_object, build_order_enum_value, BooleanFilter, FloatFilter, HexBytesFilter, + IntegerFilter, MetaFilterInputObject, OrderDirection, PinnedRelationFilter, + PinnedRelationListFilter, RelationFilter, RelationListFilter, StringFilter, }; use crate::graphql::mutations::{MutationRoot, Publish}; use crate::graphql::objects::{ @@ -28,7 +28,8 @@ use crate::graphql::queries::{ use crate::graphql::responses::NextArguments; use crate::graphql::scalars::{ CursorScalar, DocumentIdScalar, DocumentViewIdScalar, EncodedEntryScalar, - EncodedOperationScalar, EntryHashScalar, LogIdScalar, PublicKeyScalar, SeqNumScalar, + EncodedOperationScalar, EntryHashScalar, HexBytesScalar, LogIdScalar, PublicKeyScalar, + SeqNumScalar, }; use crate::schema::SchemaProvider; @@ -52,6 +53,7 @@ pub async fn build_root_schema( .register::() // Register input values .register::() + .register::() .register::() .register::() .register::() @@ -62,6 +64,7 @@ pub async fn build_root_schema( .register::() .register::() // Register scalars + .register::() .register::() .register::() .register::() diff --git a/aquadoggo/src/graphql/tests.rs b/aquadoggo/src/graphql/tests.rs index 39dac3579..b7b4e0b31 100644 --- a/aquadoggo/src/graphql/tests.rs +++ b/aquadoggo/src/graphql/tests.rs @@ -27,6 +27,7 @@ fn scalar_fields() { ("float", FieldType::Float), ("int", FieldType::Integer), ("text", FieldType::String), + ("bytes", FieldType::Bytes), ], &key_pair, ) @@ -38,6 +39,7 @@ fn scalar_fields() { ("float", (1.0).into()), ("int", 1.into()), ("text", "yes".into()), + ("bytes", vec![0, 1, 2, 3][..].into()), ] .try_into() .unwrap(); @@ -52,7 +54,8 @@ fn scalar_fields() { bool, float, int, - text + text, + bytes }} }}, }}"#, @@ -77,6 +80,7 @@ fn scalar_fields() { "float": 1.0, "int": 1, "text": "yes", + "bytes": "00010203", } }, }); diff --git a/aquadoggo/src/graphql/utils.rs b/aquadoggo/src/graphql/utils.rs index 2b3d5e92a..7f6a6fe8a 100644 --- a/aquadoggo/src/graphql/utils.rs +++ b/aquadoggo/src/graphql/utils.rs @@ -61,6 +61,10 @@ pub fn gql_scalar(operation_value: &OperationValue) -> Value { 
OperationValue::Float(value) => value.to_owned().into(), OperationValue::Integer(value) => value.to_owned().into(), OperationValue::String(value) => value.to_owned().into(), + OperationValue::Bytes(value) => { + let hex_string = hex::encode(value); + hex_string.into() + } _ => panic!("This method is not used for relation types"), } } @@ -74,6 +78,7 @@ pub fn graphql_type(field_type: &FieldType) -> TypeRef { FieldType::Integer => TypeRef::named(TypeRef::INT), FieldType::Float => TypeRef::named(TypeRef::FLOAT), FieldType::String => TypeRef::named(TypeRef::STRING), + FieldType::Bytes => TypeRef::named("HexBytes"), FieldType::Relation(schema_id) => TypeRef::named(schema_id.to_string()), FieldType::RelationList(schema_id) => TypeRef::named(collection_name(schema_id)), FieldType::PinnedRelation(schema_id) => TypeRef::named(schema_id.to_string()), @@ -91,6 +96,11 @@ pub fn filter_to_operation_value( FieldType::Integer => filter_value.i64()?.into(), FieldType::Float => filter_value.f64()?.into(), FieldType::String => filter_value.string()?.into(), + FieldType::Bytes => { + let hex_string = filter_value.string()?; + let bytes = hex::decode(hex_string)?; + bytes[..].into() + } // We are only ever dealing with list items here FieldType::Relation(_) | FieldType::RelationList(_) => { DocumentId::new(&filter_value.string()?.parse()?).into() diff --git a/aquadoggo/src/proptests/document_strategies.rs b/aquadoggo/src/proptests/document_strategies.rs index ef7c86550..8f5cede02 100644 --- a/aquadoggo/src/proptests/document_strategies.rs +++ b/aquadoggo/src/proptests/document_strategies.rs @@ -44,6 +44,9 @@ pub enum FieldValue { /// String value. String(String), + /// Hex encoded bytes value. + Bytes(Vec), + /// Reference to a document. Relation(DocumentAST), @@ -117,6 +120,15 @@ fn values_from_schema(schema: SchemaAST) -> impl Strategy any::>() + .prop_map(move |value| { + let value = FieldValue::Bytes(value); + DocumentFieldValue { + name: field_name.clone(), + value, + } + }) + .boxed(), SchemaFieldType::Relation => values_from_schema(*relation_schema.clone().unwrap()) .prop_map(move |value| { let schema_id = relation_schema.clone().unwrap().id.clone(); diff --git a/aquadoggo/src/proptests/filter_strategies.rs b/aquadoggo/src/proptests/filter_strategies.rs index f0a95e890..88df7873c 100644 --- a/aquadoggo/src/proptests/filter_strategies.rs +++ b/aquadoggo/src/proptests/filter_strategies.rs @@ -8,7 +8,7 @@ use proptest::strategy::{BoxedStrategy, Just, Strategy}; use proptest_derive::Arbitrary; use crate::proptests::schema_strategies::{SchemaField, SchemaFieldType}; -use crate::proptests::utils::FieldName; +use crate::proptests::utils::{FieldName, HexString}; /// Possible values used in filter arguments. 
`UniqueIdentifier` is a placeholder for values which /// can be derived at runtime in order to use identifiers which exist in on the node, these include @@ -17,6 +17,7 @@ use crate::proptests::utils::FieldName; pub enum FilterValue { Boolean(bool), String(String), + Bytes(HexString), Integer(i64), Float(f64), UniqueIdentifier, // This is a placeholder for a document id, document view id or public key which is selected at testing time @@ -87,6 +88,7 @@ fn application_field_filter_strategy( | SchemaFieldType::Integer | SchemaFieldType::Float | SchemaFieldType::String + | SchemaFieldType::Bytes | SchemaFieldType::Relation | SchemaFieldType::PinnedRelation => generate_simple_field_filter(field.clone()) .prop_map(|(name, filter)| ((name, filter), Vec::new())) @@ -221,6 +223,18 @@ fn generate_simple_field_filter(field: SchemaField) -> BoxedStrategy<(FieldName, ] .boxed() } + SchemaFieldType::Bytes => { + let field_clone = field.clone(); + prop_oneof![ + any::() + .prop_map(FilterValue::Bytes) + .prop_map(move |value| (field.name.clone(), Filter::Equal(value))), + any::() + .prop_map(FilterValue::Bytes) + .prop_map(move |value| (field_clone.name.clone(), Filter::NotEqual(value))) + ] + .boxed() + } SchemaFieldType::Relation | SchemaFieldType::PinnedRelation => prop_oneof![ ( Just(field.name.clone()), diff --git a/aquadoggo/src/proptests/schema_strategies.rs b/aquadoggo/src/proptests/schema_strategies.rs index 4095bc8e7..32a0c4b74 100644 --- a/aquadoggo/src/proptests/schema_strategies.rs +++ b/aquadoggo/src/proptests/schema_strategies.rs @@ -64,6 +64,7 @@ pub enum SchemaFieldType { Integer, Float, String, + Bytes, Relation, RelationList, PinnedRelation, @@ -107,6 +108,13 @@ fn schema_field() -> impl Strategy { relation_schema: None, } }), + any::().prop_map(|field_name| { + SchemaField { + name: field_name, + field_type: SchemaFieldType::Bytes, + relation_schema: None, + } + }), ]; // Selections for the recursive fields. diff --git a/aquadoggo/src/proptests/tests.rs b/aquadoggo/src/proptests/tests.rs index a57ed87fe..fef3f990a 100644 --- a/aquadoggo/src/proptests/tests.rs +++ b/aquadoggo/src/proptests/tests.rs @@ -187,7 +187,6 @@ prop_compose! { proptest! { #![proptest_config(Config { - cases: 100, failure_persistence: Some(Box::new(FileFailurePersistence::WithSource("regressions"))), .. Config::default() })] diff --git a/aquadoggo/src/proptests/utils.rs b/aquadoggo/src/proptests/utils.rs index 6a6b62d7c..0b58e28a5 100644 --- a/aquadoggo/src/proptests/utils.rs +++ b/aquadoggo/src/proptests/utils.rs @@ -19,6 +19,10 @@ use super::filter_strategies::{Filter, FilterValue}; #[derive(Arbitrary, Debug, Clone, PartialEq, Eq, Hash)] pub struct FieldName(#[proptest(regex = "[A-Za-z]{1}[A-Za-z0-9_]{0,63}")] pub String); +/// A hexadecimal string. +#[derive(Arbitrary, Debug, Clone, PartialEq, Eq, Hash)] +pub struct HexString(#[proptest(regex = "([a-fA-F0-9]{2}){0,64}")] pub String); + /// Add schemas from a schema AST to a test node. 
#[async_recursion] pub async fn add_schemas_from_ast( @@ -42,6 +46,9 @@ pub async fn add_schemas_from_ast( SchemaFieldType::String => { schema_fields.push((field.name, FieldType::String)); } + SchemaFieldType::Bytes => { + schema_fields.push((field.name, FieldType::Bytes)); + } SchemaFieldType::Relation => { let schema_ast = field.relation_schema.unwrap(); let schema = add_schemas_from_ast(node, &schema_ast, schemas).await; @@ -116,6 +123,9 @@ pub async fn add_documents_from_ast( FieldValue::String(value) => { operation_fields.push((&field.name.0, value.to_owned().into())); } + FieldValue::Bytes(value) => { + operation_fields.push((&field.name.0, value[..].into())); + } FieldValue::Relation(document_ast) => { let document_view_id = add_documents_from_ast(node, &document_ast, documents).await; let operation_id = document_view_id.graph_tips().first().unwrap(); @@ -246,6 +256,9 @@ pub fn parse_filter(filter_args: &mut Vec, name: &FieldName, filter: &Fi FilterValue::String(value) => { filter_args.push(format!("{name}: {{ eq: {} }}", escape_string_value(value))) } + FilterValue::Bytes(value) => { + filter_args.push(format!("{name}: {{ eq: \"{}\" }}", value.0)) + } FilterValue::Integer(value) => filter_args.push(format!("{name}: {{ eq: {value} }}")), FilterValue::Float(value) => filter_args.push(format!("{name}: {{ eq: {value} }}")), }, @@ -260,6 +273,9 @@ pub fn parse_filter(filter_args: &mut Vec, name: &FieldName, filter: &Fi "{name}: {{ notEq: {} }}", escape_string_value(value) )), + FilterValue::Bytes(value) => { + filter_args.push(format!("{name}: {{ notEq: \"{}\" }}", value.0)) + } FilterValue::Integer(value) => { filter_args.push(format!("{name}: {{ notEq: {value} }}")) } @@ -276,6 +292,7 @@ pub fn parse_filter(filter_args: &mut Vec, name: &FieldName, filter: &Fi )), FilterValue::Integer(value) => filter_args.push(format!("{name}: {{ in: [{value}] }}")), FilterValue::Float(value) => filter_args.push(format!("{name}: {{ in: [{value}] }}")), + _ => panic!(), }, Filter::NotIn(value) => match value { FilterValue::UniqueIdentifier => { @@ -294,6 +311,7 @@ pub fn parse_filter(filter_args: &mut Vec, name: &FieldName, filter: &Fi FilterValue::Float(value) => { filter_args.push(format!("{name}: {{ notIn: [{value}] }}")) } + _ => panic!(), }, Filter::GreaterThan(value) => match value { FilterValue::String(value) => { diff --git a/aquadoggo/src/test_utils/helpers.rs b/aquadoggo/src/test_utils/helpers.rs index 0b004d86f..64a3603d6 100644 --- a/aquadoggo/src/test_utils/helpers.rs +++ b/aquadoggo/src/test_utils/helpers.rs @@ -36,6 +36,7 @@ pub fn doggo_schema() -> Schema { pub fn doggo_fields() -> Vec<(&'static str, OperationValue)> { vec![ ("username", OperationValue::String("bubu".to_owned())), + ("data", OperationValue::Bytes(vec![0, 1, 2, 3])), ("height", OperationValue::Float(3.5)), ("age", OperationValue::Integer(28)), ("is_admin", OperationValue::Boolean(false)), diff --git a/aquadoggo/src/test_utils/node.rs b/aquadoggo/src/test_utils/node.rs index eec2a8b48..b1668c9ea 100644 --- a/aquadoggo/src/test_utils/node.rs +++ b/aquadoggo/src/test_utils/node.rs @@ -346,14 +346,10 @@ pub async fn add_blob_pieces( let mut blob_pieces_view_ids = Vec::with_capacity(blob_pieces.len()); for piece in blob_pieces { - // @TODO: No need to convert bytes into a string when we introduced our new bytes operation - // field type. 
Related issue: https://github.com/p2panda/aquadoggo/issues/543
-        let byte_str = std::str::from_utf8(piece).expect("Invalid UTF-8 sequence");
-
         let view_id = add_document(
             node,
             &SchemaId::BlobPiece(1),
-            vec![("data", byte_str.into())],
+            vec![("data", piece.into())],
            &key_pair,
         )
         .await;
diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml
index 327417209..339c51cc6 100644
--- a/aquadoggo_cli/Cargo.toml
+++ b/aquadoggo_cli/Cargo.toml
@@ -29,7 +29,7 @@ figment = { version = "0.10.10", features = ["toml", "env"] }
 hex = "0.4.3"
 libp2p = "0.52.0"
 log = "0.4.20"
-p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "8377056617b64e898e9980e8ad84d258ca0442a1" }
+p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "be84d7c4e39c1b67125d80468ccf412cf25ae1d7" }
 path-clean = "1.0.1"
 serde = { version = "1.0.185", features = ["serde_derive"] }
 tempfile = "3.7.0"

From 8791463eb2de4d64ff4baf10f0eb0284dd594a71 Mon Sep 17 00:00:00 2001
From: Andreas Dzialocha
Date: Fri, 8 Sep 2023 11:43:10 +0200
Subject: [PATCH 14/14] Make sure `/tmp` directory does not run out of scope before application ends (#557)

* Make sure tmp dir does not run out of scope before application ends

* Add entry to CHANGELOG.md
---
 CHANGELOG.md                |  4 ++++
 aquadoggo_cli/src/config.rs | 18 +++++++++++++-----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 508669816..16a912807 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,6 +22,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Build a byte buffer over paginated pieces when assembling blobs [#547](https://github.com/p2panda/aquadoggo/pull/547)
 - Stream blob data in chunks to files to not occupy too much memory [#551](https://github.com/p2panda/aquadoggo/pull/551)
 
+### Fixed
+
+- Make sure temporary directory does not run out of scope [#557](https://github.com/p2panda/aquadoggo/pull/557)
+
 ## [0.5.0]
 
 ### Added
diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs
index 9d1e32b52..375d16ee6 100644
--- a/aquadoggo_cli/src/config.rs
+++ b/aquadoggo_cli/src/config.rs
@@ -4,6 +4,7 @@ use std::convert::TryFrom;
 use std::net::{IpAddr, SocketAddr};
 use std::path::PathBuf;
 use std::str::FromStr;
+use std::sync::OnceLock;
 
 use anyhow::{anyhow, bail, Result};
 use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration};
@@ -16,6 +17,7 @@ use libp2p::multiaddr::Protocol;
 use libp2p::{Multiaddr, PeerId};
 use p2panda_rs::schema::SchemaId;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use tempfile::TempDir;
 
 use crate::utils::absolute_path;
 
 const WILDCARD: &str = "*";
 
 const CONFIG_FILE_NAME: &str = "config.toml";
 
+static TMP_DIR: OnceLock<TempDir> = OnceLock::new();
+
 type ConfigFilePath = Option<PathBuf>;
 
 /// Get configuration from 1. .toml file, 2. environment variables and 3.
command line arguments
@@ -352,11 +356,15 @@ impl TryFrom<Configuration> for NodeConfiguration {
         // Create a temporary blobs directory when none was given
         let blobs_base_path = match value.blobs_base_path {
             Some(path) => path,
-            None => {
-                let tmp_dir = tempfile::TempDir::new()
-                    .map_err(|_| anyhow!("Could not create temporary directory to store blobs"))?;
-                tmp_dir.path().to_path_buf()
-            }
+            None => TMP_DIR
+                .get_or_init(|| {
+                    // Initialise a `TempDir` instance globally to make sure it does not run out of
+                    // scope and get deleted before the end of the application runtime
+                    tempfile::TempDir::new()
+                        .expect("Could not create temporary directory to store blobs")
+                })
+                .path()
+                .to_path_buf(),
         };
 
         Ok(NodeConfiguration {
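
Note on the change above: `tempfile::TempDir` removes its directory from disk the moment the value is dropped, so in the previous code the blobs directory was already gone once the surrounding block went out of scope. Keeping the guard in a `static OnceLock` ties the directory's lifetime to the whole process. Below is a minimal standalone sketch of that behaviour (illustration only, not taken from the patch; it assumes nothing beyond the `tempfile` crate and `std::sync::OnceLock` already used above):

// Sketch: keep a `TempDir` guard alive for the whole process so the directory
// it owns is not removed until the program exits.
use std::path::PathBuf;
use std::sync::OnceLock;

use tempfile::TempDir;

static TMP_DIR: OnceLock<TempDir> = OnceLock::new();

fn blobs_base_path() -> PathBuf {
    TMP_DIR
        .get_or_init(|| TempDir::new().expect("Could not create temporary directory"))
        .path()
        .to_path_buf()
}

fn main() {
    let path = blobs_base_path();
    // The directory still exists here because the `TempDir` guard lives in
    // `TMP_DIR` and is only dropped when the process ends.
    assert!(path.exists());
}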