diff --git a/CHANGELOG.md b/CHANGELOG.md index 74e31dc9d..16a912807 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,25 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Serve static files from `blobs` directory [#480](https://github.com/p2panda/aquadoggo/pull/480) +- Add method to store for pruning document views [#491](https://github.com/p2panda/aquadoggo/pull/491) +- Introduce `BlobStore` [#484](https://github.com/p2panda/aquadoggo/pull/484) +- Task for automatic garbage collection of unused documents and views [#500](https://github.com/p2panda/aquadoggo/pull/500) +- Blobs directory configuration [#549](https://github.com/p2panda/aquadoggo/pull/549) +- Integrate `Bytes` operation value [#554](https://github.com/p2panda/aquadoggo/pull/554) + +### Changed + +- HTTP routes to serve files with correct content type headers [#544](https://github.com/p2panda/aquadoggo/pull/544) +- Build a byte buffer over paginated pieces when assembling blobs [#547](https://github.com/p2panda/aquadoggo/pull/547) +- Stream blob data in chunks to files to not occupy too much memory [#551](https://github.com/p2panda/aquadoggo/pull/551) + +### Fixed + +- Make sure temporary directory does not run out of scope [#557](https://github.com/p2panda/aquadoggo/pull/557) + ## [0.5.0] ### Added diff --git a/Cargo.lock b/Cargo.lock index 4c412a346..d1dde2c07 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -162,11 +162,13 @@ dependencies = [ "async-graphql", "async-graphql-axum", "async-recursion", + "async-stream", "async-trait", "asynchronous-codec", "axum", "bamboo-rs-core-ed25519-yasmf", "bs58 0.4.0", + "bytes", "ciborium", "ctor", "deadqueue", @@ -197,9 +199,11 @@ dependencies = [ "serde_bytes", "serde_json", "sqlx", + "tempfile", "thiserror", "tokio", "tokio-stream", + "tokio-util", "tower", "tower-http", "tower-service", @@ -275,8 +279,8 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "726535892e8eae7e70657b4c8ea93d26b8553afb1ce617caee529ef96d7dee6c" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", "synstructure", ] @@ -287,8 +291,8 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2777730b2039ac0f95f093556e61b6d26cebed5393ca6f152717777cec3a42ed" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", ] @@ -312,7 +316,7 @@ dependencies = [ "async-lock", "async-task", "concurrent-queue", - "fastrand", + "fastrand 1.9.0", "futures-lite", "slab", ] @@ -404,8 +408,8 @@ dependencies = [ "async-graphql-parser", "darling", "proc-macro-crate", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", "thiserror", ] @@ -499,8 +503,8 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -521,8 +525,8 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -538,8 +542,8 @@ version = "0.1.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum =
"a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -797,7 +801,7 @@ dependencies = [ "async-lock", "async-task", "atomic-waker", - "fastrand", + "fastrand 1.9.0", "futures-lite", "log", ] @@ -942,8 +946,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54a9bb5758fc5dfe728d1019941681eccaf0cf8a4189b692a0ee2f2ecf90a050" dependencies = [ "heck", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -1125,7 +1129,7 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" dependencies = [ - "quote 1.0.31", + "quote", "syn 1.0.109", ] @@ -1183,8 +1187,8 @@ checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" dependencies = [ "fnv", "ident_case", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "strsim", "syn 1.0.109", ] @@ -1196,7 +1200,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" dependencies = [ "darling_core", - "quote 1.0.31", + "quote", "syn 1.0.109", ] @@ -1317,8 +1321,8 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -1360,8 +1364,8 @@ dependencies = [ "Inflector", "darling", "proc-macro-crate", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", "thiserror", ] @@ -1414,8 +1418,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116" dependencies = [ "heck", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", ] @@ -1492,6 +1496,12 @@ dependencies = [ "instant", ] +[[package]] +name = "fastrand" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6999dc1837253364c2ebb0704ba97994bd874e8f195d665c50b7548f6ea92764" + [[package]] name = "fiat-crypto" version = "0.1.20" @@ -1605,7 +1615,7 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" dependencies = [ - "fastrand", + "fastrand 1.9.0", "futures-core", "futures-io", "memchr", @@ -1620,8 +1630,8 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -2620,8 +2630,8 @@ checksum = "c4d5ec2a3df00c7836d7696c136274c9c59705bac69133253696a6c932cd1d74" dependencies = [ "heck", "proc-macro-warning", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -3108,8 +3118,7 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "p2panda-rs" version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "462f0e5a6df45b0b9ad387fdf77f9d43c75610d72036c1b1de0aebfe10f434b0" +source = 
"git+https://github.com/p2panda/p2panda?rev=be84d7c4e39c1b67125d80468ccf412cf25ae1d7#be84d7c4e39c1b67125d80468ccf412cf25ae1d7" dependencies = [ "arrayvec 0.5.2", "async-trait", @@ -3229,9 +3238,9 @@ version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da9f0f13dac8069c139e8300a6510e3f4143ecf5259c60b116a9b271b4ca0d54" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2", "proc-macro2-diagnostics", - "quote 1.0.31", + "quote", "syn 2.0.29", ] @@ -3278,8 +3287,8 @@ checksum = "99d490fe7e8556575ff6911e45567ab95e71617f43781e5c05490dc8d75c965c" dependencies = [ "pest", "pest_meta", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -3309,8 +3318,8 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -3399,20 +3408,11 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "70550716265d1ec349c41f70dd4f964b4fd88394efe4405f0c1da679c4799a07" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] -[[package]] -name = "proc-macro2" -version = "0.4.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" -dependencies = [ - "unicode-xid 0.1.0", -] - [[package]] name = "proc-macro2" version = "1.0.66" @@ -3428,8 +3428,8 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", "version_check", "yansi", @@ -3453,8 +3453,8 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b6a5217beb0ad503ee7fa752d451c905113d70721b937126158f3106a48cc1" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", ] @@ -3480,13 +3480,13 @@ dependencies = [ [[package]] name = "proptest-derive" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "90b46295382dc76166cb7cf2bb4a97952464e4b7ed5a43e6cd34e1fec3349ddc" +checksum = "9cf16337405ca084e9c78985114633b6827711d22b9e6ef6c6c0d665eb3f0b6e" dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "syn 0.15.44", + "proc-macro2", + "quote", + "syn 1.0.109", ] [[package]] @@ -3565,22 +3565,13 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "quote" -version = "0.6.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" -dependencies = [ - "proc-macro2 0.4.30", -] - [[package]] name = "quote" version = "1.0.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5fe8a65d69dd0808184ebb5f836ab526bb259db23c657efa38711b1072ee47f0" dependencies = [ - "proc-macro2 1.0.66", + "proc-macro2", ] [[package]] @@ -3853,8 +3844,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5015e68a0685a95ade3eee617ff7101ab6a3fc689203101ca16ebc16f2b89c66" dependencies = [ "cfg-if", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "rustc_version", "syn 1.0.109", ] @@ -3866,8 +3857,8 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7229b505ae0706e64f37ffc54a9c163e11022a6636d58fe1f3f52018257ff9f7" dependencies = [ "cfg-if", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "rustc_version", "syn 1.0.109", "unicode-ident", @@ -3879,7 +3870,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b29d3117bce27ea307d1fb7ce12c64ba11b3fd04311a42d32bc5f0072e6e3d4d" dependencies = [ - "quote 1.0.31", + "quote", "rustc_version", "syn 1.0.109", ] @@ -3890,7 +3881,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45f80dcc84beab3a327bbe161f77db25f336a1452428176787c8c79ac79d7073" dependencies = [ - "quote 1.0.31", + "quote", "rand 0.8.5", "rustc_version", "syn 1.0.109", @@ -4080,9 +4071,9 @@ dependencies = [ [[package]] name = "serde-wasm-bindgen" -version = "0.4.5" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3b4c031cd0d9014307d82b8abf653c0290fbdaeb4c02d00c63cf52f728628bf" +checksum = "f3b143e2833c57ab9ad3ea280d21fd34e285a42837aeb0ee301f4f41890fa00e" dependencies = [ "js-sys", "serde", @@ -4114,8 +4105,8 @@ version = "1.0.185" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -4269,8 +4260,8 @@ version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1508efa03c362e23817f96cde18abed596a25219a8b2c66e8db33c03543d315b" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", ] @@ -4413,8 +4404,8 @@ dependencies = [ "either", "heck", "once_cell", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "sha2 0.10.7", "sqlx-core", "sqlx-rt", @@ -4467,25 +4458,14 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" -[[package]] -name = "syn" -version = "0.15.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" -dependencies = [ - "proc-macro2 0.4.30", - "quote 0.6.13", - "unicode-xid 0.1.0", -] - [[package]] name = "syn" version = "1.0.109" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "unicode-ident", ] @@ -4495,8 +4475,8 @@ version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "unicode-ident", ] @@ -4512,10 +4492,10 @@ version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f36bdaa60a83aca3921b5259d5400cbf5e90fc51931376a9bd4a0eb79aa7210f" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 1.0.109", - "unicode-xid 0.2.4", + "unicode-xid", ] [[package]] @@ -4541,15 +4521,14 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.6.0" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"31c0432476357e58790aaa47a8efb0c5138f137343f3b5f23bd36a27e3b0a6d6" +checksum = "5486094ee78b2e5038a6382ed7645bc084dc2ec433426ca4c3cb61e2007b8998" dependencies = [ - "autocfg", "cfg-if", - "fastrand", + "fastrand 2.0.0", "redox_syscall 0.3.5", - "rustix 0.37.23", + "rustix 0.38.4", "windows-sys", ] @@ -4577,8 +4556,8 @@ version = "1.0.46" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f1728216d3244de4f14f14f8c15c79be1a7c67867d28d69b719690e2a19fb445" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -4650,8 +4629,8 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -4757,11 +4736,11 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.3.5" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f873044bf02dd1e8239e9c1293ea39dad76dc594ec16185d0a1bf31d8dc8d858" +checksum = "55ae70283aba8d2a8b411c695c437fe25b8b5e44e23e780662002fc72fb47a82" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.3.3", "bytes", "futures-core", "futures-util", @@ -4804,8 +4783,8 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] @@ -4961,12 +4940,6 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" -[[package]] -name = "unicode-xid" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" - [[package]] name = "unicode-xid" version = "0.2.4" @@ -5116,8 +5089,8 @@ dependencies = [ "bumpalo", "log", "once_cell", - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", "wasm-bindgen-shared", ] @@ -5140,7 +5113,7 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dee495e55982a3bd48105a7b947fd2a9b4a8ae3010041b9e0faab3f9cd028f1d" dependencies = [ - "quote 1.0.31", + "quote", "wasm-bindgen-macro-support", ] @@ -5150,8 +5123,8 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", "wasm-bindgen-backend", "wasm-bindgen-shared", @@ -5478,7 +5451,7 @@ version = "1.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ - "proc-macro2 1.0.66", - "quote 1.0.31", + "proc-macro2", + "quote", "syn 2.0.29", ] diff --git a/README.md b/README.md index d4fc1f392..8b10a3c6c 100644 --- a/README.md +++ b/README.md @@ -213,7 +213,7 @@ If you are not working with Rust you can create FFI bindings from the `aquadoggo As an application developer the interface you are likely to use the most is the GraphQL query API. For whichever schema your node supports a custom query API is generated, you use this to fetch data into your app. 
Results from a collection query can be paginated, sorted and filtered. -Fetch one "mushroom" by it's id, returning values for only the selected fields: +Fetch one "mushroom" by its id, returning values for only the selected fields: ```graphql { diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 7dd1a5652..e1ad02e9c 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -22,11 +22,13 @@ proptests = [] anyhow = "1.0.62" async-graphql = { version = "5.0.6", features = ["dynamic-schema"] } async-graphql-axum = "5.0.6" +async-stream = "0.3.5" async-trait = "0.1.64" asynchronous-codec = { version = "0.6.2", features = ["cbor"] } -axum = "0.6.10" +axum = { version = "0.6.10", features = ["headers"] } bamboo-rs-core-ed25519-yasmf = "0.1.1" bs58 = "0.4.0" +bytes = "1.4.0" deadqueue = { version = "0.2.3", default-features = false, features = [ "unlimited", ] } @@ -55,10 +57,13 @@ lipmaa-link = "0.2.2" log = "0.4.19" once_cell = "1.18.0" openssl-probe = "0.1.5" -p2panda-rs = { version = "0.7.1", features = ["storage-provider"] } +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "be84d7c4e39c1b67125d80468ccf412cf25ae1d7", features = [ + "storage-provider", +] } rand = "0.8.5" regex = "1.9.3" serde = { version = "1.0.152", features = ["derive"] } +serde_bytes = "0.11.12" sqlx = { version = "0.6.1", features = [ "any", "postgres", @@ -73,9 +78,11 @@ tokio = { version = "1.28.2", features = [ "rt-multi-thread", "sync", "time", + "fs", ] } tokio-stream = { version = "0.1.14", features = ["sync"] } -tower-http = { version = "0.3.4", default-features = false, features = [ +tokio-util = { version = "0.7.8", features = ["io"] } +tower-http = { version = "0.4.0", default-features = false, features = [ "cors", ] } triggered = "0.1.2" @@ -91,12 +98,12 @@ http = "0.2.9" hyper = "0.14.19" libp2p-swarm-test = "0.2.0" once_cell = "1.17.0" -p2panda-rs = { version = "0.7.1", features = [ +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "be84d7c4e39c1b67125d80468ccf412cf25ae1d7", features = [ "test-utils", "storage-provider", ] } -proptest = "1.1.0" -proptest-derive = "0.3.0" +proptest = "1.2.0" +proptest-derive = "0.4.0" rand = "0.8.5" reqwest = { version = "0.11.11", default-features = false, features = [ "json", @@ -106,5 +113,6 @@ rstest = "0.15.0" rstest_reuse = "0.3.0" serde_bytes = "0.11.12" serde_json = "1.0.85" +tempfile = "3.7.0" tower = "0.4.13" tower-service = "0.3.2" diff --git a/aquadoggo/migrations/20220509090252_create-operations.sql b/aquadoggo/migrations/20220509090252_create-operations.sql index d33712c6d..383de95a8 100644 --- a/aquadoggo/migrations/20220509090252_create-operations.sql +++ b/aquadoggo/migrations/20220509090252_create-operations.sql @@ -16,7 +16,7 @@ CREATE TABLE IF NOT EXISTS operation_fields_v1 ( field_type TEXT NOT NULL, value TEXT NULL, list_index INT NOT NULL, - FOREIGN KEY(operation_id) REFERENCES operations_v1(operation_id) + FOREIGN KEY(operation_id) REFERENCES operations_v1(operation_id) ON DELETE CASCADE ); CREATE INDEX idx_operation_fields_v1 ON operation_fields_v1 (operation_id, name); diff --git a/aquadoggo/migrations/20220510022755_create-documents.sql b/aquadoggo/migrations/20220510022755_create-documents.sql index e1b2d0850..cc32860c5 100644 --- a/aquadoggo/migrations/20220510022755_create-documents.sql +++ b/aquadoggo/migrations/20220510022755_create-documents.sql @@ -1,20 +1,20 @@ -- SPDX-License-Identifier: AGPL-3.0-or-later -CREATE TABLE IF NOT EXISTS document_view_fields ( - document_view_id TEXT NOT NULL, - 
operation_id TEXT NOT NULL, - name TEXT NOT NULL, - FOREIGN KEY(operation_id) REFERENCES operations_v1(operation_id) -); - -CREATE INDEX idx_document_view_fields ON document_view_fields (document_view_id, operation_id, name); - CREATE TABLE IF NOT EXISTS document_views ( document_view_id TEXT NOT NULL UNIQUE, schema_id TEXT NOT NULL, PRIMARY KEY (document_view_id) ); +CREATE TABLE IF NOT EXISTS document_view_fields ( + document_view_id TEXT NOT NULL, + operation_id TEXT NOT NULL, + name TEXT NOT NULL, + FOREIGN KEY(document_view_id) REFERENCES document_views(document_view_id) ON DELETE CASCADE +); + +CREATE INDEX idx_document_view_fields ON document_view_fields (document_view_id, operation_id, name); + CREATE TABLE IF NOT EXISTS documents ( document_id TEXT NOT NULL UNIQUE, document_view_id TEXT NOT NULL, diff --git a/aquadoggo/migrations/20230114140233_alter-documents.sql b/aquadoggo/migrations/20230114140233_alter-documents.sql index ec8e11be6..023475477 100644 --- a/aquadoggo/migrations/20230114140233_alter-documents.sql +++ b/aquadoggo/migrations/20230114140233_alter-documents.sql @@ -1,3 +1,3 @@ -- SPDX-License-Identifier: AGPL-3.0-or-later -ALTER TABLE document_views ADD COLUMN document_id TEXT NOT NULL REFERENCES documents(document_id); \ No newline at end of file +ALTER TABLE document_views ADD COLUMN document_id TEXT NOT NULL REFERENCES documents(document_id) ON DELETE CASCADE; \ No newline at end of file diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 15810cf04..eba85da0a 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -1,5 +1,7 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +use std::path::PathBuf; + use p2panda_rs::schema::SchemaId; use crate::network::NetworkConfiguration; @@ -36,6 +38,12 @@ pub struct Configuration { /// 2020. pub http_port: u16, + /// Path to folder where blobs (binary files) are kept and served from. + /// + /// **Warning**: When set to a temporary directory, make sure that also the database itself is + /// not persisted, otherwise you will run into data inconsistencies. + pub blobs_base_path: PathBuf, + /// Number of concurrent workers which defines the maximum of materialization tasks which can /// be worked on simultaneously. /// @@ -54,6 +62,7 @@ impl Default for Configuration { database_url: "sqlite::memory:".into(), database_max_connections: 32, http_port: 2020, + blobs_base_path: PathBuf::new(), worker_pool_size: 16, network: NetworkConfiguration::default(), } diff --git a/aquadoggo/src/db/errors.rs b/aquadoggo/src/db/errors.rs index 76fc4724e..d76f6b061 100644 --- a/aquadoggo/src/db/errors.rs +++ b/aquadoggo/src/db/errors.rs @@ -13,33 +13,59 @@ pub enum SqlStoreError { #[error("Deletion of row from table {0} did not show any effect")] Deletion(String), + + /// Error returned from BlobStore. + #[error(transparent)] + BlobStoreError(#[from] BlobStoreError), + + /// Error returned from `DocumentStore` methods. + #[error(transparent)] + DocumentStorage(#[from] DocumentStorageError), } /// `SchemaStore` errors. #[derive(Error, Debug)] pub enum SchemaStoreError { - /// Catch all error which implementers can use for passing their own errors up the chain. - #[error("Error occured in DocumentStore: {0}")] - #[allow(dead_code)] - Custom(String), - /// Error returned from converting p2panda-rs `DocumentView` into `SchemaView. #[error(transparent)] - SystemSchemaError(#[from] SystemSchemaError), + SystemSchema(#[from] SystemSchemaError), /// Error returned from p2panda-rs `Schema` methods. 
#[error(transparent)] - SchemaError(#[from] SchemaError), + Schema(#[from] SchemaError), /// Error returned from p2panda-rs `SchemaId` methods. #[error(transparent)] - SchemaIdError(#[from] SchemaIdError), + SchemaId(#[from] SchemaIdError), /// Error returned from `DocumentStore` methods. #[error(transparent)] - DocumentStorageError(#[from] DocumentStorageError), + DocumentStorage(#[from] DocumentStorageError), /// Error returned from `OperationStore` methods. #[error(transparent)] - OperationStorageError(#[from] OperationStorageError), + OperationStorage(#[from] OperationStorageError), +} + +#[derive(Error, Debug)] +pub enum BlobStoreError { + /// Error when no "pieces" field found on blob document. + #[error("Missing \"pieces\" field on blob document")] + NotBlobDocument, + + /// Error when no pieces found for existing blob document. + #[error("No pieces found for the requested blob")] + NoBlobPiecesFound, + + /// Error when some pieces not found for existing blob document. + #[error("Some pieces missing for the requested blob")] + MissingPieces, + + /// Error when combined pieces length and claimed blob length don't match. + #[error("The combined pieces length and claimed blob length don't match")] + IncorrectLength, + + /// Error returned from `DocumentStore` methods. + #[error(transparent)] + DocumentStorageError(#[from] DocumentStorageError), } diff --git a/aquadoggo/src/db/models/utils.rs b/aquadoggo/src/db/models/utils.rs index f35dc9c54..2a3b773e1 100644 --- a/aquadoggo/src/db/models/utils.rs +++ b/aquadoggo/src/db/models/utils.rs @@ -81,6 +81,14 @@ pub fn parse_operation_rows( OperationValue::String(field_value.unwrap().clone()), )); } + "bytes" => { + operation_fields.push(( + field_name.to_string(), + OperationValue::Bytes(hex::decode(field_value.unwrap()).expect( + "bytes coming from the store are encoded in valid hex strings", + )), + )); + } "relation" => { operation_fields.push(( field_name.to_string(), @@ -235,6 +243,10 @@ pub fn parse_value_to_string_vec(value: &OperationValue) -> Vec<Option<String>> } db_values } + OperationValue::Bytes(bytes) => { + // bytes are stored in the db as hex strings + vec![Some(hex::encode(bytes))] + } } } @@ -300,6 +312,18 @@ pub fn parse_document_view_field_rows( ), ); } + "bytes" => { + document_view_fields.insert( + &row.name, + DocumentViewValue::new( + &row.operation_id.parse::<OperationId>().unwrap(), + &OperationValue::Bytes( + hex::decode(row.value.as_ref().unwrap()) + .expect("bytes coming from the db to be hex encoded"), + ), + ), + ); + } "relation" => { document_view_fields.insert( &row.name, @@ -435,6 +459,25 @@ mod tests { list_index: Some(0), sorted_index: None, }, + OperationFieldsJoinedRow { + public_key: "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96" + .to_string(), + document_id: "0020b177ec1bf26dfb3b7010d473e6d44713b29b765b99c6e60ecbfae742de496543" + .to_string(), + operation_id: + "0020b177ec1bf26dfb3b7010d473e6d44713b29b765b99c6e60ecbfae742de496543" + .to_string(), + action: "create".to_string(), + schema_id: + "venue_0020c65567ae37efea293e34a9c7d13f8f2bf23dbdc3b5c7b9ab46293111c48fc78b" + .to_string(), + previous: None, + name: Some("data".to_string()), + field_type: Some("bytes".to_string()), + value: Some("00010203".to_string()), + list_index: Some(0), + sorted_index: None, + }, OperationFieldsJoinedRow { public_key: "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96" .to_string(), @@ -695,6 +738,10 @@ mod tests { operation.fields().unwrap().get("username").unwrap(), &OperationValue::String("bubu".to_string())
); + assert_eq!( + operation.fields().unwrap().get("data").unwrap(), + &OperationValue::Bytes(vec![0, 1, 2, 3]) + ); assert_eq!( operation.fields().unwrap().get("age").unwrap(), &OperationValue::Integer(28) @@ -786,35 +833,20 @@ mod tests { #[rstest] fn operation_values_to_string_vec(schema_id: SchemaId) { let expected_list = vec![ - Some("28".to_string()), - None, - Some( - "0020abababababababababababababababababababababababababababababababab".to_string(), - ), - Some( - "0020cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd".to_string(), - ), - Some("3.5".to_string()), - Some("false".to_string()), - Some( - "0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".to_string(), - ), - Some( - "0020bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb".to_string(), - ), - Some( - "0020cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc".to_string(), - ), - Some( - "0020dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd".to_string(), - ), - Some( - "0020eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee".to_string(), - ), - Some( - "0020ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff".to_string(), - ), - Some("bubu".to_string()), + Some("28".into()), + None, // This is an empty relation list + Some("0020abababababababababababababababababababababababababababababababab".into()), + Some("0020cdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcdcd".into()), + Some("00010203".into()), + Some("3.5".into()), + Some("false".into()), + Some("0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".into()), + Some("0020bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb".into()), + Some("0020cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc".into()), + Some("0020dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd".into()), + Some("0020eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee".into()), + Some("0020ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff".into()), + Some("bubu".into()), ]; let operation = create_operation(doggo_fields(), schema_id); @@ -853,58 +885,44 @@ mod tests { #[test] fn parses_document_field_rows() { + let document_id = + "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0".to_string(); + let operation_id = + "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770".to_string(); + let document_view_id = operation_id.clone(); + let document_field_rows = vec![ DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "age".to_string(), list_index: 0, field_type: "int".to_string(), value: Some("28".to_string()), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: 
operation_id.clone(), name: "height".to_string(), list_index: 0, field_type: "float".to_string(), value: Some("3.5".to_string()), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "is_admin".to_string(), list_index: 0, field_type: "bool".to_string(), value: Some("false".to_string()), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "many_profile_pictures".to_string(), list_index: 0, field_type: "relation_list".to_string(), @@ -914,14 +932,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "many_profile_pictures".to_string(), list_index: 1, field_type: "relation_list".to_string(), @@ -931,14 +944,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "many_special_profile_pictures".to_string(), list_index: 0, field_type: "pinned_relation_list".to_string(), @@ -948,14 +956,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "many_special_profile_pictures".to_string(), list_index: 1, field_type: "pinned_relation_list".to_string(), @@ -965,14 +968,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + 
operation_id: operation_id.clone(), name: "profile_picture".to_string(), list_index: 0, field_type: "relation".to_string(), @@ -982,14 +980,9 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "special_profile_picture".to_string(), list_index: 0, field_type: "pinned_relation".to_string(), @@ -999,28 +992,27 @@ mod tests { ), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "username".to_string(), list_index: 0, field_type: "str".to_string(), value: Some("bubu".to_string()), }, DocumentViewFieldRow { - document_id: "0020713b2777f1222660291cb528d220c358920b4beddc1aea9df88a69cec45a10c0" - .to_string(), - document_view_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), - operation_id: - "0020dc8fe1cbacac4d411ae25ea264369a7b2dabdfb617129dec03b6661edd963770" - .to_string(), + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), + name: "data".to_string(), + list_index: 0, + field_type: "bytes".to_string(), + value: Some("00010203".to_string()), + }, + DocumentViewFieldRow { + document_id: document_id.clone(), + document_view_id: document_view_id.clone(), + operation_id: operation_id.clone(), name: "an_empty_relation_list".to_string(), list_index: 0, field_type: "pinned_relation_list".to_string(), @@ -1038,6 +1030,10 @@ mod tests { document_fields.get("username").unwrap(), &DocumentViewValue::new(&operation_id, &OperationValue::String("bubu".to_string())) ); + assert_eq!( + document_fields.get("data").unwrap(), + &DocumentViewValue::new(&operation_id, &OperationValue::Bytes(vec![0, 1, 2, 3])) + ); assert_eq!( document_fields.get("age").unwrap(), &DocumentViewValue::new(&operation_id, &OperationValue::Integer(28)) diff --git a/aquadoggo/src/db/stores/blob.rs b/aquadoggo/src/db/stores/blob.rs new file mode 100644 index 000000000..2b7ad75c4 --- /dev/null +++ b/aquadoggo/src/db/stores/blob.rs @@ -0,0 +1,640 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::num::NonZeroU64; + +use async_stream::try_stream; +use bytes::{BufMut, BytesMut}; +use futures::Stream; +use p2panda_rs::document::traits::AsDocument; +use p2panda_rs::document::{DocumentId, DocumentViewId}; +use p2panda_rs::operation::OperationValue; +use p2panda_rs::schema::validate::MAX_BLOB_PIECE_LENGTH; +use p2panda_rs::schema::{Schema, SchemaId}; +use p2panda_rs::storage_provider::traits::DocumentStore; +use sqlx::{query_scalar, AnyPool}; + +use crate::db::errors::{BlobStoreError, SqlStoreError}; +use crate::db::query::{Filter, Order, Pagination, PaginationField, Select}; +use crate::db::stores::query::{PaginationCursor, Query, RelationList}; +use crate::db::SqlStore; + +/// Number of 
blob pieces requested per database query iteration. +const BLOB_QUERY_PAGE_SIZE: u64 = 10; + +pub type BlobData = Vec<u8>; + +/// Gets blob data from the database in chunks (via pagination) and populates a readable stream +/// with it. +/// +/// This stream can further be used to write data into a file etc. This helps when dealing with large +/// blobs, as only a little system memory is occupied per read and write step. We only move small +/// chunks at a time and keep the memory footprint manageable. +/// +/// Currently the BLOB_QUERY_PAGE_SIZE is set to 10, which acts as a multiplier of the +/// MAX_BLOB_PIECE_LENGTH: with 10 * 256kb we occupy an approximate maximum of 2.56mb of memory at a +/// time. Whether these values make sense still needs to be revisited, but it is a start! +#[derive(Debug)] +pub struct BlobStream { + store: SqlStore, + pagination_cursor: Option<PaginationCursor>, + document_view_id: DocumentViewId, + num_pieces: usize, + length: usize, + expected_num_pieces: usize, + expected_length: usize, +} + +impl BlobStream { + pub fn new(store: &SqlStore, document: impl AsDocument) -> Result<Self, BlobStoreError> { + if document.schema_id() != &SchemaId::Blob(1) { + return Err(BlobStoreError::NotBlobDocument); + } + + // Get the length of the blob + let expected_length = match document.get("length").unwrap() { + OperationValue::Integer(length) => *length as usize, + _ => unreachable!(), // We already validated that this is a blob document + }; + + // Get the number of pieces in the blob + let expected_num_pieces = match document.get("pieces").unwrap() { + OperationValue::PinnedRelationList(list) => list.len(), + _ => unreachable!(), // We already validated that this is a blob document + }; + + Ok(Self { + store: store.to_owned(), + pagination_cursor: None, + document_view_id: document.view_id().to_owned(), + num_pieces: 0, + length: 0, + expected_length, + expected_num_pieces, + }) + } + + async fn next_chunk(&mut self) -> Result<BlobData, BlobStoreError> { + let schema = Schema::get_system(SchemaId::BlobPiece(1)).expect("System schema is given"); + let list = RelationList::new_pinned(&self.document_view_id, "pieces"); + + let args = Query::new( + &Pagination::new( + &NonZeroU64::new(BLOB_QUERY_PAGE_SIZE).unwrap(), + self.pagination_cursor.as_ref(), + &vec![PaginationField::EndCursor, PaginationField::HasNextPage], + ), + &Select::new(&["data".into()]), + &Filter::default(), + &Order::default(), + ); + + let mut buf = + BytesMut::with_capacity(BLOB_QUERY_PAGE_SIZE as usize * MAX_BLOB_PIECE_LENGTH); + + let (pagination_data, documents) = self.store.query(schema, &args, Some(&list)).await?; + self.pagination_cursor = pagination_data.end_cursor; + self.num_pieces += documents.len(); + + for (_, blob_piece_document) in documents { + match blob_piece_document + .get("data") + .expect("Blob piece document without \"data\" field") + { + OperationValue::Bytes(data_str) => buf.put(&data_str[..]), + _ => unreachable!(), // We only queried for blob piece documents + } + } + + self.length += buf.len(); + + Ok(buf.to_vec()) + } + + /// This method is called _after_ the stream has ended. We compare the values we saw with what + /// we expected in order to detect inconsistencies and invalid blobs.
+ fn validate(&self) -> Result<(), BlobStoreError> { + // No pieces were found + if self.length == 0 { + return Err(BlobStoreError::NoBlobPiecesFound); + }; + + // Not all pieces were found + if self.expected_num_pieces != self.num_pieces { + return Err(BlobStoreError::MissingPieces); + } + + // Combined blob data length doesn't match the claimed length + if self.expected_length != self.length { + return Err(BlobStoreError::IncorrectLength); + }; + + Ok(()) + } + + /// Establishes a stream of blob data. + /// + /// The stream ends when all data has been written; at the end the blob data gets validated + /// against the expected blob length. + /// + /// To consume this stream in the form of an iterator the `pin_mut` macro is required. + // NOTE: Clippy does not understand that this macro generates code which asks for an explicit + // lifetime. + #[allow(clippy::needless_lifetimes)] + pub fn read_all<'a>(&'a mut self) -> impl Stream<Item = Result<BlobData, BlobStoreError>> + 'a { + try_stream! { + loop { + let blob_data = self.next_chunk().await?; + + if blob_data.is_empty() { + self.validate()?; + break; + } + + yield blob_data; + } + } + } +} + +impl SqlStore { + /// Get data stream for one blob from the store, identified by its document id. + pub async fn get_blob(&self, id: &DocumentId) -> Result<Option<BlobStream>, BlobStoreError> { + if let Some(document) = self.get_document(id).await? { + Ok(Some(BlobStream::new(self, document)?)) + } else { + Ok(None) + } + } + + /// Get data stream for one blob from the store, identified by its document view id. + pub async fn get_blob_by_view_id( + &self, + view_id: &DocumentViewId, + ) -> Result<Option<BlobStream>, BlobStoreError> { + if let Some(document) = self.get_document_by_view_id(view_id).await? { + Ok(Some(BlobStream::new(self, document)?)) + } else { + Ok(None) + } + } + + /// Purge blob data from the node _if_ no other document relates to it. + pub async fn purge_blob(&self, document_id: &DocumentId) -> Result<(), SqlStoreError> { + // Collect the view ids of any existing document views which contain a relation to the blob + // which is the purge target. + let blob_reverse_relations = reverse_relations(&self.pool, document_id, None).await?; + + // If there are no documents referring to the blob then we continue with the purge. + if blob_reverse_relations.is_empty() { + // Collect the document view ids of all pieces this blob has ever referred to in its + // `pieces` + let blob_piece_ids: Vec<String> = query_scalar( + " + SELECT + operation_fields_v1.value + FROM + operation_fields_v1 + LEFT JOIN + operations_v1 + ON + operations_v1.operation_id = operation_fields_v1.operation_id + WHERE + operations_v1.document_id = $1 + AND + operation_fields_v1.name = 'pieces' + ", + ) + .bind(document_id.to_string()) + .fetch_all(&self.pool) + .await + .map_err(|e| SqlStoreError::Transaction(e.to_string()))?; + + // Purge the blob document itself. + self.purge_document(document_id).await?; + + // Now iterate over each collected blob piece in order to check if they are still + // needed by any other blob document, and if not purge them as well. + for blob_piece_id in blob_piece_ids { + let blob_piece_id: DocumentId = blob_piece_id + .parse() + .expect("Document ids from the store are valid"); + + // Collect reverse relations for this blob piece. + let blob_piece_reverse_relations = + reverse_relations(&self.pool, &blob_piece_id, Some(SchemaId::Blob(1))).await?; + + // If there are none then purge the blob piece.
+ if blob_piece_reverse_relations.is_empty() { + self.purge_document(&blob_piece_id).await?; + } + } + } + + Ok(()) + } +} + +/// Helper for getting the document ids of any document which relates to the specified document. +/// +/// Optionally pass in a `SchemaId` to restrict the results to documents of a certain schema. +async fn reverse_relations( + pool: &AnyPool, + document_id: &DocumentId, + schema_id: Option<SchemaId>, +) -> Result<Vec<String>, SqlStoreError> { + let schema_id_condition = match schema_id { + Some(schema_id) => format!("AND document_views.schema_id = '{}'", schema_id), + None => String::new(), + }; + + query_scalar(&format!( + " + SELECT + document_view_fields.document_view_id + FROM + document_view_fields + LEFT JOIN + operation_fields_v1 + ON + document_view_fields.operation_id = operation_fields_v1.operation_id + AND + document_view_fields.name = operation_fields_v1.name + LEFT JOIN + document_views + ON + document_view_fields.document_view_id = document_views.document_view_id + WHERE + operation_fields_v1.field_type + IN + ('pinned_relation', 'pinned_relation_list', 'relation', 'relation_list') + {schema_id_condition} + AND + operation_fields_v1.value IN ( + SELECT document_views.document_view_id + FROM document_views + WHERE document_views.document_id = $1 + ) OR operation_fields_v1.value = $1 + ", + )) + .bind(document_id.to_string()) + .fetch_all(pool) + .await + .map_err(|e| SqlStoreError::Transaction(e.to_string())) +} + +#[cfg(test)] +mod tests { + use bytes::{BufMut, BytesMut}; + use futures::{pin_mut, StreamExt}; + use p2panda_rs::document::DocumentId; + use p2panda_rs::identity::KeyPair; + use p2panda_rs::schema::SchemaId; + use p2panda_rs::test_utils::fixtures::{key_pair, random_document_view_id}; + use p2panda_rs::test_utils::generate_random_bytes; + use p2panda_rs::test_utils::memory_store::helpers::PopulateStoreConfig; + use rstest::rstest; + + use crate::db::errors::BlobStoreError; + use crate::test_utils::{ + add_blob, add_document, add_schema_and_documents, assert_query, populate_and_materialize, + populate_store_config, test_runner, update_document, TestNode, + }; + + use super::BlobStream; + + async fn read_data_from_stream(mut blob_stream: BlobStream) -> Result<Vec<u8>, BlobStoreError> { + let stream = blob_stream.read_all(); + pin_mut!(stream); + + let mut buf = BytesMut::new(); + + while let Some(value) = stream.next().await { + match value { + Ok(blob_data) => { + buf.put(blob_data.as_slice()); + } + Err(err) => return Err(err), + } + } + + Ok(buf.to_vec()) + } + + #[rstest] + fn get_blob(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Get blob by document id + let blob_stream = node.context.store.get_blob(&document_id).await.unwrap(); + assert!(blob_stream.is_some()); + let collected_data = read_data_from_stream(blob_stream.unwrap()).await; + assert_eq!(blob_data, collected_data.unwrap()); + + // Get blob by view id + let blob_stream_view = node + .context + .store + .get_blob_by_view_id(&blob_view_id) + .await + .unwrap(); + assert!(blob_stream_view.is_some()); + let collected_data = read_data_from_stream(blob_stream_view.unwrap()).await; + assert_eq!(blob_data, collected_data.unwrap()); + }) + } + + #[rstest] + fn get_blob_errors(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data =
generate_random_bytes(12); + + // Publish a blob containing pieces which aren't in the store. + let blob_view_id = add_document( + &mut node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![random_document_view_id(), random_document_view_id()].into(), + ), + ], + &key_pair, + ) + .await; + + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // We get the correct `NoBlobPiecesFound` error. + let stream = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + let collected_data = read_data_from_stream(stream.unwrap()).await; + assert!(matches!( + collected_data, + Err(BlobStoreError::NoBlobPiecesFound) + ),); + + // Publish one blob piece. + let blob_piece_view_id_1 = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data[..5].into())], + &key_pair, + ) + .await; + + // Publish a blob with one piece that is in the store and one that isn't. + let blob_view_id = add_document( + &mut node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![blob_piece_view_id_1.clone(), random_document_view_id()].into(), + ), + ], + &key_pair, + ) + .await; + + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // We should get the correct `MissingBlobPieces` error. + let stream = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + let collected_data = read_data_from_stream(stream.unwrap()).await; + assert!(matches!(collected_data, Err(BlobStoreError::MissingPieces)),); + + // Publish one more blob piece, but it doesn't contain the correct number of bytes. + let blob_piece_view_id_2 = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", blob_data[9..].into())], + &key_pair, + ) + .await; + + // Publish a blob with two pieces that are in the store but they don't add up to the + // right byte length. + let blob_view_id = add_document( + &mut node, + &SchemaId::Blob(1), + vec![ + ("length", { blob_data.len() as i64 }.into()), + ("mime_type", "text/plain".into()), + ( + "pieces", + vec![blob_piece_view_id_1, blob_piece_view_id_2].into(), + ), + ], + &key_pair, + ) + .await; + + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // We get the correct `IncorrectLength` error. + let stream = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + let collected_data = read_data_from_stream(stream.unwrap()).await; + assert!(matches!( + collected_data, + Err(BlobStoreError::IncorrectLength) + ),); + }) + } + + #[rstest] + fn purge_blob(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 7, "text/plain", &key_pair).await; + + // There is one blob and two blob pieces in database. + // + // These are the rows we expect to exist in each table. 
+ assert_query(&node, "SELECT entry_hash FROM entries", 3).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 3).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 6).await; + assert_query(&node, "SELECT log_id FROM logs", 3).await; + assert_query(&node, "SELECT document_id FROM documents", 3).await; + assert_query(&node, "SELECT document_id FROM document_views", 3).await; + assert_query(&node, "SELECT name FROM document_view_fields", 5).await; + + // Purge this blob from the database, we now expect all tables to be empty (except the + // logs table). + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let result = node.context.store.purge_blob(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 0).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 0).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 0).await; + assert_query(&node, "SELECT log_id FROM logs", 3).await; + assert_query(&node, "SELECT document_id FROM documents", 0).await; + assert_query(&node, "SELECT document_id FROM document_views", 0).await; + assert_query(&node, "SELECT name FROM document_view_fields", 0).await; + + let result = node.context.store.purge_blob(&document_id).await; + + assert!(result.is_ok(), "{:#?}", result) + }) + } + + #[rstest] + fn purge_blob_only_purges_blobs( + #[from(populate_store_config)] + #[with(1, 1, 1)] + config: PopulateStoreConfig, + key_pair: KeyPair, + ) { + test_runner(|mut node: TestNode| async move { + let _ = populate_and_materialize(&mut node, &config).await; + + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 7, "text/plain", &key_pair).await; + + // There is one blob and two blob pieces in database. + // + // These are the rows we expect to exist in each table. 
+ assert_query(&node, "SELECT entry_hash FROM entries", 4).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 4).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 20).await; + assert_query(&node, "SELECT log_id FROM logs", 4).await; + assert_query(&node, "SELECT document_id FROM documents", 4).await; + assert_query(&node, "SELECT document_id FROM document_views", 4).await; + assert_query(&node, "SELECT name FROM document_view_fields", 16).await; + + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let result = node.context.store.purge_blob(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 1).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 1).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 14).await; + assert_query(&node, "SELECT log_id FROM logs", 4).await; + assert_query(&node, "SELECT document_id FROM documents", 1).await; + assert_query(&node, "SELECT document_id FROM document_views", 1).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; + + let result = node.context.store.purge_blob(&document_id).await; + + assert!(result.is_ok(), "{:#?}", result) + }) + } + + #[rstest] + fn does_not_purge_blob_if_still_pinned(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 7, "text/plain", &key_pair).await; + + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + assert_query(&node, "SELECT entry_hash FROM entries", 6).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 6).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 12).await; + assert_query(&node, "SELECT log_id FROM logs", 6).await; + assert_query(&node, "SELECT document_id FROM documents", 6).await; + assert_query(&node, "SELECT document_id FROM document_views", 6).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; + + // Purge this blob from the database, we now expect all tables to be empty. + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let result = node.context.store.purge_blob(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 6).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 6).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 12).await; + assert_query(&node, "SELECT log_id FROM logs", 6).await; + assert_query(&node, "SELECT document_id FROM documents", 6).await; + assert_query(&node, "SELECT document_id FROM document_views", 6).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; + + let result = node.context.store.purge_blob(&document_id).await; + + assert!(result.is_ok(), "{:#?}", result) + }) + } + + #[rstest] + fn purge_all_pieces_of_updated_blob(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 7, "text/plain", &key_pair).await; + + // Create a new blob piece. 
+ let new_blob_pieces = add_document( + &mut node, + &SchemaId::BlobPiece(1), + vec![("data", "more blob data".as_bytes().into())], + &key_pair, + ) + .await; + + // Update the blob document to point at the new blob piece. + let _ = update_document( + &mut node, + &SchemaId::Blob(1), + vec![("pieces", vec![new_blob_pieces].into())], + &blob_view_id, + &key_pair, + ) + .await; + + // There is one blob and three blob pieces in database. + // + // These are the rows we expect to exist in each table. + assert_query(&node, "SELECT entry_hash FROM entries", 5).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 5).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 8).await; + assert_query(&node, "SELECT log_id FROM logs", 4).await; + assert_query(&node, "SELECT document_id FROM documents", 4).await; + assert_query(&node, "SELECT document_id FROM document_views", 5).await; + assert_query(&node, "SELECT name FROM document_view_fields", 9).await; + + // Purge this blob from the database, we now expect all tables to be empty (except the + // logs table). + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let result = node.context.store.purge_blob(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 0).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 0).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 0).await; + assert_query(&node, "SELECT log_id FROM logs", 4).await; + assert_query(&node, "SELECT document_id FROM documents", 0).await; + assert_query(&node, "SELECT document_id FROM document_views", 0).await; + assert_query(&node, "SELECT name FROM document_view_fields", 0).await; + + let result = node.context.store.purge_blob(&document_id).await; + + assert!(result.is_ok(), "{:#?}", result) + }) + } +} diff --git a/aquadoggo/src/db/stores/document.rs b/aquadoggo/src/db/stores/document.rs index 463880a2d..981cbdd05 100644 --- a/aquadoggo/src/db/stores/document.rs +++ b/aquadoggo/src/db/stores/document.rs @@ -9,7 +9,7 @@ //! themselves. On completion, the resultant documents are stored and can be retrieved using the //! methods defined here. //! -//! The whole document store can be seen as a live cache. All it's content is derived from +//! The whole document store can be seen as a live cache. All its content is derived from //! operations already stored on the node. It allows easy and quick access to current or pinned //! values. //! @@ -23,13 +23,13 @@ //! state, we call these states document views. When a document is updated it gets a new state, or //! view, which can be referred to by a globally unique document view id. //! -//! The getter methods allow retrieving a document by it's `DocumentId` or it's -//! `DocumentViewId`. The former always returns the most current document state, the latter -//! returns the specific document view if it has already been materialised and stored. Although it -//! is possible to construct a document at any point in it's history if all operations are -//! retained, we use a system of "pinned relations" to identify and materialise only views we -//! explicitly wish to keep. +//! The getter methods allow retrieving a document by its `DocumentId` or its `DocumentViewId`. The +//! former always returns the most current document state, the latter returns the specific document +//! view if it has already been materialised and stored. Although it is possible to construct a +//! 
document at any point in its history if all operations are retained, we use a system of "pinned +//! relations" to identify and materialise only views we explicitly wish to keep. use async_trait::async_trait; +use log::debug; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::{DocumentId, DocumentView, DocumentViewId}; use p2panda_rs::schema::SchemaId; @@ -48,9 +48,9 @@ use crate::db::SqlStore; impl DocumentStore for SqlStore { type Document = StorageDocument; - /// Get a document from the store by it's `DocumentId`. + /// Get a document from the store by its `DocumentId`. /// - /// Retrieves a document in it's most current state from the store. Ignores documents which + /// Retrieves a document in its most current state from the store. Ignores documents which /// contain a DELETE operation. /// /// An error is returned only if a fatal database error occurs. @@ -112,7 +112,7 @@ impl DocumentStore for SqlStore { /// Get a document from the database by `DocumentViewId`. /// - /// Get's a document at a specific point in it's history. Only returns views that have already + /// Get's a document at a specific point in its history. Only returns views that have already /// been materialised and persisted in the store. These are likely to be "pinned views" which /// are relations from other documents, in which case the materialiser service will have /// identified and materialised them ready for querying. @@ -275,7 +275,7 @@ impl SqlStore { /// current view and field values into the `document_views` and `document_view_fields` tables /// respectively. /// - /// If the document already existed in the store then it's current view and view id will be + /// If the document already existed in the store then its current view and view id will be /// updated with those contained on the passed document. /// /// If any of the operations fail all insertions are rolled back. @@ -365,6 +365,222 @@ impl SqlStore { .await .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string())) } + + /// Get the ids for all document views for a document which are currently materialized to the store. + pub async fn get_all_document_view_ids( + &self, + document_id: &DocumentId, + ) -> Result, DocumentStorageError> { + let document_view_ids: Vec = query_scalar( + " + SELECT + document_views.document_view_id + FROM + document_views + WHERE + document_views.document_id = $1 + ", + ) + .bind(document_id.as_str()) + .fetch_all(&self.pool) + .await + .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + + Ok(document_view_ids + .iter() + .map(|document_id_str| { + document_id_str + .parse::() + .expect("Document Id's coming from the store should be valid") + }) + .collect()) + } + + /// Get the ids of all documents which are related to from another document view. 
+ pub async fn get_child_document_ids( + &self, + document_view_id: &DocumentViewId, + ) -> Result, DocumentStorageError> { + let document_view_ids: Vec = query_scalar( + " + SELECT DISTINCT + document_views.document_id + FROM + document_views + WHERE + document_views.document_view_id + IN ( + SELECT + operation_fields_v1.value + FROM + document_view_fields + LEFT JOIN + operation_fields_v1 + ON + document_view_fields.operation_id = operation_fields_v1.operation_id + AND + document_view_fields.name = operation_fields_v1.name + WHERE + operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') + AND + document_view_fields.document_view_id = $1 + ) + ", + ) + .bind(document_view_id.to_string()) + .fetch_all(&self.pool) + .await + .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + + Ok(document_view_ids + .iter() + .map(|document_id_str| { + document_id_str + .parse::() + .expect("Document Id's coming from the store should be valid") + }) + .collect()) + } + + /// Attempt to remove a document view from the store. Returns a boolean which indicates if the + /// removal took place. + /// + /// This operations only succeeds if the view is "dangling", meaning no other document view + /// exists which relates to this view, AND it is not the current view of any document. + pub async fn prune_document_view( + &self, + document_view_id: &DocumentViewId, + ) -> Result { + // Attempt to delete the view. If it is pinned from an existing view, or it is the current + // view of a document, the deletion will not go ahead. + let result = query( + " + DELETE FROM + document_views + WHERE + document_views.document_view_id = $1 + AND NOT EXISTS ( + SELECT + document_view_fields.document_view_id + FROM + document_view_fields + LEFT JOIN + operation_fields_v1 + ON + document_view_fields.operation_id = operation_fields_v1.operation_id + AND + document_view_fields.name = operation_fields_v1.name + WHERE + operation_fields_v1.field_type IN ('pinned_relation', 'pinned_relation_list') + AND + operation_fields_v1.value = $1 + ) + AND NOT EXISTS ( + SELECT documents.document_id FROM documents + WHERE documents.document_view_id = $1 + ) + " + ) + .bind(document_view_id.to_string()) + .execute(&self.pool) + .await + .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + + // If any rows were affected the deletion went ahead. + if result.rows_affected() > 0 { + debug!("Deleted view: {}", document_view_id); + Ok(true) + } else { + debug!("Did not delete view: {}", document_view_id); + Ok(false) + } + } + + /// Check if this view is the current view of its document. + pub async fn is_current_view( + &self, + document_view_id: &DocumentViewId, + ) -> Result { + let document_view_id: Option = query_scalar( + " + SELECT documents.document_view_id FROM documents + WHERE documents.document_view_id = $1 + ", + ) + .bind(document_view_id.to_string()) + .fetch_optional(&self.pool) + .await + .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; + + Ok(document_view_id.is_some()) + } + + /// Purge a document from the store by its id. + /// + /// This removes entries, operations and any materialized documents which exist. + /// + /// The only unaffected table after deletion is the `logs` table as we still want to remember + /// which log ids an author has already used so we can continue to avoid collisions. 
+ pub async fn purge_document( + &self, + document_id: &DocumentId, + ) -> Result<(), DocumentStorageError> { + // Start a transaction, any db insertions after this point, and before the `commit()` + // will be rolled back in the event of an error. + let mut tx = self + .pool + .begin() + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Delete rows from `documents` table, this cascades up to `document_views` and + // `document_view_fields` tables. + query( + " + DELETE FROM documents + WHERE documents.document_id = $1 + ", + ) + .bind(document_id.to_string()) + .fetch_all(&mut tx) + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Delete rows from `entries` table. + query( + " + DELETE FROM entries + WHERE entries.entry_hash IN ( + SELECT operations_v1.operation_id FROM operations_v1 + WHERE operations_v1.document_id = $1 + ) + ", + ) + .bind(document_id.to_string()) + .fetch_all(&mut tx) + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Delete rows from `operations_v1` table, this cascades up to `operation_fields_v1` table + // as well. + query( + " + DELETE FROM operations_v1 + WHERE operations_v1.document_id = $1 + ", + ) + .bind(document_id.to_string()) + .fetch_all(&mut tx) + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + // Commit the transaction if all queries succeeded. + tx.commit() + .await + .map_err(|e| DocumentStorageError::FatalStorageError(e.to_string()))?; + + Ok(()) + } } // Helper method for getting rows from the `document_view_fields` table. @@ -376,7 +592,7 @@ async fn get_document_view_field_rows( // // This query performs a join against the `operation_fields_v1` table as this is where the // actual field values live. The `document_view_fields` table defines relations between a - // document view and the operation values which hold it's field values. + // document view and the operation values which hold its field values. // // Each field has one row, or in the case of list values (pinned relations, or relation lists) // then one row exists for every item in the list. The `list_index` column is used for @@ -393,14 +609,16 @@ async fn get_document_view_field_rows( operation_fields_v1.value FROM document_view_fields - LEFT JOIN document_views - ON - document_view_fields.document_view_id = document_views.document_view_id - LEFT JOIN operation_fields_v1 - ON - document_view_fields.operation_id = operation_fields_v1.operation_id - AND - document_view_fields.name = operation_fields_v1.name + LEFT JOIN + operation_fields_v1 + ON + document_view_fields.operation_id = operation_fields_v1.operation_id + AND + document_view_fields.name = operation_fields_v1.name + LEFT JOIN + document_views + ON + document_view_fields.document_view_id = document_views.document_view_id WHERE document_view_fields.document_view_id = $1 ORDER BY @@ -505,7 +723,7 @@ async fn insert_document( .await .map_err(|err| DocumentStorageError::FatalStorageError(err.to_string()))?; - // If the document is not deleted, then we also want to insert it's view and fields. + // If the document is not deleted, then we also want to insert its view and fields. if !document.is_deleted() && document.view().is_some() { // Construct the view, unwrapping the document view fields as we checked they exist above. 
let document_view = @@ -529,23 +747,29 @@ async fn insert_document( #[cfg(test)] mod tests { + use p2panda_rs::api::next_args; use p2panda_rs::document::materialization::build_graph; use p2panda_rs::document::traits::AsDocument; use p2panda_rs::document::{DocumentBuilder, DocumentId, DocumentViewFields, DocumentViewId}; + use p2panda_rs::entry::{LogId, SeqNum}; + use p2panda_rs::identity::KeyPair; use p2panda_rs::operation::traits::AsOperation; use p2panda_rs::operation::{Operation, OperationId}; use p2panda_rs::storage_provider::traits::{DocumentStore, OperationStore}; use p2panda_rs::test_utils::constants; use p2panda_rs::test_utils::fixtures::{ - operation, random_document_id, random_document_view_id, random_operation_id, + key_pair, operation, random_document_id, random_document_view_id, random_operation_id, }; use p2panda_rs::test_utils::memory_store::helpers::{populate_store, PopulateStoreConfig}; use p2panda_rs::WithId; use rstest::rstest; use crate::db::stores::document::DocumentView; + use crate::materializer::tasks::reduce_task; + use crate::materializer::TaskInput; use crate::test_utils::{ - build_document, populate_and_materialize, populate_store_config, test_runner, TestNode, + add_schema_and_documents, assert_query, build_document, populate_and_materialize, + populate_store_config, test_runner, TestNode, }; #[rstest] @@ -575,7 +799,7 @@ mod tests { let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // Find the "CREATE" operation and get it's id. + // Find the "CREATE" operation and get its id. let create_operation = WithId::::id( operations .iter() @@ -601,9 +825,9 @@ mod tests { .await; assert!(result.is_ok()); - // We should be able to retrieve the document at either of it's views now. + // We should be able to retrieve the document at either of its views now. - // Here we request the document with it's initial state. + // Here we request the document with its initial state. let retrieved_document = node .context .store @@ -617,7 +841,7 @@ mod tests { assert_eq!(retrieved_document.view_id(), document_at_view_1.view_id()); assert_eq!(retrieved_document.fields(), document_at_view_1.fields()); - // Here we request it at it's current state. + // Here we request it at its current state. let retrieved_document = node .context .store @@ -631,7 +855,7 @@ mod tests { assert_eq!(retrieved_document.view_id(), document.view_id()); assert_eq!(retrieved_document.fields(), document.fields()); - // If we retrieve the document by it's id, we expect the current state. + // If we retrieve the document by its id, we expect the current state. let retrieved_document = node .context .store @@ -650,8 +874,7 @@ mod tests { #[rstest] fn document_view_does_not_exist(random_document_view_id: DocumentViewId) { test_runner(|node: TestNode| async move { - // We try to retrieve a document view by it's id but no view - // with that id exists. + // We try to retrieve a document view by its id but no view with that id exists. let view_does_not_exist = node .context .store @@ -700,20 +923,20 @@ mod tests { config: PopulateStoreConfig, ) { test_runner(|node: TestNode| async move { - // Populate the store with some entries and operations but DON'T materialise any resulting documents. + // Populate the store with some entries and operations but DON'T materialise any + // resulting documents. let (_, document_ids) = populate_store(&node.context.store, &config).await; let document_id = document_ids.get(0).expect("At least one document id"); // Build the document. 
let document = build_document(&node.context.store, &document_id).await; - // The document is successfully inserted into the database, this - // relies on the operations already being present and would fail - // if they were not. + // The document is successfully inserted into the database, this relies on the + // operations already being present and would fail if they were not. let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // We can retrieve the most recent document view for this document by it's id. + // We can retrieve the most recent document view for this document by its id. let retrieved_document = node .context .store @@ -722,8 +945,8 @@ mod tests { .unwrap() .unwrap(); - // We can retrieve a specific document view for this document by it's view_id. - // In this case, that should be the same as the view retrieved above. + // We can retrieve a specific document view for this document by its view_id. In this + // case, that should be the same as the view retrieved above. let specific_document = node .context .store @@ -737,6 +960,7 @@ mod tests { "age", "height", "is_admin", + "data", "profile_picture", "many_profile_pictures", "special_profile_picture", @@ -760,7 +984,8 @@ mod tests { config: PopulateStoreConfig, ) { test_runner(|node: TestNode| async move { - // Populate the store with some entries and operations but DON'T materialise any resulting documents. + // Populate the store with some entries and operations but DON'T materialise any + // resulting documents. let (_, document_ids) = populate_store(&node.context.store, &config).await; let document_id = document_ids.get(0).expect("At least one document id"); @@ -772,12 +997,12 @@ mod tests { // As it has been deleted, there should be no view. assert!(document.view().is_none()); - // Here we insert the document. This action also sets it's most recent view. + // Here we insert the document. This action also sets its most recent view. let result = node.context.store.insert_document(&document).await; assert!(result.is_ok()); - // We retrieve the most recent view for this document by it's document id, - // but as the document is deleted, we should get a none value back. + // We retrieve the most recent view for this document by its document id, but as the + // document is deleted, we should get a none value back. let document = node .context .store @@ -786,8 +1011,8 @@ mod tests { .unwrap(); assert!(document.is_none()); - // We also try to retrieve the specific document view by it's view id. - // This should also return none as it is deleted. + // We also try to retrieve the specific document view by its view id. This should also + // return none as it is deleted. let document = node .context .store @@ -865,14 +1090,14 @@ mod tests { .build() .expect("Build document"); - // Insert it to the database, this should also update it's view. + // Insert it to the database, this should also update its view. node.context .store .insert_document(&document) .await .expect("Insert document"); - // We can retrieve the document by it's document id. + // We can retrieve the document by its document id. let retrieved_document = node .context .store @@ -881,7 +1106,7 @@ mod tests { .expect("Get document") .expect("Unwrap document"); - // And also directly by it's document view id. + // And also directly by its document view id. 
let specific_document = node .context .store @@ -928,4 +1153,257 @@ mod tests { assert_eq!(schema_documents.len(), 10); }); } + + #[rstest] + fn prunes_document_view( + #[from(populate_store_config)] + #[with(2, 1, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + let first_document_view_id: DocumentViewId = document_id.as_str().parse().unwrap(); + + // Get the current document from the store. + let current_document = node.context.store.get_document(&document_id).await.unwrap(); + + // Get the current view id. + let current_document_view_id = current_document.unwrap().view_id().to_owned(); + + // Reduce a historic view of an existing document. + let _ = reduce_task( + node.context.clone(), + TaskInput::DocumentViewId(first_document_view_id.clone()), + ) + .await; + + // Get that view again to check it's in the db. + let document = node + .context + .store + .get_document_by_view_id(&first_document_view_id) + .await + .unwrap(); + assert!(document.is_some()); + + // Prune the first document view. + let result = node + .context + .store + .prune_document_view(&first_document_view_id) + .await; + assert!(result.is_ok()); + // Returns `true` when pruning succeeded. + assert!(result.unwrap()); + + // Get the first document view again, it should no longer be there. + let document = node + .context + .store + .get_document_by_view_id(&first_document_view_id) + .await + .unwrap(); + assert!(document.is_none()); + + // Get the current view of the document to make sure that wasn't deleted too. + let document = node + .context + .store + .get_document_by_view_id(¤t_document_view_id) + .await + .unwrap(); + assert!(document.is_some()); + }); + } + + #[rstest] + fn does_not_prune_pinned_views( + #[from(populate_store_config)] + #[with(2, 1, 1)] + config: PopulateStoreConfig, + key_pair: KeyPair, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + let first_document_view_id: DocumentViewId = document_id.as_str().parse().unwrap(); + + // Reduce a historic view of an existing document. + let _ = reduce_task( + node.context.clone(), + TaskInput::DocumentViewId(first_document_view_id.clone()), + ) + .await; + + // Add a new document to the store which pins the first view of the above document. + add_schema_and_documents( + &mut node, + "new_schema", + vec![vec![( + "pin_document", + first_document_view_id.clone().into(), + Some(config.schema.id().to_owned()), + )]], + &key_pair, + ) + .await; + + // Attempt to prune the first document view. + let result = node + .context + .store + .prune_document_view(&first_document_view_id) + .await; + assert!(result.is_ok()); + // Returns `false` when pruning failed. + assert!(!result.unwrap()); + + // Get the first document view, it should still be in the store as it was pinned. + let document = node + .context + .store + .get_document_by_view_id(&first_document_view_id) + .await + .unwrap(); + assert!(document.is_some()); + }); + } + + #[rstest] + fn does_not_prune_current_view( + #[from(populate_store_config)] + #[with(1, 1, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. 
+ let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + let current_document_view_id: DocumentViewId = document_id.as_str().parse().unwrap(); + + // Attempt to prune the current document view. + let result = node + .context + .store + .prune_document_view(¤t_document_view_id) + .await; + assert!(result.is_ok()); + // Returns `false` when pruning failed. + assert!(!result.unwrap()); + + // Get the current document view, it should still be in the store. + let document = node + .context + .store + .get_document_by_view_id(¤t_document_view_id) + .await + .unwrap(); + assert!(document.is_some()); + }); + } + + #[rstest] + fn purge_document( + #[from(populate_store_config)] + #[with(2, 1, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + + // There is one document in the database which contains an CREATE and UPDATE operation + // which were both published by the same author. These are the number of rows we + // expect for each table. + assert_query(&node, "SELECT entry_hash FROM entries", 2).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 2).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 28).await; + assert_query(&node, "SELECT log_id FROM logs", 1).await; + assert_query(&node, "SELECT document_id FROM documents", 1).await; + assert_query(&node, "SELECT document_id FROM document_views", 1).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; + + // Purge this document from the database, we now expect all tables to be empty. + let result = node.context.store.purge_document(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + assert_query(&node, "SELECT entry_hash FROM entries", 0).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 0).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 0).await; + assert_query(&node, "SELECT log_id FROM logs", 1).await; + assert_query(&node, "SELECT document_id FROM documents", 0).await; + assert_query(&node, "SELECT document_id FROM document_views", 0).await; + assert_query(&node, "SELECT name FROM document_view_fields", 0).await; + }); + } + + #[rstest] + fn purging_only_effects_target_document( + #[from(populate_store_config)] + #[with(1, 2, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (_, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + + // There are two documents in the database which each contain a single CREATE operation + // and they were published by the same author. These are the number of rows we expect + // for each table. 
+ assert_query(&node, "SELECT entry_hash FROM entries", 2).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 2).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 28).await; + assert_query(&node, "SELECT log_id FROM logs", 2).await; + assert_query(&node, "SELECT document_id FROM documents", 2).await; + assert_query(&node, "SELECT document_id FROM document_views", 2).await; + assert_query(&node, "SELECT name FROM document_view_fields", 22).await; + + // Purge one document from the database, we now expect half the rows to be remaining. + let result = node.context.store.purge_document(&document_id).await; + assert!(result.is_ok(), "{:#?}", result); + + assert_query(&node, "SELECT entry_hash FROM entries", 1).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 1).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 14).await; + assert_query(&node, "SELECT log_id FROM logs", 2).await; + assert_query(&node, "SELECT document_id FROM documents", 1).await; + assert_query(&node, "SELECT document_id FROM document_views", 1).await; + assert_query(&node, "SELECT name FROM document_view_fields", 11).await; + }); + } + + #[rstest] + fn next_args_after_purge( + #[from(populate_store_config)] + #[with(2, 1, 1)] + config: PopulateStoreConfig, + ) { + test_runner(|mut node: TestNode| async move { + // Populate the store and materialize all documents. + let (key_pairs, document_ids) = populate_and_materialize(&mut node, &config).await; + let document_id = document_ids[0].clone(); + let public_key = key_pairs[0].public_key(); + + let _ = node.context.store.purge_document(&document_id).await; + + let result = next_args( + &node.context.store, + &public_key, + Some(&document_id.as_str().parse().unwrap()), + ) + .await; + + assert!(result.is_err()); + + let result = next_args(&node.context.store, &public_key, None).await; + + assert!(result.is_ok()); + let next_args = result.unwrap(); + assert_eq!(next_args, (None, None, SeqNum::default(), LogId::new(1))); + }); + } } diff --git a/aquadoggo/src/db/stores/entry.rs b/aquadoggo/src/db/stores/entry.rs index e9c3d7978..ebdf47fd3 100644 --- a/aquadoggo/src/db/stores/entry.rs +++ b/aquadoggo/src/db/stores/entry.rs @@ -74,7 +74,7 @@ impl EntryStore for SqlStore { Ok(()) } - /// Get an entry from storage by it's hash id. + /// Get an entry from storage by its hash id. /// /// Returns a result containing the entry wrapped in an option if it was found successfully. /// Returns `None` if the entry was not found in storage. Errors when a fatal storage error @@ -186,7 +186,7 @@ impl EntryStore for SqlStore { /// Get all entries of a given schema /// /// Returns a result containing a vector of all entries which follow the passed schema - /// (identified by it's `SchemaId`). If no entries exist, or the schema is not known by this + /// (identified by its `SchemaId`). If no entries exist, or the schema is not known by this /// node, then an empty vector is returned. async fn get_entries_by_schema( &self, @@ -222,7 +222,7 @@ impl EntryStore for SqlStore { /// Get all entries of a given schema. /// /// Returns a result containing a vector of all entries which follow the passed schema - /// (identified by it's `SchemaId`). If no entries exist, or the schema is not known by this + /// (identified by its `SchemaId`). If no entries exist, or the schema is not known by this /// node, then an empty vector is returned. 
async fn get_paginated_log_entries( &self, @@ -444,7 +444,7 @@ mod tests { .await; assert!(result.is_ok()); - // Retrieve the entry again by it's hash. + // Retrieve the entry again by its hash let retrieved_entry = node .context .store @@ -622,7 +622,7 @@ mod tests { .expect("At least one key pair") .public_key(); - // We should be able to get each entry by it's public_key, log_id and seq_num. + // We should be able to get each entry by its public_key, log_id and seq_num. for seq_num in 1..10 { let seq_num = SeqNum::new(seq_num).unwrap(); diff --git a/aquadoggo/src/db/stores/mod.rs b/aquadoggo/src/db/stores/mod.rs index 440b4b3e4..e5ccebf8e 100644 --- a/aquadoggo/src/db/stores/mod.rs +++ b/aquadoggo/src/db/stores/mod.rs @@ -2,6 +2,7 @@ //! Implementations of all `p2panda-rs` defined storage provider traits and additionally //! `aquadoggo` specific interfaces. +mod blob; pub mod document; mod entry; mod log; diff --git a/aquadoggo/src/db/stores/operation.rs b/aquadoggo/src/db/stores/operation.rs index d70e438b1..73550759f 100644 --- a/aquadoggo/src/db/stores/operation.rs +++ b/aquadoggo/src/db/stores/operation.rs @@ -25,8 +25,8 @@ use crate::db::SqlStore; /// the required `AsVerifiedOperation` trait. /// /// There are several intermediary structs defined in `db/models/` which represent rows from tables -/// in the database where this entry, it's fields and opreation relations are stored. These are -/// used in conjunction with the `sqlx` library to coerce raw values into structs when querying the +/// in the database where this entry, its fields and opreation relations are stored. These are used +/// in conjunction with the `sqlx` library to coerce raw values into structs when querying the /// database. #[async_trait] impl OperationStore for SqlStore { @@ -77,7 +77,7 @@ impl OperationStore for SqlStore { .await } - /// Get an operation identified by it's `OperationId`. + /// Get an operation identified by its `OperationId`. /// /// Returns a result containing an `VerifiedOperation` wrapped in an option, if no operation /// with this id was found, returns none. Errors if a fatal storage error occured. @@ -269,7 +269,7 @@ impl SqlStore { Ok(()) } - /// Insert an operation as well as the index for it's position in the document after + /// Insert an operation as well as the index for its position in the document after /// materialization has occurred. async fn insert_operation_with_index( &self, @@ -467,7 +467,7 @@ mod tests { .await; assert!(result.is_ok()); - // Request the previously inserted operation by it's id. + // Request the previously inserted operation by its id. let returned_operation = node .context .store diff --git a/aquadoggo/src/db/stores/query.rs b/aquadoggo/src/db/stores/query.rs index e8ff37483..c441e41e7 100644 --- a/aquadoggo/src/db/stores/query.rs +++ b/aquadoggo/src/db/stores/query.rs @@ -300,6 +300,7 @@ fn bind_arg(value: &OperationValue) -> Vec { .iter() .map(|view_id| BindArgument::String(view_id.to_string())) .collect(), + OperationValue::Bytes(value) => vec![BindArgument::String(hex::encode(value))], } } diff --git a/aquadoggo/src/db/stores/schema.rs b/aquadoggo/src/db/stores/schema.rs index 59a17077c..e00f99f67 100644 --- a/aquadoggo/src/db/stores/schema.rs +++ b/aquadoggo/src/db/stores/schema.rs @@ -14,7 +14,7 @@ use crate::db::errors::SchemaStoreError; use crate::db::SqlStore; impl SqlStore { - /// Get a Schema from the database by it's document view id. + /// Get a Schema from the database by its document view id. 
/// /// Internally, this method performs three steps: /// - fetch the document view for the schema definition @@ -52,7 +52,7 @@ impl SqlStore { // We silently ignore errors as we are assuming views we retrieve from the database // themselves are valid, meaning any error in constructing the schema must be because some - // of it's fields are simply missing from our database. + // of its fields are simply missing from our database. let schema = Schema::from_views(schema_view, schema_fields).ok(); Ok(schema) @@ -205,7 +205,7 @@ mod tests { ) .await; - // Retrieve the schema by it's document view id. We unwrap here as we expect an `Ok` + // Retrieve the schema by its document view id. We unwrap here as we expect an `Ok` // result for the succeeding db query, even though the schema could not be built. let schema = node .context diff --git a/aquadoggo/src/db/types/entry.rs b/aquadoggo/src/db/types/entry.rs index bda3ad030..5d093c3f1 100644 --- a/aquadoggo/src/db/types/entry.rs +++ b/aquadoggo/src/db/types/entry.rs @@ -9,9 +9,9 @@ use p2panda_rs::operation::EncodedOperation; use crate::db::models::EntryRow; -/// A signed entry and it's encoded operation. Entries are the lowest level data type on the -/// p2panda network, they are signed by authors and form bamboo append only logs. The operation is -/// an entries' payload, it contains the data mutations which authors publish. +/// A signed entry and its encoded operation. Entries are the lowest level data type on the p2panda +/// network, they are signed by authors and form bamboo append only logs. The operation is an +/// entries' payload, it contains the data mutations which authors publish. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct StorageEntry { /// PublicKey of this entry. diff --git a/aquadoggo/src/db/types/operation.rs b/aquadoggo/src/db/types/operation.rs index 2af007136..6142cb2a0 100644 --- a/aquadoggo/src/db/types/operation.rs +++ b/aquadoggo/src/db/types/operation.rs @@ -36,7 +36,7 @@ pub struct StorageOperation { /// Index for the position of this operation once topological sorting of the operation graph /// has been performed. /// - /// Is `None` when the operation has not been materialized into it's document yet. + /// Is `None` when the operation has not been materialized into its document yet. pub(crate) sorted_index: Option, } diff --git a/aquadoggo/src/graphql/constants.rs b/aquadoggo/src/graphql/constants.rs index 9fb0f4cf7..22250bad4 100644 --- a/aquadoggo/src/graphql/constants.rs +++ b/aquadoggo/src/graphql/constants.rs @@ -58,10 +58,10 @@ pub const ORDER_DIRECTION_ARG: &str = "orderDirection"; /// Name of field where a collection of documents can be accessed. pub const DOCUMENTS_FIELD: &str = "documents"; -/// Name of field on a document where it's fields can be accessed. +/// Name of field on a document where its fields can be accessed. pub const FIELDS_FIELD: &str = "fields"; -/// Name of field on a document where it's meta data can be accessed. +/// Name of field on a document where its meta data can be accessed. pub const META_FIELD: &str = "meta"; /// Name of field on a document where pagination cursor can be accessed. 
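Note on the bytes support threaded through the following GraphQL changes: `OperationValue::Bytes` values cross the GraphQL boundary as hexadecimal strings (see the `HexBytes` scalar, `HexBytesFilter` and the `hex::encode` calls in `gql_scalar` and `bind_arg` further down in this patch). The sketch below is illustrative only, not part of the patch; it assumes nothing beyond the `hex` crate already used in these hunks, and the helper names are hypothetical.

// Illustrative sketch of the hex convention used for bytes fields.
fn bytes_to_hex(bytes: &[u8]) -> String {
    // Raw bytes are exposed to GraphQL clients as hex strings,
    // e.g. [0, 1, 2, 3] becomes "00010203".
    hex::encode(bytes)
}

fn hex_to_bytes(value: &str) -> Result<Vec<u8>, hex::FromHexError> {
    // Client-supplied values such as eq: "00010203" must be valid hex;
    // this mirrors the decode check in `HexBytesScalar::from_value`.
    hex::decode(value)
}

The collection query tests below rely on this representation, for example the expected "data": "00010203" field values and the audio filter cases.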
diff --git a/aquadoggo/src/graphql/input_values/fields_filter.rs b/aquadoggo/src/graphql/input_values/fields_filter.rs index 79f765f3a..f0eeb8116 100644 --- a/aquadoggo/src/graphql/input_values/fields_filter.rs +++ b/aquadoggo/src/graphql/input_values/fields_filter.rs @@ -10,7 +10,9 @@ use async_graphql::dynamic::{InputObject, InputValue, TypeRef}; use dynamic_graphql::InputObject; use p2panda_rs::schema::{FieldType, Schema}; -use crate::graphql::scalars::{DocumentIdScalar, DocumentViewIdScalar, PublicKeyScalar}; +use crate::graphql::scalars::{ + DocumentIdScalar, DocumentViewIdScalar, HexBytesScalar, PublicKeyScalar, +}; use crate::graphql::utils::filter_name; /// Build a filter input object for a p2panda schema. It can be used to filter collection queries @@ -42,6 +44,10 @@ pub fn build_filter_input_object(schema: &Schema) -> InputObject { filter_input = filter_input.field(InputValue::new(name, TypeRef::named("StringFilter"))); } + FieldType::Bytes => { + filter_input = + filter_input.field(InputValue::new(name, TypeRef::named("HexBytesFilter"))); + } FieldType::Relation(_) => { filter_input = filter_input.field(InputValue::new(name, TypeRef::named("RelationFilter"))); @@ -171,6 +177,19 @@ pub struct StringFilter { not_contains: Option, } +/// A filter input type for bytes field values. +#[derive(InputObject)] +#[allow(dead_code)] +pub struct HexBytesFilter { + /// Filter by equal to. + #[graphql(name = "eq")] + eq: Option, + + /// Filter by not equal to. + #[graphql(name = "notEq")] + not_eq: Option, +} + /// A filter input type for integer field values. #[derive(InputObject)] #[allow(dead_code)] diff --git a/aquadoggo/src/graphql/input_values/mod.rs b/aquadoggo/src/graphql/input_values/mod.rs index 22492a39b..c42f2fd1f 100644 --- a/aquadoggo/src/graphql/input_values/mod.rs +++ b/aquadoggo/src/graphql/input_values/mod.rs @@ -6,8 +6,8 @@ mod order; pub use fields_filter::{ build_filter_input_object, BooleanFilter, DocumentIdFilter, DocumentViewIdFilter, FloatFilter, - IntegerFilter, OwnerFilter, PinnedRelationFilter, PinnedRelationListFilter, RelationFilter, - RelationListFilter, StringFilter, + HexBytesFilter, IntegerFilter, OwnerFilter, PinnedRelationFilter, PinnedRelationListFilter, + RelationFilter, RelationListFilter, StringFilter, }; pub use meta_filter::MetaFilterInputObject; pub use order::{build_order_enum_value, OrderDirection}; diff --git a/aquadoggo/src/graphql/mutations/publish.rs b/aquadoggo/src/graphql/mutations/publish.rs index 774a963ba..d2e47caa3 100644 --- a/aquadoggo/src/graphql/mutations/publish.rs +++ b/aquadoggo/src/graphql/mutations/publish.rs @@ -127,7 +127,7 @@ mod tests { use crate::graphql::GraphQLSchemaManager; use crate::http::HttpServiceContext; use crate::test_utils::{ - add_schema, doggo_fields, doggo_schema, graphql_test_client, populate_and_materialize, + add_schema, doggo_fields, doggo_schema, http_test_client, populate_and_materialize, populate_store_config, test_runner, TestNode, }; @@ -237,7 +237,11 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager); + let context = HttpServiceContext::new( + node.context.store.clone(), + manager, + node.context.config.blobs_base_path.to_path_buf(), + ); let response = context.schema.execute(publish_request).await; @@ -298,7 +302,11 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager); + let context = HttpServiceContext::new( + node.context.store.clone(), + manager, + 
node.context.config.blobs_base_path.to_path_buf(), + ); let response = context .schema @@ -326,7 +334,11 @@ mod tests { node.context.schema_provider.clone(), ) .await; - let context = HttpServiceContext::new(manager); + let context = HttpServiceContext::new( + node.context.store.clone(), + manager, + node.context.config.blobs_base_path.to_path_buf(), + ); context.schema.execute(publish_request).await; @@ -354,7 +366,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let response = client .post("/graphql") @@ -573,7 +585,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Prepare the GQL publish request let publish_request = publish_request(&entry_encoded, &encoded_operation); @@ -701,7 +713,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let publish_request = publish_request(&entry_encoded, &encoded_operation); @@ -736,7 +748,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Two key pairs representing two different authors let key_pairs = vec![KeyPair::new(), KeyPair::new()]; @@ -828,7 +840,7 @@ mod tests { populate_and_materialize(&mut node, &config).await; // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Get the one entry from the store. let entries = node @@ -871,7 +883,7 @@ mod tests { ) { test_runner(|node: TestNode| async move { // Init the test client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Prepare a publish entry request for the entry. let publish_entry = publish_request( diff --git a/aquadoggo/src/graphql/objects/document.rs b/aquadoggo/src/graphql/objects/document.rs index 0a86feb44..6f894daad 100644 --- a/aquadoggo/src/graphql/objects/document.rs +++ b/aquadoggo/src/graphql/objects/document.rs @@ -13,7 +13,7 @@ use crate::graphql::utils::{collection_item_name, fields_name}; /// schema. /// /// Constructs resolvers for both `fields` and `meta` fields. The former simply passes up the query -/// arguments to it's children query fields. The latter calls the `resolve` method defined on +/// arguments to its children query fields. The latter calls the `resolve` method defined on /// `DocumentMeta` type. pub fn build_document_object(schema: &Schema) -> Object { let fields = Object::new(schema.id().to_string()); @@ -24,7 +24,7 @@ pub fn build_document_object(schema: &Schema) -> Object { /// schema and are contained in a paginated collection. /// /// Contains resolvers for `cursor`, `fields` and `meta`. `fields` simply passes up the query -/// arguments to it's children query fields. `meta` calls the `resolve` method defined on +/// arguments to its children query fields. `meta` calls the `resolve` method defined on /// `DocumentMeta` type. 
pub fn build_paginated_document_object(schema: &Schema) -> Object { let fields = Object::new(collection_item_name(schema.id())); @@ -56,7 +56,7 @@ pub fn build_paginated_document_object(schema: &Schema) -> Object { /// Add application `fields` and `meta` fields to a GraphQL object. fn with_document_fields(fields: Object, schema: &Schema) -> Object { fields - // The `fields` field passes down the parent value to it's children + // The `fields` field passes down the parent value to its children .field( Field::new( constants::FIELDS_FIELD, diff --git a/aquadoggo/src/graphql/queries/collection.rs b/aquadoggo/src/graphql/queries/collection.rs index c149cabe1..86317d0ec 100644 --- a/aquadoggo/src/graphql/queries/collection.rs +++ b/aquadoggo/src/graphql/queries/collection.rs @@ -56,7 +56,7 @@ mod tests { use serde_json::{json, Value as JsonValue}; use crate::test_utils::{ - add_document, add_schema, add_schema_and_documents, graphql_test_client, test_runner, + add_document, add_schema, add_schema_and_documents, http_test_client, test_runner, TestClient, TestNode, }; @@ -284,6 +284,7 @@ mod tests { ("artist", "X-ray Spex".into(), None), ("title", "Oh Bondage Up Yours!".into(), None), ("release_year", 1977.into(), None), + ("audio", vec![0, 1, 2, 3][..].into(), None), ( "lyrics", vec![ @@ -328,6 +329,7 @@ mod tests { ("artist", "Gang Of Four".into(), None), ("title", "Natural's Not In".into(), None), ("release_year", 1979.into(), None), + ("audio", vec![4, 5, 6, 7][..].into(), None), ( "lyrics", vec![ @@ -385,6 +387,7 @@ mod tests { ("artist", "David Bowie".into(), None), ("title", "Speed Of Life".into(), None), ("release_year", 1977.into(), None), + ("audio", vec![8, 9, 10, 11][..].into(), None), ( "lyrics", OperationValue::RelationList(RelationList::new(vec![])), @@ -404,24 +407,30 @@ mod tests { "collection": value!({ "hasNextPage": false, "totalCount": 2, - "endCursor": "31Ch6qa4mdKcxpWJG4X9Wf5iMvSSxmSGg8cyg9teNR6yKmLncZCmyVUaPFjRNoWcxpeASGqrRiJGR8HSqjWBz5HE", + "endCursor": "24gc7iHafVKTcfRZfVVV8etkSoJMJVsqs1iYJAuHb8oNp32Vi1PcYw6S5GJ8hNhPmHHbP1weVbACYRctHVz4jXjQ", "documents": [ { - "cursor": "273AmFQTk7w6134GhzKUS5tY8qDuaMYBPgbaftZ43G7saiKa73MPapFvjNDixbNjCr5ucNqzNsx2fYdRqRod9U2W", - "fields": { "bool": true, }, + "cursor": "24gZVnL75RPvxMVAiuGT2SgCrHneGZgsvEaiCh5g8qgxGBhcunAffueCUTiyuLDamP1G48KYPmRDBBFG43dh3XJ2", + "fields": { + "bool": true, + "data": "00010203", + }, "meta": { "owner": "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96", - "documentId": "00200436216389856afb3f3a7d8cb2d2981be85787aebed02031c72eb9c216406c57", - "viewId": "00200436216389856afb3f3a7d8cb2d2981be85787aebed02031c72eb9c216406c57", + "documentId": "0020223f123be0f9025c591fba1a5800ca64084e837315521d5b65a870e874ed8b4e", + "viewId": "0020223f123be0f9025c591fba1a5800ca64084e837315521d5b65a870e874ed8b4e", } }, { - "cursor": "31Ch6qa4mdKcxpWJG4X9Wf5iMvSSxmSGg8cyg9teNR6yKmLncZCmyVUaPFjRNoWcxpeASGqrRiJGR8HSqjWBz5HE", - "fields": { "bool": false, }, + "cursor": "24gc7iHafVKTcfRZfVVV8etkSoJMJVsqs1iYJAuHb8oNp32Vi1PcYw6S5GJ8hNhPmHHbP1weVbACYRctHVz4jXjQ", + "fields": { + "bool": false, + "data": "04050607" + }, "meta": { "owner": "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96", - "documentId": "0020de552d81948f220d09127dc42963071d086a142c9547e701674d4cac83f29872", - "viewId": "0020de552d81948f220d09127dc42963071d086a142c9547e701674d4cac83f29872", + "documentId": "0020c7dbed85159bbea8f1c44f1d4d7dfbdded6cd43c09ab1a292089e9530964cab9", + "viewId": 
"0020c7dbed85159bbea8f1c44f1d4d7dfbdded6cd43c09ab1a292089e9530964cab9", } } ] @@ -433,7 +442,7 @@ mod tests { r#" ( first: 1, - after: "31Ch6qa4mdKcxpWJG4X9Wf5iMvSSxmSGg8cyg9teNR6yKmLncZCmyVUaPFjRNoWcxpeASGqrRiJGR8HSqjWBz5HE", + after: "24gc7iHafVKTcfRZfVVV8etkSoJMJVsqs1iYJAuHb8oNp32Vi1PcYw6S5GJ8hNhPmHHbP1weVbACYRctHVz4jXjQ", orderBy: DOCUMENT_ID, orderDirection: ASC, filter: { @@ -453,6 +462,38 @@ mod tests { }), vec![] )] + #[case( + r#"( + first: 2, + filter: { + data: { + eq: "00010203" + } + } + )"#.to_string(), + value!({ + "collection": value!({ + "hasNextPage": false, + "totalCount": 1, + "endCursor": "24gZVnL75RPvxMVAiuGT2SgCrHneGZgsvEaiCh5g8qgxGBhcunAffueCUTiyuLDamP1G48KYPmRDBBFG43dh3XJ2", + "documents": [ + { + "cursor": "24gZVnL75RPvxMVAiuGT2SgCrHneGZgsvEaiCh5g8qgxGBhcunAffueCUTiyuLDamP1G48KYPmRDBBFG43dh3XJ2", + "fields": { + "bool": true, + "data": "00010203", + }, + "meta": { + "owner": "2f8e50c2ede6d936ecc3144187ff1c273808185cfbc5ff3d3748d1ff7353fc96", + "documentId": "0020223f123be0f9025c591fba1a5800ca64084e837315521d5b65a870e874ed8b4e", + "viewId": "0020223f123be0f9025c591fba1a5800ca64084e837315521d5b65a870e874ed8b4e", + } + } + ] + }), + }), + vec![] + )] #[case( r#"(first: 0)"#.to_string(), Value::Null, @@ -469,7 +510,7 @@ mod tests { vec!["Invalid value for argument \"after\", expected type \"Cursor\"".to_string()] )] #[case( - r#"(after: "00205406410aefce40c5cbbb04488f50714b7d5657b9f17eed7358da35379bc20331")"#.to_string(), + r#"(after: "0020d384b69386867b61acebe6b23d4fac8c1425d5dce339bb3ef7c2218c155b3f9a")"#.to_string(), Value::Null, vec!["Invalid value for argument \"after\", expected type \"Cursor\"".to_string()] )] @@ -481,12 +522,12 @@ mod tests { #[case( r#"(orderBy: HELLO)"#.to_string(), Value::Null, - vec!["Invalid value for argument \"orderBy\", enumeration type \"schema_name_00205406410aefce40c5cbbb04488f50714b7d5657b9f17eed7358da35379bc20331OrderBy\" does not contain the value \"HELLO\"".to_string()] + vec!["Invalid value for argument \"orderBy\", enumeration type \"schema_name_0020d384b69386867b61acebe6b23d4fac8c1425d5dce339bb3ef7c2218c155b3f9aOrderBy\" does not contain the value \"HELLO\"".to_string()] )] #[case( r#"(orderBy: "hello")"#.to_string(), Value::Null, - vec!["Invalid value for argument \"orderBy\", enumeration type \"schema_name_00205406410aefce40c5cbbb04488f50714b7d5657b9f17eed7358da35379bc20331OrderBy\" does not contain the value \"hello\"".to_string()] + vec!["Invalid value for argument \"orderBy\", enumeration type \"schema_name_0020d384b69386867b61acebe6b23d4fac8c1425d5dce339bb3ef7c2218c155b3f9aOrderBy\" does not contain the value \"hello\"".to_string()] )] #[case( r#"(orderDirection: HELLO)"#.to_string(), @@ -511,7 +552,7 @@ mod tests { #[case( r#"(filter: { hello: { eq: true }})"#.to_string(), Value::Null, - vec!["Invalid value for argument \"filter\", unknown field \"hello\" of type \"schema_name_00205406410aefce40c5cbbb04488f50714b7d5657b9f17eed7358da35379bc20331Filter\"".to_string()] + vec!["Invalid value for argument \"filter\", unknown field \"hello\" of type \"schema_name_0020d384b69386867b61acebe6b23d4fac8c1425d5dce339bb3ef7c2218c155b3f9aFilter\"".to_string()] )] #[case( r#"(filter: { bool: { contains: "hello" }})"#.to_string(), @@ -570,7 +611,7 @@ mod tests { let schema = add_schema( &mut node, "schema_name", - vec![("bool", FieldType::Boolean)], + vec![("bool", FieldType::Boolean), ("data", FieldType::Bytes)], &key_pair, ) .await; @@ -579,7 +620,7 @@ mod tests { add_document( &mut node, schema.id(), - vec![("bool", true.into())], 
+ vec![("bool", true.into()), ("data", vec![0, 1, 2, 3][..].into())], &key_pair, ) .await; @@ -588,13 +629,16 @@ mod tests { add_document( &mut node, schema.id(), - vec![("bool", false.into())], + vec![ + ("bool", false.into()), + ("data", vec![4, 5, 6, 7][..].into()), + ], &key_pair, ) .await; // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ collection: all_{type_name}{query_args} {{ @@ -605,6 +649,7 @@ mod tests { cursor fields {{ bool + data }} meta {{ owner @@ -697,7 +742,7 @@ mod tests { .await; // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ collection: all_{type_name} {{ @@ -742,7 +787,7 @@ mod tests { add_document(&mut node, schema.id(), document_values, &key_pair).await; // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ collection: all_{type_name} {{ @@ -813,6 +858,9 @@ mod tests { #[case("(filter: { title: { eq: \"Natural's Not In\" } })", "")] #[case("(filter: { title: { notEq: \"Natural's Not In\", in: [ \"Oh Bondage Up Yours!\", \"Speed Of Life\" ] } })", "")] #[case("(filter: { title: { notEq: \"Natural's Not In\" }, release_year: { gt: 1978 }, artist: { in: [ \"X-ray Spex\"] } })", "")] + #[case("(filter: { audio: { notEq: \"aa\" } })", "")] + #[case("(filter: { audio: { eq: \"E8\" } })", "")] + #[case("(filter: { audio: { eq: \"\" } })", "")] #[case( "(orderDirection: DESC, orderBy: title)", "(orderDirection: ASC, orderBy: line)" @@ -852,7 +900,7 @@ mod tests { here_be_some_karaoke_hits(&mut node, &view_ids, &lyric_schema, &key_pair).await; // Init a GraphQL client we'll use to query the node. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Perform a paginated collection query for the songs. query_songs(&client, song_schema.id(), &song_args, &lyric_args).await; @@ -870,7 +918,7 @@ mod tests { here_be_some_karaoke_hits(&mut node, &view_ids, &lyric_schema, &key_pair).await; // Init a GraphQL client we'll use to query the node. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Perform a paginated collection query for the songs on the node identified by the // schema id. We don't pass any arguments and so will get up to the default number of @@ -921,7 +969,7 @@ mod tests { // That's more my style, so let's get the lyrics for this song. But there are a lot, // so I'll just get the first 2 lines. - // We can identify the song by it's id and then paginate the lyrics field which is a + // We can identify the song by its id and then paginate the lyrics field which is a // relation list of song lyric lines. let oh_bondage_up_yours_id = oh_bondage_up_yours["meta"]["documentId"].as_str().unwrap(); @@ -1103,7 +1151,7 @@ mod tests { here_be_some_karaoke_hits(&mut node, &view_ids, &lyric_schema, &key_pair).await; // Init a GraphQL client we'll use to query the node. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let data = query_songs(&client, song_schema.id(), "(first: 4)", "").await; assert_eq!(data["query"]["documents"].as_array().unwrap().len(), 3); @@ -1144,7 +1192,7 @@ mod tests { here_be_some_karaoke_hits(&mut node, &view_ids, &lyric_schema, &key_pair).await; // Init a GraphQL client we'll use to query the node. 
- let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let data = query_songs_meta_fields_only(&client, song_schema.id(), "(first: 4)").await; assert_eq!(data["query"]["documents"].as_array().unwrap().len(), 3); diff --git a/aquadoggo/src/graphql/queries/document.rs b/aquadoggo/src/graphql/queries/document.rs index c882f9f4d..0e0603b97 100644 --- a/aquadoggo/src/graphql/queries/document.rs +++ b/aquadoggo/src/graphql/queries/document.rs @@ -100,7 +100,7 @@ mod test { use rstest::rstest; use serde_json::json; - use crate::test_utils::{add_document, add_schema, graphql_test_client, test_runner, TestNode}; + use crate::test_utils::{add_document, add_schema, http_test_client, test_runner, TestNode}; #[rstest] fn single_query(#[from(random_key_pair)] key_pair: KeyPair) { @@ -137,7 +137,7 @@ mod test { let document_id = document.id(); // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ byViewId: {type_name}(viewId: "{view_id}") {{ @@ -213,7 +213,7 @@ mod test { // Test single query parameter variations. test_runner(move |node: TestNode| async move { // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ view: schema_definition_v1{params} {{ @@ -269,7 +269,7 @@ mod test { .await; // Configure and send test query. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ single: {schema_id}(id: "{view_id}") {{ diff --git a/aquadoggo/src/graphql/queries/next_args.rs b/aquadoggo/src/graphql/queries/next_args.rs index 51bc98e07..8ce545c39 100644 --- a/aquadoggo/src/graphql/queries/next_args.rs +++ b/aquadoggo/src/graphql/queries/next_args.rs @@ -97,13 +97,13 @@ mod tests { use serde_json::json; use crate::test_utils::{ - graphql_test_client, populate_and_materialize, populate_store_config, test_runner, TestNode, + http_test_client, populate_and_materialize, populate_store_config, test_runner, TestNode, }; #[rstest] fn next_args_valid_query() { test_runner(|node: TestNode| async move { - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Selected fields need to be alphabetically sorted because that's what the `json` // macro that is used in the assert below produces. let received_entry_args = client @@ -149,7 +149,7 @@ mod tests { // Populates the store and materialises documents and schema. 
let (key_pairs, document_ids) = populate_and_materialize(&mut node, &config).await; - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let document_id = document_ids.get(0).expect("There should be a document id"); let public_key = key_pairs .get(0) @@ -190,7 +190,7 @@ mod tests { "nextArgs": { "logId": "0", "seqNum": "2", - "backlink": "0020597040e2b85b4eaf3955f7aaca8f8fd60f00f77549a5554c8dd4081657f0d231", + "backlink": "002098e61a9d946a1f046bd68414bfcc8fec09ddb3954dccaf184eaf7a7f4eb9cd26", "skiplink": null, } }) @@ -201,7 +201,7 @@ mod tests { #[rstest] fn next_args_error_response() { test_runner(|node: TestNode| async move { - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let response = client .post("/graphql") .json(&json!({ diff --git a/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs b/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs index ef522b908..4cdeb0a57 100644 --- a/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs +++ b/aquadoggo/src/graphql/scalars/document_view_id_scalar.rs @@ -7,7 +7,7 @@ use dynamic_graphql::{Error, Result, Scalar, ScalarValue, Value}; use p2panda_rs::document::DocumentViewId; /// The document view id of a p2panda document. Refers to a specific point in a documents history -/// and can be used to deterministically reconstruct it's state at that time. +/// and can be used to deterministically reconstruct its state at that time. #[derive(Scalar, Clone, Debug, Eq, PartialEq)] #[graphql(name = "DocumentViewId", validator(validate))] pub struct DocumentViewIdScalar(DocumentViewId); diff --git a/aquadoggo/src/graphql/scalars/hex_bytes_scalar.rs b/aquadoggo/src/graphql/scalars/hex_bytes_scalar.rs new file mode 100644 index 000000000..a77c72d44 --- /dev/null +++ b/aquadoggo/src/graphql/scalars/hex_bytes_scalar.rs @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::fmt::Display; + +use dynamic_graphql::{Error, Result, Scalar, ScalarValue, Value}; +use serde::Serialize; + +/// Bytes encoded as a hexadecimal string. +#[derive(Scalar, Clone, Debug, Eq, PartialEq, Serialize)] +#[graphql(name = "HexBytes", validator(validate))] +pub struct HexBytesScalar(String); + +impl ScalarValue for HexBytesScalar { + fn from_value(value: Value) -> Result + where + Self: Sized, + { + match &value { + Value::String(value) => { + hex::decode(value)?; + Ok(HexBytesScalar(value.to_string())) + } + _ => Err(Error::new(format!("Expected hex string, found: {value}"))), + } + } + + fn to_value(&self) -> Value { + Value::Binary(self.0.clone().into()) + } +} + +impl From for String { + fn from(hash: HexBytesScalar) -> Self { + hash.0 + } +} + +impl From for HexBytesScalar { + fn from(vec: String) -> Self { + Self(vec) + } +} + +impl From for Value { + fn from(entry: HexBytesScalar) -> Self { + ScalarValue::to_value(&entry) + } +} + +impl Display for HexBytesScalar { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let hex = hex::encode(&self.0); + write!(f, "{}", hex) + } +} + +/// Validation method used internally in `async-graphql` to check scalar values passed into the +/// public api. 
+fn validate(value: &Value) -> bool { + HexBytesScalar::from_value(value.to_owned()).is_ok() +} diff --git a/aquadoggo/src/graphql/scalars/mod.rs b/aquadoggo/src/graphql/scalars/mod.rs index 0b94149ad..4a887426d 100644 --- a/aquadoggo/src/graphql/scalars/mod.rs +++ b/aquadoggo/src/graphql/scalars/mod.rs @@ -13,6 +13,7 @@ mod document_view_id_scalar; mod encoded_entry_scalar; mod encoded_operation_scalar; mod entry_hash_scalar; +mod hex_bytes_scalar; mod log_id_scalar; mod public_key_scalar; mod seq_num_scalar; @@ -23,6 +24,7 @@ pub use document_view_id_scalar::DocumentViewIdScalar; pub use encoded_entry_scalar::EncodedEntryScalar; pub use encoded_operation_scalar::EncodedOperationScalar; pub use entry_hash_scalar::EntryHashScalar; +pub use hex_bytes_scalar::HexBytesScalar; pub use log_id_scalar::LogIdScalar; pub use public_key_scalar::PublicKeyScalar; pub use seq_num_scalar::SeqNumScalar; diff --git a/aquadoggo/src/graphql/schema.rs b/aquadoggo/src/graphql/schema.rs index 354901501..d82589405 100644 --- a/aquadoggo/src/graphql/schema.rs +++ b/aquadoggo/src/graphql/schema.rs @@ -13,9 +13,9 @@ use tokio::sync::Mutex; use crate::bus::ServiceSender; use crate::db::SqlStore; use crate::graphql::input_values::{ - build_filter_input_object, build_order_enum_value, BooleanFilter, FloatFilter, IntegerFilter, - MetaFilterInputObject, OrderDirection, PinnedRelationFilter, PinnedRelationListFilter, - RelationFilter, RelationListFilter, StringFilter, + build_filter_input_object, build_order_enum_value, BooleanFilter, FloatFilter, HexBytesFilter, + IntegerFilter, MetaFilterInputObject, OrderDirection, PinnedRelationFilter, + PinnedRelationListFilter, RelationFilter, RelationListFilter, StringFilter, }; use crate::graphql::mutations::{MutationRoot, Publish}; use crate::graphql::objects::{ @@ -28,7 +28,8 @@ use crate::graphql::queries::{ use crate::graphql::responses::NextArguments; use crate::graphql::scalars::{ CursorScalar, DocumentIdScalar, DocumentViewIdScalar, EncodedEntryScalar, - EncodedOperationScalar, EntryHashScalar, LogIdScalar, PublicKeyScalar, SeqNumScalar, + EncodedOperationScalar, EntryHashScalar, HexBytesScalar, LogIdScalar, PublicKeyScalar, + SeqNumScalar, }; use crate::schema::SchemaProvider; @@ -52,6 +53,7 @@ pub async fn build_root_schema( .register::() // Register input values .register::() + .register::() .register::() .register::() .register::() @@ -62,6 +64,7 @@ pub async fn build_root_schema( .register::() .register::() // Register scalars + .register::() .register::() .register::() .register::() @@ -111,7 +114,7 @@ pub async fn build_root_schema( .register(filter_input); // Add a query for each schema. It offers an interface to retrieve a single document of - // this schema by it's document id or view id. Its resolver parses and validates the passed + // this schema by its document id or view id. Its resolver parses and validates the passed // parameters, then forwards them up to the children query fields root_query = build_document_query(root_query, &schema); @@ -271,15 +274,15 @@ mod test { use rstest::rstest; use serde_json::{json, Value}; - use crate::test_utils::{add_schema, graphql_test_client, test_runner, TestNode}; + use crate::test_utils::{add_schema, http_test_client, test_runner, TestNode}; #[rstest] fn schema_updates() { test_runner(|mut node: TestNode| async move { // Create test client in the beginning so it is initialised with just the system - // schemas. 
Then we create a new application schema to test that the graphql schema - // is updated and we can query the changed schema. - let client = graphql_test_client(&node).await; + // schemas. Then we create a new application schema to test that the graphql schema is + // updated and we can query the changed schema. + let client = http_test_client(&node).await; // This test uses a fixed private key to allow us to anticipate the schema typename. let key_pair = key_pair(PRIVATE_KEY); diff --git a/aquadoggo/src/graphql/tests.rs b/aquadoggo/src/graphql/tests.rs index a40e0f900..b7b4e0b31 100644 --- a/aquadoggo/src/graphql/tests.rs +++ b/aquadoggo/src/graphql/tests.rs @@ -9,7 +9,7 @@ use p2panda_rs::{document::DocumentId, schema::FieldType}; use rstest::rstest; use serde_json::json; -use crate::test_utils::{add_document, add_schema, graphql_test_client, test_runner, TestNode}; +use crate::test_utils::{add_document, add_schema, http_test_client, test_runner, TestNode}; // Test querying application documents with scalar fields (no relations) by document id and by view // id. @@ -27,6 +27,7 @@ fn scalar_fields() { ("float", FieldType::Float), ("int", FieldType::Integer), ("text", FieldType::String), + ("bytes", FieldType::Bytes), ], &key_pair, ) @@ -38,13 +39,14 @@ fn scalar_fields() { ("float", (1.0).into()), ("int", 1.into()), ("text", "yes".into()), + ("bytes", vec![0, 1, 2, 3][..].into()), ] .try_into() .unwrap(); let view_id = add_document(&mut node, schema.id(), doc_fields, &key_pair).await; // Configure and send test query - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ scalarDoc: {type_name}(viewId: "{view_id}") {{ @@ -52,7 +54,8 @@ fn scalar_fields() { bool, float, int, - text + text, + bytes }} }}, }}"#, @@ -77,6 +80,7 @@ fn scalar_fields() { "float": 1.0, "int": 1, "text": "yes", + "bytes": "00010203", } }, }); @@ -149,7 +153,7 @@ fn relation_fields() { add_document(&mut node, parent_schema.id(), parent_fields, &key_pair).await; // Configure and send test query - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; let query = format!( r#"{{ result: {}(viewId: "{}") {{ diff --git a/aquadoggo/src/graphql/utils.rs b/aquadoggo/src/graphql/utils.rs index 2b3d5e92a..7f6a6fe8a 100644 --- a/aquadoggo/src/graphql/utils.rs +++ b/aquadoggo/src/graphql/utils.rs @@ -61,6 +61,10 @@ pub fn gql_scalar(operation_value: &OperationValue) -> Value { OperationValue::Float(value) => value.to_owned().into(), OperationValue::Integer(value) => value.to_owned().into(), OperationValue::String(value) => value.to_owned().into(), + OperationValue::Bytes(value) => { + let hex_string = hex::encode(value); + hex_string.into() + } _ => panic!("This method is not used for relation types"), } } @@ -74,6 +78,7 @@ pub fn graphql_type(field_type: &FieldType) -> TypeRef { FieldType::Integer => TypeRef::named(TypeRef::INT), FieldType::Float => TypeRef::named(TypeRef::FLOAT), FieldType::String => TypeRef::named(TypeRef::STRING), + FieldType::Bytes => TypeRef::named("HexBytes"), FieldType::Relation(schema_id) => TypeRef::named(schema_id.to_string()), FieldType::RelationList(schema_id) => TypeRef::named(collection_name(schema_id)), FieldType::PinnedRelation(schema_id) => TypeRef::named(schema_id.to_string()), @@ -91,6 +96,11 @@ pub fn filter_to_operation_value( FieldType::Integer => filter_value.i64()?.into(), FieldType::Float => filter_value.f64()?.into(), FieldType::String => filter_value.string()?.into(), + 
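The new `bytes` field type travels over GraphQL as a hexadecimal string. A minimal sketch of that round trip, using only the `hex` crate already pulled in by this changeset (the two helper functions are illustrative and not part of the codebase):

```rust
// Bytes values are exposed to clients as lowercase hex strings.
fn bytes_to_graphql(value: &[u8]) -> String {
    hex::encode(value) // e.g. [0, 1, 2, 3] -> "00010203"
}

// Incoming scalar and filter values are decoded back into raw bytes; odd-length or
// non-hex input fails, mirroring the `validate` function of `HexBytesScalar`.
fn graphql_to_bytes(value: &str) -> Result<Vec<u8>, hex::FromHexError> {
    hex::decode(value)
}
```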
FieldType::Bytes => { + let hex_string = filter_value.string()?; + let bytes = hex::decode(hex_string)?; + bytes[..].into() + } // We are only ever dealing with list items here FieldType::Relation(_) | FieldType::RelationList(_) => { DocumentId::new(&filter_value.string()?.parse()?).into() diff --git a/aquadoggo/src/http/api.rs b/aquadoggo/src/http/api.rs index 8ec063003..c29a47d6f 100644 --- a/aquadoggo/src/http/api.rs +++ b/aquadoggo/src/http/api.rs @@ -1,21 +1,401 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +use std::path::PathBuf; +use std::str::FromStr; + +use anyhow::{anyhow, Result}; use async_graphql::http::{playground_source, GraphQLPlaygroundConfig}; use async_graphql_axum::{GraphQLRequest, GraphQLResponse}; -use axum::extract::Extension; -use axum::response::{self, IntoResponse}; +use axum::body::StreamBody; +use axum::extract::{Extension, Path}; +use axum::headers::{ETag, IfNoneMatch}; +use axum::http::StatusCode; +use axum::response::{self, IntoResponse, Response}; +use axum::TypedHeader; +use http::header; +use log::warn; +use p2panda_rs::document::traits::AsDocument; +use p2panda_rs::document::{DocumentId, DocumentViewId}; +use p2panda_rs::schema::SchemaId; +use p2panda_rs::storage_provider::traits::DocumentStore; +use p2panda_rs::Human; +use tokio::fs::File; +use tokio_util::io::ReaderStream; use crate::http::context::HttpServiceContext; -/// Handle graphql playground requests at the given path. +/// Handle GraphQL playground requests at the given path. pub async fn handle_graphql_playground(path: &str) -> impl IntoResponse { response::Html(playground_source(GraphQLPlaygroundConfig::new(path))) } -/// Handle graphql requests. +/// Handle GraphQL requests. pub async fn handle_graphql_query( Extension(context): Extension, req: GraphQLRequest, ) -> GraphQLResponse { context.schema.execute(req.into_inner()).await.into() } + +/// Handle requests for a blob document served via HTTP. +/// +/// This method automatically returns the "latest" version of the document. +pub async fn handle_blob_document( + TypedHeader(if_none_match): TypedHeader, + Extension(context): Extension, + Path(document_id): Path, +) -> Result { + let document_id: DocumentId = DocumentId::from_str(&document_id) + .map_err(|err| BlobHttpError::InvalidFormat(err.into()))?; + + let document = context + .store + .get_document(&document_id) + .await + .map_err(|err| BlobHttpError::InternalError(err.into()))? + .ok_or_else(|| BlobHttpError::NotFound)?; + + // Requested document is not a blob, treat this as a "not found" error + if document.schema_id() != &SchemaId::Blob(1) { + return Err(BlobHttpError::NotFound); + } + + respond_with_blob(if_none_match, context.blobs_base_path, document).await +} + +/// Handle requests for a blob document view served via HTTP. +/// +/// This method returns the version which was specified by the document view id. +pub async fn handle_blob_view( + TypedHeader(if_none_match): TypedHeader, + Extension(context): Extension, + Path((document_id, view_id)): Path<(String, String)>, +) -> Result { + let document_id = DocumentId::from_str(&document_id) + .map_err(|err| BlobHttpError::InvalidFormat(err.into()))?; + let view_id = DocumentViewId::from_str(&view_id) + .map_err(|err| BlobHttpError::InvalidFormat(err.into()))?; + + let document = context + .store + .get_document_by_view_id(&view_id) + .await + .map_err(|err| BlobHttpError::InternalError(err.into()))? 
+ .ok_or(BlobHttpError::NotFound)?; + + if document.id() != &document_id || document.schema_id() != &SchemaId::Blob(1) { + return Err(BlobHttpError::NotFound); + } + + respond_with_blob(if_none_match, context.blobs_base_path, document).await +} + +/// Returns HTTP response with the contents, ETag and given MIME type of a blob. +/// +/// Supports basic caching by handling "IfNoneMatch" headers matching the latest ETag. +async fn respond_with_blob( + if_none_match: IfNoneMatch, + blobs_base_path: PathBuf, + document: impl AsDocument, +) -> Result { + let view_id = document.view_id(); + + // Convert document view id into correct ETag value (with quotation marks defined in + // https://datatracker.ietf.org/doc/html/rfc7232#section-2.3) + let to_etag_str = || format!("\"{}\"", view_id); + + // Respond with 304 "not modified" if ETag still matches (document did not get updated) + let etag = + ETag::from_str(&to_etag_str()).map_err(|err| BlobHttpError::InternalError(err.into()))?; + if !if_none_match.precondition_passes(&etag) { + return Ok(StatusCode::NOT_MODIFIED.into_response()); + } + + // Get MIME type of blob + let mime_type_str = match document.get("mime_type") { + Some(p2panda_rs::operation::OperationValue::String(value)) => Ok(value), + _ => Err(BlobHttpError::InternalError(anyhow!( + "Blob document did not contain a valid 'mime_type' field" + ))), + }?; + + // Get body from read-stream of stored file on file system + let mut file_path = blobs_base_path; + file_path.push(format!("{view_id}")); + match File::open(&file_path).await { + Ok(file) => { + let headers = [ + // MIME type to allow browsers to correctly handle this specific blob format + (header::CONTENT_TYPE, mime_type_str), + // ETag to allow browsers handle caching + (header::ETAG, &to_etag_str()), + ]; + + let stream = ReaderStream::new(file); + let body = StreamBody::new(stream); + + Ok((headers, body).into_response()) + } + Err(_) => { + warn!( + "Data inconsistency detected: Blob document {} exists in database but not on file + system at path {}!", + view_id.display(), + file_path.display() + ); + + Err(BlobHttpError::NotFound) + } + } +} + +#[derive(Debug)] +pub enum BlobHttpError { + NotFound, + InvalidFormat(anyhow::Error), + InternalError(anyhow::Error), +} + +impl IntoResponse for BlobHttpError { + fn into_response(self) -> Response { + match self { + BlobHttpError::NotFound => { + (StatusCode::NOT_FOUND, "Could not find document").into_response() + } + BlobHttpError::InvalidFormat(err) => ( + StatusCode::BAD_REQUEST, + format!("Could not parse identifier: {}", err), + ) + .into_response(), + BlobHttpError::InternalError(err) => ( + StatusCode::INTERNAL_SERVER_ERROR, + format!("Something went wrong: {}", err), + ) + .into_response(), + } + } +} + +#[cfg(test)] +mod tests { + use http::{header, StatusCode}; + use p2panda_rs::document::DocumentId; + use p2panda_rs::identity::KeyPair; + use p2panda_rs::schema::validate::MAX_BLOB_PIECE_LENGTH; + use p2panda_rs::test_utils::fixtures::key_pair; + use rstest::rstest; + + use crate::materializer::tasks::blob_task; + use crate::materializer::TaskInput; + use crate::test_utils::{add_blob, http_test_client, test_runner, update_blob, TestNode}; + + #[rstest] + fn responds_with_blob_in_http_body(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Make 
sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await + .unwrap(); + + let client = http_test_client(&node).await; + + // "/blobs/" path + let response = client.get(&format!("/blobs/{}", document_id)).send().await; + let status_code = response.status(); + let body = response.text().await; + + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, World!"); + + // "/blobs//" path + let response = client + .get(&format!("/blobs/{}/{}", document_id, blob_view_id)) + .send() + .await; + let status_code = response.status(); + let body = response.text().await; + + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, World!"); + }) + } + + #[rstest] + fn document_route_responds_with_latest_view(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await + .unwrap(); + + // Update the blob + let blob_data = "Hello, Panda!".as_bytes(); + let blob_view_id_2 = + update_blob(&mut node, &blob_data, 6, &blob_view_id, &key_pair).await; + + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id_2.clone()), + ) + .await + .unwrap(); + + // Expect to receive latest version + let client = http_test_client(&node).await; + let response = client.get(&format!("/blobs/{}", document_id)).send().await; + let status_code = response.status(); + let body = response.text().await; + + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, Panda!"); + }) + } + + #[rstest] + fn responds_with_content_type_header(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = r#" + + + + + "# + .as_bytes(); + let blob_view_id = add_blob( + &mut node, + &blob_data, + MAX_BLOB_PIECE_LENGTH, + "image/svg+xml", + &key_pair, + ) + .await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await + .unwrap(); + + // Expect correctly set content type header and body in response + let client = http_test_client(&node).await; + let response = client.get(&format!("/blobs/{}", document_id)).send().await; + let status_code = response.status(); + let headers = response.headers(); + let body = response.bytes().await; + let content_type = headers + .get(header::CONTENT_TYPE) + .expect("ContentType to exist in header"); + + assert_eq!(content_type, "image/svg+xml"); + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, blob_data); + }) + } + + #[rstest] + fn handles_etag_and_if_none_match_precondition(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await + .unwrap(); + + let client = http_test_client(&node).await; + + // 1. 
Get blob and ETag connected to it + let response = client.get(&format!("/blobs/{}", document_id)).send().await; + let status_code = response.status(); + let headers = response.headers(); + let body = response.text().await; + let etag = headers.get(header::ETAG).expect("ETag to exist in header"); + + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, World!"); + + // 2. Send another request, including the received ETag inside a "IfNoneMatch" header + let response = client + .get(&format!("/blobs/{}", document_id)) + .header(header::IF_NONE_MATCH, etag) + .send() + .await; + let status_code = response.status(); + let body = response.text().await; + assert_eq!(status_code, StatusCode::NOT_MODIFIED); + assert_eq!(body, ""); + + // 3. Update the blob + let blob_data = "Hello, Panda!".as_bytes(); + let blob_view_id_2 = + update_blob(&mut node, &blob_data, 6, &blob_view_id, &key_pair).await; + + // Make sure to materialize blob on file system + blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id_2.clone()), + ) + .await + .unwrap(); + + // 4. Send request again, including the (now outdated) ETag + let response = client + .get(&format!("/blobs/{}", document_id)) + .header(header::IF_NONE_MATCH, etag) + .send() + .await; + let status_code = response.status(); + let headers = response.headers(); + let body = response.text().await; + let etag_2 = headers.get(header::ETAG).expect("ETag to exist in header"); + + assert_ne!(etag, etag_2); + assert_eq!(status_code, StatusCode::OK); + assert_eq!(body, "Hello, Panda!"); + }) + } + + #[rstest] + #[case::inexisting_document_id( + "/blobs/0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", + StatusCode::NOT_FOUND + )] + #[case::inexisting_document_view_id( + "/blobs/0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/0020bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", + StatusCode::NOT_FOUND + )] + #[case::invalid_document_id("/blobs/not_valid", StatusCode::BAD_REQUEST)] + #[case::invalid_document_view_id( + "/blobs/0020aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/not_valid", + StatusCode::BAD_REQUEST + )] + fn error_responses(#[case] path: &'static str, #[case] expected_status_code: StatusCode) { + test_runner(move |node: TestNode| async move { + let client = http_test_client(&node).await; + let response = client.get(path).send().await; + assert_eq!(response.status(), expected_status_code); + }) + } +} diff --git a/aquadoggo/src/http/context.rs b/aquadoggo/src/http/context.rs index 38c40a2e5..01015bf5c 100644 --- a/aquadoggo/src/http/context.rs +++ b/aquadoggo/src/http/context.rs @@ -1,16 +1,28 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +use std::path::PathBuf; + +use crate::db::SqlStore; use crate::graphql::GraphQLSchemaManager; #[derive(Clone)] pub struct HttpServiceContext { + /// SQL database. + pub store: SqlStore, + /// Dynamic GraphQL schema manager. pub schema: GraphQLSchemaManager, + + /// Path of the directory where blobs should be served from. + pub blobs_base_path: PathBuf, } impl HttpServiceContext { - /// Create a new HttpServiceContext. 
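In short, the caching contract exercised by the tests above: a blob response carries an `ETag` derived from the document view id, and a request repeating that value in `If-None-Match` receives `304 Not Modified` until the blob is updated. A rough client-side sketch, reusing the test client calls that appear in this changeset:

```rust
// First request returns the blob body together with an ETag header, which is the
// blob's current document view id wrapped in quotation marks
let response = client.get(&format!("/blobs/{}", document_id)).send().await;
let etag = response
    .headers()
    .get(header::ETAG)
    .expect("ETag to exist in header")
    .clone();

// Repeating that value in "If-None-Match" short-circuits with 304 Not Modified
// for as long as the blob document has not been updated
let cached = client
    .get(&format!("/blobs/{}", document_id))
    .header(header::IF_NONE_MATCH, etag)
    .send()
    .await;
assert_eq!(cached.status(), StatusCode::NOT_MODIFIED);
```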
- pub fn new(schema: GraphQLSchemaManager) -> Self { - Self { schema } + pub fn new(store: SqlStore, schema: GraphQLSchemaManager, blobs_base_path: PathBuf) -> Self { + Self { + store, + schema, + blobs_base_path, + } } } diff --git a/aquadoggo/src/http/service.rs b/aquadoggo/src/http/service.rs index 7a2f40e02..855d61193 100644 --- a/aquadoggo/src/http/service.rs +++ b/aquadoggo/src/http/service.rs @@ -14,10 +14,13 @@ use tower_http::cors::{Any, CorsLayer}; use crate::bus::ServiceSender; use crate::context::Context; use crate::graphql::GraphQLSchemaManager; -use crate::http::api::{handle_graphql_playground, handle_graphql_query}; +use crate::http::api::{ + handle_blob_document, handle_blob_view, handle_graphql_playground, handle_graphql_query, +}; use crate::http::context::HttpServiceContext; use crate::manager::{ServiceReadySender, Shutdown}; +/// Route to the GraphQL playground const GRAPHQL_ROUTE: &str = "/graphql"; /// Build HTTP server with GraphQL API. @@ -35,6 +38,9 @@ pub fn build_server(http_context: HttpServiceContext) -> Router { GRAPHQL_ROUTE, get(|| handle_graphql_playground(GRAPHQL_ROUTE)).post(handle_graphql_query), ) + // Add blob routes + .route("/blobs/:document_id", get(handle_blob_document)) + .route("/blobs/:document_id/:view_hash", get(handle_blob_view)) // Add middlewares .layer(cors) // Add shared context @@ -55,8 +61,14 @@ pub async fn http_service( let graphql_schema_manager = GraphQLSchemaManager::new(context.store.clone(), tx, context.schema_provider.clone()).await; + let blobs_base_path = &context.config.blobs_base_path; + // Introduce a new context for all HTTP routes - let http_context = HttpServiceContext::new(graphql_schema_manager); + let http_context = HttpServiceContext::new( + context.store.clone(), + graphql_schema_manager, + blobs_base_path.to_owned(), + ); // Start HTTP server with given port and re-attempt with random port if it was taken already let builder = if let Ok(builder) = axum::Server::try_bind(&http_address) { @@ -108,7 +120,11 @@ mod tests { let schema_provider = SchemaProvider::default(); let graphql_schema_manager = GraphQLSchemaManager::new(node.context.store.clone(), tx, schema_provider).await; - let context = HttpServiceContext::new(graphql_schema_manager); + let context = HttpServiceContext::new( + node.context.store.clone(), + graphql_schema_manager, + node.context.config.blobs_base_path.clone(), + ); let client = TestClient::new(build_server(context)); let response = client diff --git a/aquadoggo/src/materializer/service.rs b/aquadoggo/src/materializer/service.rs index 84bbb4bb3..7b1ebb45c 100644 --- a/aquadoggo/src/materializer/service.rs +++ b/aquadoggo/src/materializer/service.rs @@ -8,7 +8,9 @@ use tokio::task; use crate::bus::{ServiceMessage, ServiceSender}; use crate::context::Context; use crate::manager::{ServiceReadySender, Shutdown}; -use crate::materializer::tasks::{dependency_task, reduce_task, schema_task}; +use crate::materializer::tasks::{ + blob_task, dependency_task, garbage_collection_task, reduce_task, schema_task, +}; use crate::materializer::worker::{Factory, Task, TaskStatus}; use crate::materializer::TaskInput; @@ -38,6 +40,8 @@ pub async fn materializer_service( factory.register("reduce", pool_size, reduce_task); factory.register("dependency", pool_size, dependency_task); factory.register("schema", pool_size, schema_task); + factory.register("blob", pool_size, blob_task); + factory.register("garbage_collection", pool_size, garbage_collection_task); // Get a listener for error signal from factory let 
on_error = factory.on_error(); diff --git a/aquadoggo/src/materializer/tasks/blob.rs b/aquadoggo/src/materializer/tasks/blob.rs new file mode 100644 index 000000000..b9c1cc28f --- /dev/null +++ b/aquadoggo/src/materializer/tasks/blob.rs @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use anyhow::anyhow; +use futures::{pin_mut, StreamExt}; +use log::{debug, info}; +use p2panda_rs::document::traits::AsDocument; +use p2panda_rs::document::DocumentViewId; +use p2panda_rs::operation::OperationValue; +use p2panda_rs::schema::SchemaId; +use p2panda_rs::storage_provider::traits::DocumentStore; +use tokio::fs::OpenOptions; +use tokio::io::AsyncWriteExt; + +use crate::context::Context; +use crate::db::types::StorageDocument; +use crate::materializer::worker::{TaskError, TaskResult}; +use crate::materializer::TaskInput; + +/// A blob task assembles and persists blobs to the filesystem. +/// +/// Blob tasks are dispatched whenever a blob or blob piece document has all its immediate +/// dependencies available in the store. +pub async fn blob_task(context: Context, input: TaskInput) -> TaskResult { + debug!("Working on {}", input); + + let input_view_id = match input { + TaskInput::DocumentViewId(view_id) => view_id, + _ => return Err(TaskError::Critical("Invalid task input".into())), + }; + + // Determine the schema of the updated view id. + let schema = context + .store + .get_schema_by_document_view(&input_view_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))? + .unwrap(); + + let updated_blobs: Vec = match schema { + // This task is about an updated blob document so we only handle that. + SchemaId::Blob(_) => { + let document = context + .store + .get_document_by_view_id(&input_view_id) + .await + .map_err(|err| TaskError::Failure(err.to_string()))? + .unwrap(); + Ok(vec![document]) + } + + // This task is about an updated blob piece document that may be used in one or more blob documents. + SchemaId::BlobPiece(_) => get_related_blobs(&input_view_id, &context).await, + _ => Err(TaskError::Critical(format!( + "Unknown system schema id: {}", + schema + ))), + }?; + + // The related blobs are not known yet to this node so we mark this task failed. + if updated_blobs.is_empty() { + return Err(TaskError::Failure( + "Related blob does not exist (yet)".into(), + )); + } + + // Materialize all updated blobs to the filesystem. + for blob_document in updated_blobs.iter() { + // Get a stream of raw blob data + let mut blob_stream = context + .store + .get_blob_by_view_id(blob_document.view_id()) + .await + // We don't raise a critical error here, as it is possible that this method returns an + // error + .map_err(|err| TaskError::Failure(err.to_string()))? + .expect("Blob data exists at this point"); + + // Determine a path for this blob file on the file system + let blob_view_path = context + .config + .blobs_base_path + .join(blob_document.view_id().to_string()); + + // Write the blob to the filesystem + info!("Creating blob at path {}", blob_view_path.display()); + + let mut file = OpenOptions::new() + .write(true) + .create(true) + .open(&blob_view_path) + .await + .map_err(|err| { + TaskError::Critical(format!( + "Could not create blob file @ {}: {}", + blob_view_path.display(), + err + )) + })?; + + // Read from the stream, chunk by chunk, and write every part to the file. 
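The write loop in this task streams the assembled blob to disk chunk by chunk. A self-contained sketch of the same pattern, assuming only the `futures` and `tokio` crates already used here (the helper and its signature are illustrative, not part of this changeset):

```rust
use futures::{pin_mut, Stream, StreamExt};
use tokio::io::AsyncWriteExt;

// Illustrative helper: drain a fallible byte stream into an open file without
// ever buffering the whole blob in memory.
async fn write_stream_to_file<S, B, E>(
    stream: S,
    file: &mut tokio::fs::File,
) -> std::io::Result<()>
where
    S: Stream<Item = Result<B, E>>,
    B: AsRef<[u8]>,
    E: std::fmt::Display,
{
    pin_mut!(stream);
    while let Some(chunk) = stream.next().await {
        let chunk = chunk
            .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err.to_string()))?;
        // Each chunk is written as soon as it arrives, keeping memory usage flat
        file.write_all(chunk.as_ref()).await?;
    }
    file.flush().await
}
```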
This should put + // less pressure on our systems memory and allow writing large blob files + let stream = blob_stream.read_all(); + pin_mut!(stream); + + while let Some(value) = stream.next().await { + match value { + Ok(buf) => file.write(&buf).await.map_err(|err| anyhow!(err)), + Err(err) => Err(anyhow!(err)), + } + .map_err(|err| { + TaskError::Critical(format!( + "Could not write blob file @ {}: {}", + blob_view_path.display(), + err + )) + })?; + } + } + + Ok(None) +} + +/// Retrieve blobs that use the targeted blob piece as one of their fields. +async fn get_related_blobs( + target_blob_piece: &DocumentViewId, + context: &Context, +) -> Result, TaskError> { + // Retrieve all blob documents from the store + let blobs = context + .store + .get_documents_by_schema(&SchemaId::Blob(1)) + .await + .map_err(|err| TaskError::Critical(err.to_string())) + .unwrap(); + + // Collect all blobs that use the targeted blob piece + let mut related_blobs = vec![]; + for blob in blobs { + // We can unwrap the value here as all documents returned from the storage method above + // have a current view (they are not deleted). + let fields_value = blob.get("pieces").unwrap(); + + if let OperationValue::PinnedRelationList(fields) = fields_value { + if fields + .iter() + .any(|field_view_id| field_view_id == target_blob_piece) + { + related_blobs.push(blob) + } else { + continue; + } + } else { + // It is a critical if there are blobs in the store that don't match the blob schema. + Err(TaskError::Critical( + "Blob operation does not have a 'pieces' operation field".into(), + ))? + } + } + + Ok(related_blobs) +} + +#[cfg(test)] +mod tests { + use p2panda_rs::identity::KeyPair; + use p2panda_rs::test_utils::fixtures::key_pair; + use rstest::rstest; + use tokio::fs; + + use crate::materializer::tasks::blob_task; + use crate::materializer::TaskInput; + use crate::test_utils::{add_blob, test_runner, TestNode}; + + #[rstest] + fn materializes_blob_to_filesystem(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Publish blob + let blob_data = "Hello, World!"; + let blob_view_id = + add_blob(&mut node, blob_data.as_bytes(), 5, "plain/text", &key_pair).await; + + // Run blob task + let result = blob_task( + node.context.clone(), + TaskInput::DocumentViewId(blob_view_id.clone()), + ) + .await; + + // It shouldn't fail + assert!(result.is_ok(), "{:#?}", result); + // It should return no extra tasks + assert!(result.unwrap().is_none()); + + // Construct the expected path to the blob view file + let base_path = &node.context.config.blobs_base_path; + let blob_path = base_path.join(blob_view_id.to_string()); + + // Read from this file + let retrieved_blob_data = fs::read_to_string(blob_path).await; + + // It should match the complete published blob data + assert!(retrieved_blob_data.is_ok(), "{:?}", retrieved_blob_data); + assert_eq!(blob_data, retrieved_blob_data.unwrap()); + }) + } +} diff --git a/aquadoggo/src/materializer/tasks/dependency.rs b/aquadoggo/src/materializer/tasks/dependency.rs index 4ade3765f..e9d1de9c3 100644 --- a/aquadoggo/src/materializer/tasks/dependency.rs +++ b/aquadoggo/src/materializer/tasks/dependency.rs @@ -131,10 +131,17 @@ pub async fn dependency_task(context: Context, input: TaskInput) -> TaskResult { + next_tasks.push(Task::new( + "blob", + TaskInput::DocumentViewId(document_view.id().clone()), + )); + } _ => {} } } - // Now we check all the "parent" or "inverse" relations, that is _other_ documents pointing at // the one we're currently looking at let mut 
reverse_tasks = get_inverse_relation_tasks(&context, document.schema_id()).await?; @@ -887,7 +894,7 @@ mod tests { assert_eq!(tasks[0].worker_name(), &String::from("dependency")); // 2. The "dependency" task will try to resolve the pinned document view pointing at - // the "post" document in it's version 2 + // the "post" document in its version 2 let tasks = dependency_task(node_b.context.clone(), tasks[0].input().clone()) .await .unwrap(); @@ -988,8 +995,9 @@ mod tests { .await .unwrap() .expect("Should have returned new tasks"); - assert_eq!(tasks.len(), 1); - assert_eq!(tasks[0].worker_name(), &String::from("dependency")); + assert_eq!(tasks.len(), 2); + assert_eq!(tasks[0].worker_name(), &String::from("garbage_collection")); + assert_eq!(tasks[1].worker_name(), &String::from("dependency")); // We should have now a materialized latest post and comment document but not the // pinned historical version of the post, where the comment was pointing at! @@ -1019,7 +1027,7 @@ mod tests { // 2. The "dependency" task followed materialising the "post" found a reverse relation // to a "comment" document .. it dispatches another "dependency" task for it - let tasks = dependency_task(node_b.context.clone(), tasks[0].input().clone()) + let tasks = dependency_task(node_b.context.clone(), tasks[1].input().clone()) .await .unwrap(); assert_eq!( diff --git a/aquadoggo/src/materializer/tasks/garbage_collection.rs b/aquadoggo/src/materializer/tasks/garbage_collection.rs new file mode 100644 index 000000000..7e6901e8e --- /dev/null +++ b/aquadoggo/src/materializer/tasks/garbage_collection.rs @@ -0,0 +1,657 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use log::debug; +use p2panda_rs::document::DocumentViewId; +use p2panda_rs::operation::traits::AsOperation; +use p2panda_rs::schema::SchemaId; +use p2panda_rs::storage_provider::traits::OperationStore; +use p2panda_rs::Human; + +use crate::context::Context; +use crate::materializer::worker::{TaskError, TaskResult}; +use crate::materializer::{Task, TaskInput}; + +pub async fn garbage_collection_task(context: Context, input: TaskInput) -> TaskResult { + debug!("Working on {}", input); + + match input { + TaskInput::DocumentId(document_id) => { + // This task is concerned with a document which may now have dangling views. We want + // to check for this and delete any views which are no longer needed. + debug!( + "Prune document views for document: {}", + document_id.display() + ); + + // Collect the ids of all views for this document. + let all_document_view_ids: Vec = context + .store + .get_all_document_view_ids(&document_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))?; + + // Iterate over all document views and delete them if no document view exists which refers + // to it in a pinned relation field AND they are not the current view of a document. + // + // Deletes on "document_views" cascade to "document_view_fields" so rows there are also removed + // from the database. + let mut all_effected_child_relations = vec![]; + let mut deleted_views_count = 0; + for document_view_id in &all_document_view_ids { + // Check if this is the current view of its document. This will still return true + // if the document in question is deleted. 
+ let is_current_view = context + .store + .is_current_view(document_view_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))?; + + let mut effected_child_relations = vec![]; + let mut view_deleted = false; + + if !is_current_view { + // Before attempting to delete this view we need to fetch the ids of any child documents + // which might have views that could become unpinned as a result of this delete. These + // will be returned if the deletion is successful. + effected_child_relations = context + .store + .get_child_document_ids(document_view_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))?; + + // Attempt to delete the view. If it is pinned from an existing view the deletion will + // not go ahead. + view_deleted = context + .store + .prune_document_view(document_view_id) + .await + .map_err(|err| TaskError::Critical(err.to_string()))?; + } + + // If the view was deleted then push the effected children to the return array + if view_deleted { + debug!("Deleted view: {}", document_view_id); + deleted_views_count += 1; + all_effected_child_relations.extend(effected_child_relations); + } else { + debug!("Did not delete view: {}", document_view_id); + } + } + + // If the number of deleted views equals the total existing views (minus one for the + // current view), then there is a chance this became completely detached. In this case + // we should check if this document is a blob document and then try to purge it. + if all_document_view_ids.len() - 1 == deleted_views_count { + let operation = context + .store + .get_operation(&document_id.as_str().parse().unwrap()) + .await + .map_err(|err| TaskError::Failure(err.to_string()))? + .expect("Operation exists in store"); + + if let SchemaId::Blob(_) = operation.schema_id() { + // Purge the blob and all its pieces. This only succeeds if no document refers + // to the blob document by either a relation or pinned relation. + context + .store + .purge_blob(&document_id) + .await + .map_err(|err| TaskError::Failure(err.to_string()))?; + } + } + + // We compose some more prune tasks based on the effected documents returned above. + let next_tasks: Vec> = all_effected_child_relations + .iter() + .map(|document_id| { + debug!("Issue prune task for document: {document_id:#?}"); + Task::new( + "garbage_collection", + TaskInput::DocumentId(document_id.to_owned()), + ) + }) + .collect(); + + if next_tasks.is_empty() { + Ok(None) + } else { + Ok(Some(next_tasks)) + } + } + _ => Err(TaskError::Critical("Invalid task input".into())), + } +} + +#[cfg(test)] +mod tests { + use p2panda_rs::document::DocumentId; + use p2panda_rs::identity::KeyPair; + use p2panda_rs::schema::SchemaId; + use p2panda_rs::storage_provider::traits::DocumentStore; + use p2panda_rs::test_utils::fixtures::{key_pair, random_document_view_id}; + use rstest::rstest; + + use crate::materializer::tasks::garbage_collection_task; + use crate::materializer::{Task, TaskInput}; + use crate::test_utils::{ + add_blob, add_schema_and_documents, assert_query, test_runner, update_document, TestNode, + }; + + #[rstest] + fn e2e_pruning(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Publish some documents which we will later point relations at. 
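Distilled, the rule the task above applies per view, with the store lookups replaced by plain booleans (these helpers are illustrative only):

```rust
/// A view is only pruned when it is neither the current view of its document
/// nor pinned from another existing view.
fn should_prune_view(is_current_view: bool, is_pinned_by_other_view: bool) -> bool {
    !is_current_view && !is_pinned_by_other_view
}

/// Once every view except the current one has been pruned, a blob document becomes
/// a candidate for purging; the purge itself still only succeeds if no other
/// document refers to the blob by a relation or pinned relation.
fn blob_purge_candidate(deleted_views: usize, total_views: usize, is_blob: bool) -> bool {
    is_blob && total_views > 0 && deleted_views == total_views - 1
}
```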
+ let (child_schema, child_document_view_ids) = add_schema_and_documents( + &mut node, + "schema_for_child", + vec![ + vec![("uninteresting_field", 1.into(), None)], + vec![("uninteresting_field", 2.into(), None)], + ], + &key_pair, + ) + .await; + + // Create some parent documents which contain a pinned relation list pointing to the + // children created above. + let (parent_schema, parent_document_view_ids) = add_schema_and_documents( + &mut node, + "schema_for_parent", + vec![vec![ + ("name", "parent".into(), None), + ( + "children", + child_document_view_ids.clone().into(), + Some(child_schema.id().to_owned()), + ), + ]], + &key_pair, + ) + .await; + + // Convert view id to document id. + let parent_document_id: DocumentId = parent_document_view_ids[0] + .clone() + .to_string() + .parse() + .unwrap(); + + // Update the parent document so that there are now two views stored in the db, one + // current and one dangling. + let updated_parent_view_id = update_document( + &mut node, + parent_schema.id(), + vec![("name", "Parent".into())], + &parent_document_view_ids[0], + &key_pair, + ) + .await; + + // Get the historic (dangling) view to check it's actually there. + let historic_document_view = node + .context + .store + .get_document_by_view_id(&parent_document_view_ids[0].clone()) + .await + .unwrap(); + + // It is there... + assert!(historic_document_view.is_some()); + + // Create another document, which has a pinned relation to the parent document created + // above. Now the relation graph looks like this + // + // GrandParent --> Parent --> Child1 + // \ + // --> Child2 + // + let (schema_for_grand_parent, grand_parent_document_view_ids) = + add_schema_and_documents( + &mut node, + "schema_for_grand_parent", + vec![vec![ + ("name", "grand parent".into(), None), + ( + "child", + parent_document_view_ids[0].clone().into(), + Some(parent_schema.id().to_owned()), + ), + ]], + &key_pair, + ) + .await; + + // Convert view id to document id. + let grand_parent_document_id: DocumentId = grand_parent_document_view_ids[0] + .clone() + .to_string() + .parse() + .unwrap(); + + // Update the grand parent document to a new view, leaving the previous one dangling. + // + // Note: this test method _does not_ dispatch "garbage_collection" tasks. + update_document( + &mut node, + schema_for_grand_parent.id(), + vec![ + ("name", "Grand Parent".into()), + ("child", updated_parent_view_id.into()), + ], + &grand_parent_document_view_ids[0], + &key_pair, + ) + .await; + + // Get the historic (dangling) view to make sure it exists. + let historic_document_view = node + .context + .store + .get_document_by_view_id(&grand_parent_document_view_ids[0].clone()) + .await + .unwrap(); + + // It does... + assert!(historic_document_view.is_some()); + + // Now prune dangling views for the grand parent document. This method deletes any + // dangling views (not pinned, not current) from the database for this document. It + // returns the document ids of any documents which may have views which have become + // "un-pinned" as a result of this view being removed. In this case, that's the + // document id of the "parent" document. + let next_tasks = garbage_collection_task( + node.context.clone(), + TaskInput::DocumentId(grand_parent_document_id), + ) + .await + .unwrap() + .unwrap(); + + // One new prune task is issued. + assert_eq!(next_tasks.len(), 1); + // It is the parent (which this grand parent relates to) as we expect. 
+ assert_eq!( + next_tasks[0], + Task::new( + "garbage_collection", + TaskInput::DocumentId(parent_document_id) + ) + ); + + // Check the historic view has been deleted. + let historic_document_view = node + .context + .store + .get_document_by_view_id(&grand_parent_document_view_ids[0].clone()) + .await + .unwrap(); + + // It has... + assert!(historic_document_view.is_none()); + + // Now prune dangling views for the parent document. + let next_tasks = + garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) + .await + .unwrap() + .unwrap(); + + // Two new prune tasks issued. + assert_eq!(next_tasks.len(), 2); + // These are the two final child documents. + assert_eq!( + next_tasks, + child_document_view_ids + .iter() + .rev() + .map(|document_view_id| { + let document_id: DocumentId = document_view_id.to_string().parse().unwrap(); + Task::new("garbage_collection", TaskInput::DocumentId(document_id)) + }) + .collect::>>() + ); + + // Check the historic view has been deleted. + let historic_document_view = node + .context + .store + .get_document_by_view_id(&parent_document_view_ids[0].clone()) + .await + .unwrap(); + + // It has. + assert!(historic_document_view.is_none()); + + // Running the child tasks returns no new tasks. + let next_tasks = + garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) + .await + .unwrap(); + + assert!(next_tasks.is_none()); + }); + } + + #[rstest] + fn no_new_tasks_issued_when_no_views_pruned(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Create a child document. + let (child_schema, child_document_view_ids) = add_schema_and_documents( + &mut node, + "schema_for_child", + vec![vec![("uninteresting_field", 1.into(), None)]], + &key_pair, + ) + .await; + + // Create a parent document which contains a pinned relation list pointing to the + // child created above. + let (_, parent_document_view_ids) = add_schema_and_documents( + &mut node, + "schema_for_parent", + vec![vec![ + ("name", "parent".into(), None), + ( + "children", + child_document_view_ids.clone().into(), + Some(child_schema.id().to_owned()), + ), + ]], + &key_pair, + ) + .await; + + // Run a garbage collection task for the parent. + let document_id: DocumentId = parent_document_view_ids[0].to_string().parse().unwrap(); + let next_tasks = + garbage_collection_task(node.context.clone(), TaskInput::DocumentId(document_id)) + .await + .unwrap(); + + // No views were pruned so we expect no new tasks to be issued. + assert!(next_tasks.is_none()); + }) + } + + #[rstest] + fn purges_blobs(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Publish a blob + let blob_document_view = add_blob( + &mut node, + "Hello World!".as_bytes(), + 6, + "text/plain", + &key_pair, + ) + .await; + let blob_document_id: DocumentId = blob_document_view.to_string().parse().unwrap(); + + // Check the blob is there + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + assert!(blob.is_some()); + + // Run a garbage collection task for the blob document + let next_tasks = garbage_collection_task( + node.context.clone(), + TaskInput::DocumentId(blob_document_id.clone()), + ) + .await + .unwrap(); + + // It shouldn't return any new tasks + assert!(next_tasks.is_none()); + + // The blob should no longer be available + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + assert!(blob.is_none()); + + // And all expected rows deleted from the database. 
+ assert_query(&node, "SELECT entry_hash FROM entries", 0).await; + assert_query(&node, "SELECT operation_id FROM operations_v1", 0).await; + assert_query(&node, "SELECT operation_id FROM operation_fields_v1", 0).await; + assert_query(&node, "SELECT log_id FROM logs", 3).await; + assert_query(&node, "SELECT document_id FROM documents", 0).await; + assert_query(&node, "SELECT document_id FROM document_views", 0).await; + assert_query(&node, "SELECT name FROM document_view_fields", 0).await; + }); + } + + #[rstest] + fn purges_newly_detached_blobs(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Create a blob document + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Relate to the blob from a new document + let (schema, documents_pinning_blob) = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + // Now update the document to relate to another blob. This means the previously created + // blob is now "dangling" + update_document( + &mut node, + schema.id(), + vec![("blob", random_document_view_id().into())], + &documents_pinning_blob[0].clone(), + &key_pair, + ) + .await; + + // Run a task for the parent document + let document_id: DocumentId = documents_pinning_blob[0].to_string().parse().unwrap(); + let next_tasks = + garbage_collection_task(node.context.clone(), TaskInput::DocumentId(document_id)) + .await + .unwrap() + .unwrap(); + + // It issues one new task which is for the blob document + assert_eq!(next_tasks.len(), 1); + let next_tasks = + garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) + .await + .unwrap(); + // No new tasks issued + assert!(next_tasks.is_none()); + + // The blob has correctly been purged + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + + assert!(blob.is_none()); + }) + } + + #[rstest] + fn other_documents_keep_blob_alive(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + // Create a blob document. + let blob_data = "Hello, World!".as_bytes(); + let blob_view_id = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + + // Relate to the blob from a new document. + let (schema, documents_pinning_blob) = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + // Now update the document to relate to another blob. This means the previously + // created blob is now "dangling". + update_document( + &mut node, + schema.id(), + vec![("blob", random_document_view_id().into())], + &documents_pinning_blob[0].clone(), + &key_pair, + ) + .await; + + // Another document relating to the blob (this time from in a relation field). + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_document_id.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + // Run a task for the parent document. 
+ let document_id: DocumentId = documents_pinning_blob[0].to_string().parse().unwrap(); + let next_tasks = + garbage_collection_task(node.context.clone(), TaskInput::DocumentId(document_id)) + .await + .unwrap() + .unwrap(); + + // It issues one new task which is for the blob document. + assert_eq!(next_tasks.len(), 1); + let next_tasks = + garbage_collection_task(node.context.clone(), next_tasks[0].input().to_owned()) + .await + .unwrap(); + // No new tasks issued. + assert!(next_tasks.is_none()); + + // The blob should still be there as it was kept alive by a different document. + let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + + assert!(blob.is_some()); + }) + } + + #[rstest] + fn all_relation_types_keep_blobs_alive(key_pair: KeyPair) { + test_runner(|mut node: TestNode| async move { + let blob_data = "Hello, World!".as_bytes(); + + // Any type of relation can keep a blob alive, here we create one of each and run + // garbage collection tasks for each blob. + + let blob_view_id_1 = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id_1.clone().into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + let blob_view_id_2 = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + vec![blob_view_id_2.clone()].into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + let blob_view_id_3 = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + blob_view_id_3 + .to_string() + .parse::() + .unwrap() + .into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + let blob_view_id_4 = add_blob(&mut node, &blob_data, 6, "text/plain", &key_pair).await; + let _ = add_schema_and_documents( + &mut node, + "img", + vec![vec![( + "blob", + vec![blob_view_id_4.to_string().parse::().unwrap()].into(), + Some(SchemaId::Blob(1)), + )]], + &key_pair, + ) + .await; + + for blob_view_id in [ + blob_view_id_1, + blob_view_id_2, + blob_view_id_3, + blob_view_id_4, + ] { + let blob_document_id: DocumentId = blob_view_id.to_string().parse().unwrap(); + let next_tasks = garbage_collection_task( + node.context.clone(), + TaskInput::DocumentId(blob_document_id.clone()), + ) + .await + .unwrap(); + + assert!(next_tasks.is_none()); + + // All blobs should be kept alive. 
+ let blob = node + .context + .store + .get_blob(&blob_document_id) + .await + .unwrap(); + + assert!(blob.is_some()); + } + }) + } +} diff --git a/aquadoggo/src/materializer/tasks/mod.rs b/aquadoggo/src/materializer/tasks/mod.rs index 7172b0cdb..4f53e1bec 100644 --- a/aquadoggo/src/materializer/tasks/mod.rs +++ b/aquadoggo/src/materializer/tasks/mod.rs @@ -1,9 +1,13 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +mod blob; mod dependency; +mod garbage_collection; mod reduce; mod schema; +pub use blob::blob_task; pub use dependency::dependency_task; +pub use garbage_collection::garbage_collection_task; pub use reduce::reduce_task; pub use schema::schema_task; diff --git a/aquadoggo/src/materializer/tasks/reduce.rs b/aquadoggo/src/materializer/tasks/reduce.rs index 1e3a4b408..ac12d2a51 100644 --- a/aquadoggo/src/materializer/tasks/reduce.rs +++ b/aquadoggo/src/materializer/tasks/reduce.rs @@ -223,7 +223,7 @@ async fn reduce_document + WithPublicKey>( .map_err(|err| TaskError::Critical(err.to_string()))?; } - // Insert this document into storage. If it already existed, this will update it's + // Insert this document into storage. If it already existed, this will update its // current view context .store @@ -231,6 +231,8 @@ async fn reduce_document + WithPublicKey>( .await .map_err(|err| TaskError::Critical(err.to_string()))?; + let mut tasks = vec![]; + // If the document was deleted, then we return nothing if document.is_deleted() { debug!( @@ -238,7 +240,6 @@ async fn reduce_document + WithPublicKey>( document.display(), document.view_id().display() ); - return Ok(None); } if document.is_edited() { @@ -251,14 +252,31 @@ async fn reduce_document + WithPublicKey>( debug!("Created {}", document.display()); }; - debug!( - "Dispatch dependency task for view with id: {}", - document.view_id() - ); - Ok(Some(vec![Task::new( - "dependency", - TaskInput::DocumentViewId(document.view_id().to_owned()), - )])) + if document.is_deleted() || document.is_edited() { + debug!( + "Dispatch prune task for document with id: {}", + document.id() + ); + + tasks.push(Task::new( + "garbage_collection", + TaskInput::DocumentId(document.id().to_owned()), + )) + } + + if !document.is_deleted() { + debug!( + "Dispatch dependency task for view with id: {}", + document.view_id() + ); + + tasks.push(Task::new( + "dependency", + TaskInput::DocumentViewId(document.view_id().to_owned()), + )); + } + + Ok(Some(tasks)) } Err(err) => { // There is not enough operations yet to materialise this view. Maybe next time! @@ -435,7 +453,7 @@ mod tests { .unwrap() .sorted(); - // Reduce document to it's current view and insert into database + // Reduce document to its current view and insert into database let input = TaskInput::DocumentId(document_id.clone()); assert!(reduce_task(node.context.clone(), input).await.is_ok()); @@ -500,7 +518,7 @@ mod tests { for document_id in &document_ids { let input = TaskInput::DocumentId(document_id.clone()); let tasks = reduce_task(node.context.clone(), input).await.unwrap(); - assert!(tasks.is_none()); + assert_eq!(tasks.unwrap().len(), 1); } for document_id in &document_ids { @@ -527,16 +545,16 @@ mod tests { #[rstest] #[case( populate_store_config(3, 1, 1, false, doggo_schema(), doggo_fields(), doggo_fields()), - true + vec!["garbage_collection".to_string(), "dependency".to_string()] )] // This document is deleted, it shouldn't spawn a dependency task. 
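For reference, the follow-up tasks `reduce_task` now dispatches can be summarised as a small illustrative helper derived from the dispatch logic above (not part of the codebase):

```rust
// Which workers run after a document was materialised:
// created -> ["dependency"]
// edited  -> ["garbage_collection", "dependency"]
// deleted -> ["garbage_collection"]
fn follow_up_workers(is_edited: bool, is_deleted: bool) -> Vec<&'static str> {
    let mut workers = Vec::new();
    if is_deleted || is_edited {
        workers.push("garbage_collection");
    }
    if !is_deleted {
        workers.push("dependency");
    }
    workers
}
```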
#[case( populate_store_config(3, 1, 1, true, doggo_schema(), doggo_fields(), doggo_fields()), - false + vec!["garbage_collection".to_string()] )] - fn returns_dependency_task_inputs( + fn returns_correct_dependency_and_prune_tasks( #[case] config: PopulateStoreConfig, - #[case] is_next_task: bool, + #[case] expected_worker_names: Vec, ) { test_runner(move |node: TestNode| async move { // Populate the store with some entries and operations but DON'T materialise any @@ -547,9 +565,16 @@ mod tests { .expect("There should be at least one document id"); let input = TaskInput::DocumentId(document_id.clone()); - let next_task_inputs = reduce_task(node.context.clone(), input).await.unwrap(); + let next_tasks = reduce_task(node.context.clone(), input) + .await + .expect("Ok result") + .expect("Some tasks returned"); - assert_eq!(next_task_inputs.is_some(), is_next_task); + assert_eq!(next_tasks.len(), expected_worker_names.len()); + + for (index, worker_name) in expected_worker_names.iter().enumerate() { + assert_eq!(next_tasks[index].worker_name(), worker_name); + } }); } @@ -560,11 +585,11 @@ mod tests { ) { // Prepare empty database. test_runner(move |node: TestNode| async move { - // Dispatch a reduce task for a document which doesn't exist by it's document id. + // Dispatch a reduce task for a document which doesn't exist by its document id let input = TaskInput::DocumentId(document_id); assert!(reduce_task(node.context.clone(), input).await.is_ok()); - // Dispatch a reduce task for a document which doesn't exist by it's document view id. + // Dispatch a reduce task for a document which doesn't exist by its document view id let input = TaskInput::DocumentViewId(document_view_id); assert!(reduce_task(node.context.clone(), input).await.is_ok()); }); diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index 200523262..720d8e36f 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -200,7 +200,7 @@ pub async fn connect_to_relay( // our local public address and (b) enable a freshly started relay to learn its public address. swarm.dial(relay_address.clone())?; - // Wait to get confirmation that we told the relay node it's public address and that they told + // Wait to get confirmation that we told the relay node its public address and that they told // us ours. let mut learned_observed_addr = false; let mut told_relay_observed_addr = false; diff --git a/aquadoggo/src/proptests/document_strategies.rs b/aquadoggo/src/proptests/document_strategies.rs index eb15d455f..8f5cede02 100644 --- a/aquadoggo/src/proptests/document_strategies.rs +++ b/aquadoggo/src/proptests/document_strategies.rs @@ -15,7 +15,7 @@ const MAX_DOCUMENTS_PER_ROOT_SCHEMA: usize = 15; /// Same as above, this is a shrinking value. const MAX_DOCUMENTS_PER_RELATION_LIST: usize = 2; -/// AST representing a document and it's relations. +/// AST representing a document and its relations. #[derive(Debug, Clone)] pub struct DocumentAST { pub schema_id: SchemaId, @@ -44,6 +44,9 @@ pub enum FieldValue { /// String value. String(String), + /// Hex encoded bytes value. + Bytes(Vec), + /// Reference to a document. 
Relation(DocumentAST), @@ -117,6 +120,15 @@ fn values_from_schema(schema: SchemaAST) -> impl Strategy any::>() + .prop_map(move |value| { + let value = FieldValue::Bytes(value); + DocumentFieldValue { + name: field_name.clone(), + value, + } + }) + .boxed(), SchemaFieldType::Relation => values_from_schema(*relation_schema.clone().unwrap()) .prop_map(move |value| { let schema_id = relation_schema.clone().unwrap().id.clone(); diff --git a/aquadoggo/src/proptests/filter_strategies.rs b/aquadoggo/src/proptests/filter_strategies.rs index f0a95e890..88df7873c 100644 --- a/aquadoggo/src/proptests/filter_strategies.rs +++ b/aquadoggo/src/proptests/filter_strategies.rs @@ -8,7 +8,7 @@ use proptest::strategy::{BoxedStrategy, Just, Strategy}; use proptest_derive::Arbitrary; use crate::proptests::schema_strategies::{SchemaField, SchemaFieldType}; -use crate::proptests::utils::FieldName; +use crate::proptests::utils::{FieldName, HexString}; /// Possible values used in filter arguments. `UniqueIdentifier` is a placeholder for values which /// can be derived at runtime in order to use identifiers which exist in on the node, these include @@ -17,6 +17,7 @@ use crate::proptests::utils::FieldName; pub enum FilterValue { Boolean(bool), String(String), + Bytes(HexString), Integer(i64), Float(f64), UniqueIdentifier, // This is a placeholder for a document id, document view id or public key which is selected at testing time @@ -87,6 +88,7 @@ fn application_field_filter_strategy( | SchemaFieldType::Integer | SchemaFieldType::Float | SchemaFieldType::String + | SchemaFieldType::Bytes | SchemaFieldType::Relation | SchemaFieldType::PinnedRelation => generate_simple_field_filter(field.clone()) .prop_map(|(name, filter)| ((name, filter), Vec::new())) @@ -221,6 +223,18 @@ fn generate_simple_field_filter(field: SchemaField) -> BoxedStrategy<(FieldName, ] .boxed() } + SchemaFieldType::Bytes => { + let field_clone = field.clone(); + prop_oneof![ + any::() + .prop_map(FilterValue::Bytes) + .prop_map(move |value| (field.name.clone(), Filter::Equal(value))), + any::() + .prop_map(FilterValue::Bytes) + .prop_map(move |value| (field_clone.name.clone(), Filter::NotEqual(value))) + ] + .boxed() + } SchemaFieldType::Relation | SchemaFieldType::PinnedRelation => prop_oneof![ ( Just(field.name.clone()), diff --git a/aquadoggo/src/proptests/schema_strategies.rs b/aquadoggo/src/proptests/schema_strategies.rs index 4095bc8e7..32a0c4b74 100644 --- a/aquadoggo/src/proptests/schema_strategies.rs +++ b/aquadoggo/src/proptests/schema_strategies.rs @@ -64,6 +64,7 @@ pub enum SchemaFieldType { Integer, Float, String, + Bytes, Relation, RelationList, PinnedRelation, @@ -107,6 +108,13 @@ fn schema_field() -> impl Strategy { relation_schema: None, } }), + any::().prop_map(|field_name| { + SchemaField { + name: field_name, + field_type: SchemaFieldType::Bytes, + relation_schema: None, + } + }), ]; // Selections for the recursive fields. 
diff --git a/aquadoggo/src/proptests/tests.rs b/aquadoggo/src/proptests/tests.rs index 60db777c6..fef3f990a 100644 --- a/aquadoggo/src/proptests/tests.rs +++ b/aquadoggo/src/proptests/tests.rs @@ -15,7 +15,7 @@ use crate::proptests::schema_strategies::{schema_strategy, SchemaAST}; use crate::proptests::utils::{ add_documents_from_ast, add_schemas_from_ast, parse_filter, parse_selected_fields, FieldName, }; -use crate::test_utils::{graphql_test_client, test_runner, TestClient, TestNode}; +use crate::test_utils::{http_test_client, test_runner, TestClient, TestNode}; use super::filter_strategies::{ application_filters_strategy, meta_field_filter_strategy, Filter, MetaField, @@ -28,7 +28,7 @@ async fn sanity_checks( schemas: &Vec, ) { let node_schemas = node.context.schema_provider.all().await; - assert_eq!(schemas.len(), node_schemas.len() - 2); // minus 2 for system schema + assert_eq!(schemas.len(), node_schemas.len() - 4); // minus 4 for system schema for schema_id in schemas { let result = node .context @@ -187,7 +187,6 @@ prop_compose! { proptest! { #![proptest_config(Config { - cases: 100, failure_persistence: Some(Box::new(FileFailurePersistence::WithSource("regressions"))), .. Config::default() })] @@ -212,7 +211,7 @@ proptest! { sanity_checks(&node, &documents, &schemas).await; // Create a GraphQL client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Run the test for each schema and related documents that have been generated. for schema_id in schemas { @@ -249,7 +248,7 @@ proptest! { sanity_checks(&node, &documents, &schemas).await; // Create a GraphQL client. - let client = graphql_test_client(&node).await; + let client = http_test_client(&node).await; // Get the root schema from the provider. let schema = node.context.schema_provider.get(&schema_ast.id).await.expect("Schema should exist on node"); diff --git a/aquadoggo/src/proptests/utils.rs b/aquadoggo/src/proptests/utils.rs index 6a6b62d7c..0b58e28a5 100644 --- a/aquadoggo/src/proptests/utils.rs +++ b/aquadoggo/src/proptests/utils.rs @@ -19,6 +19,10 @@ use super::filter_strategies::{Filter, FilterValue}; #[derive(Arbitrary, Debug, Clone, PartialEq, Eq, Hash)] pub struct FieldName(#[proptest(regex = "[A-Za-z]{1}[A-Za-z0-9_]{0,63}")] pub String); +/// A hexadecimal string. +#[derive(Arbitrary, Debug, Clone, PartialEq, Eq, Hash)] +pub struct HexString(#[proptest(regex = "([a-fA-F0-9]{2}){0,64}")] pub String); + /// Add schemas from a schema AST to a test node. 
#[async_recursion] pub async fn add_schemas_from_ast( @@ -42,6 +46,9 @@ pub async fn add_schemas_from_ast( SchemaFieldType::String => { schema_fields.push((field.name, FieldType::String)); } + SchemaFieldType::Bytes => { + schema_fields.push((field.name, FieldType::Bytes)); + } SchemaFieldType::Relation => { let schema_ast = field.relation_schema.unwrap(); let schema = add_schemas_from_ast(node, &schema_ast, schemas).await; @@ -116,6 +123,9 @@ pub async fn add_documents_from_ast( FieldValue::String(value) => { operation_fields.push((&field.name.0, value.to_owned().into())); } + FieldValue::Bytes(value) => { + operation_fields.push((&field.name.0, value[..].into())); + } FieldValue::Relation(document_ast) => { let document_view_id = add_documents_from_ast(node, &document_ast, documents).await; let operation_id = document_view_id.graph_tips().first().unwrap(); @@ -246,6 +256,9 @@ pub fn parse_filter(filter_args: &mut Vec, name: &FieldName, filter: &Fi FilterValue::String(value) => { filter_args.push(format!("{name}: {{ eq: {} }}", escape_string_value(value))) } + FilterValue::Bytes(value) => { + filter_args.push(format!("{name}: {{ eq: \"{}\" }}", value.0)) + } FilterValue::Integer(value) => filter_args.push(format!("{name}: {{ eq: {value} }}")), FilterValue::Float(value) => filter_args.push(format!("{name}: {{ eq: {value} }}")), }, @@ -260,6 +273,9 @@ pub fn parse_filter(filter_args: &mut Vec, name: &FieldName, filter: &Fi "{name}: {{ notEq: {} }}", escape_string_value(value) )), + FilterValue::Bytes(value) => { + filter_args.push(format!("{name}: {{ notEq: \"{}\" }}", value.0)) + } FilterValue::Integer(value) => { filter_args.push(format!("{name}: {{ notEq: {value} }}")) } @@ -276,6 +292,7 @@ pub fn parse_filter(filter_args: &mut Vec, name: &FieldName, filter: &Fi )), FilterValue::Integer(value) => filter_args.push(format!("{name}: {{ in: [{value}] }}")), FilterValue::Float(value) => filter_args.push(format!("{name}: {{ in: [{value}] }}")), + _ => panic!(), }, Filter::NotIn(value) => match value { FilterValue::UniqueIdentifier => { @@ -294,6 +311,7 @@ pub fn parse_filter(filter_args: &mut Vec, name: &FieldName, filter: &Fi FilterValue::Float(value) => { filter_args.push(format!("{name}: {{ notIn: [{value}] }}")) } + _ => panic!(), }, Filter::GreaterThan(value) => match value { FilterValue::String(value) => { diff --git a/aquadoggo/src/replication/manager.rs b/aquadoggo/src/replication/manager.rs index 964d6fdaa..a0f9092ef 100644 --- a/aquadoggo/src/replication/manager.rs +++ b/aquadoggo/src/replication/manager.rs @@ -743,7 +743,7 @@ mod tests { .unwrap(); // We expect Peer B to drop the incoming request from Peer A and simply wait for a - // response from it's original request. + // response from its original request assert_eq!(result.messages.len(), 0); // Peer A has two sessions running: The one initiated by Peer B and the one it @@ -943,7 +943,7 @@ mod tests { let response = result.unwrap(); // We expect Peer B to drop the incoming request from Peer A and simply wait - // for a response from it's original request. + // for a response from its original request assert_eq!(response.messages.len(), 0); // Both peers have exactly one session running. 
diff --git a/aquadoggo/src/replication/strategies/diff.rs b/aquadoggo/src/replication/strategies/diff.rs index f3c7a39cc..7d46f0312 100644 --- a/aquadoggo/src/replication/strategies/diff.rs +++ b/aquadoggo/src/replication/strategies/diff.rs @@ -16,7 +16,7 @@ fn remote_requires_entries( remote_log_heights: &HashMap, ) -> Option<(LogId, SeqNum)> { trace!("Local log height: {:?} {:?}", log_id, local_seq_num); - // Get height of the remote log by it's id. + // Get height of the remote log by its id let remote_log_height = remote_log_heights.get(log_id); match remote_log_height { @@ -30,7 +30,7 @@ fn remote_requires_entries( // We increment the seq num as we want it to represent an inclusive lower // bound. // - // We can unwrap as we are incrementing the lower remote seq num which means it's + // We can unwrap as we are incrementing the lower remote seq num which means it // will not reach max seq number. let from_seq_num = remote_seq_num.clone().next().unwrap(); diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 4eeeeb8f5..d1edfd4fb 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -81,7 +81,7 @@ impl SchemaProvider { /// Inserts or updates the given schema in this provider. /// - /// Returns `true` if a schema was updated or it already existed in it's current state, and + /// Returns `true` if a schema was updated or it already existed in its current state, and /// `false` if it was inserted. pub async fn update(&self, schema: Schema) -> Result { if let AllowList::Set(allow_schema_ids) = &self.allow_schema_ids { @@ -94,7 +94,7 @@ impl SchemaProvider { let schema_exists = schemas.get(schema.id()).is_some(); if schema_exists { - // Return true here as the schema already exists in it's current state so we don't need + // Return true here as the schema already exists in its current state so we don't need // to mutate the schema store or announce any change. return Ok(true); } @@ -154,7 +154,7 @@ mod test { async fn get_all_schemas() { let provider = SchemaProvider::default(); let result = provider.all().await; - assert_eq!(result.len(), 2); + assert_eq!(result.len(), 4); } #[tokio::test] diff --git a/aquadoggo/src/test_utils/client.rs b/aquadoggo/src/test_utils/client.rs index b57cd86fe..005efcbbb 100644 --- a/aquadoggo/src/test_utils/client.rs +++ b/aquadoggo/src/test_utils/client.rs @@ -2,11 +2,12 @@ use std::convert::TryFrom; use std::net::{SocketAddr, TcpListener}; +use std::time::Duration; use axum::body::HttpBody; use axum::BoxError; use http::header::{HeaderName, HeaderValue}; -use http::{Request, StatusCode}; +use http::{HeaderMap, Request, StatusCode}; use hyper::{Body, Server}; use tokio::sync::broadcast; use tower::make::Shared; @@ -16,7 +17,7 @@ use crate::graphql::GraphQLSchemaManager; use crate::http::{build_server, HttpServiceContext}; use crate::test_utils::TestNode; -/// GraphQL client which can be used for querying a node in tests. +/// HTTP client for testing request and responses. 
pub struct TestClient { client: reqwest::Client, addr: SocketAddr, @@ -44,14 +45,14 @@ impl TestClient { }); let client = reqwest::Client::builder() - .redirect(reqwest::redirect::Policy::none()) + .timeout(Duration::from_secs(10)) + .redirect(reqwest::redirect::Policy::default()) .build() .unwrap(); TestClient { client, addr } } - #[allow(dead_code)] pub(crate) fn get(&self, url: &str) -> RequestBuilder { RequestBuilder { builder: self.client.get(format!("http://{}{}", self.addr, url)), @@ -65,16 +66,23 @@ impl TestClient { } } -/// Configures a test client that can be used for GraphQL testing. -pub async fn graphql_test_client(node: &TestNode) -> TestClient { +/// Configures a test client that can be used for HTTP API testing. +pub async fn http_test_client(node: &TestNode) -> TestClient { let (tx, _) = broadcast::channel(120); + let manager = GraphQLSchemaManager::new( node.context.store.clone(), tx, node.context.schema_provider.clone(), ) .await; - let http_context = HttpServiceContext::new(manager); + + let http_context = HttpServiceContext::new( + node.context.store.clone(), + manager, + node.context.config.blobs_base_path.to_path_buf(), + ); + TestClient::new(build_server(http_context)) } @@ -103,7 +111,6 @@ impl RequestBuilder { self } - #[allow(dead_code)] pub(crate) fn header(mut self, key: K, value: V) -> Self where HeaderName: TryFrom, @@ -121,6 +128,10 @@ pub(crate) struct TestResponse { } impl TestResponse { + pub(crate) async fn bytes(self) -> Vec { + self.response.bytes().await.unwrap().to_vec() + } + pub(crate) async fn text(self) -> String { self.response.text().await.unwrap() } @@ -132,8 +143,11 @@ impl TestResponse { self.response.json().await.unwrap() } - #[allow(dead_code)] pub(crate) fn status(&self) -> StatusCode { self.response.status() } + + pub(crate) fn headers(&self) -> HeaderMap { + self.response.headers().clone() + } } diff --git a/aquadoggo/src/test_utils/helpers.rs b/aquadoggo/src/test_utils/helpers.rs index 0b004d86f..64a3603d6 100644 --- a/aquadoggo/src/test_utils/helpers.rs +++ b/aquadoggo/src/test_utils/helpers.rs @@ -36,6 +36,7 @@ pub fn doggo_schema() -> Schema { pub fn doggo_fields() -> Vec<(&'static str, OperationValue)> { vec![ ("username", OperationValue::String("bubu".to_owned())), + ("data", OperationValue::Bytes(vec![0, 1, 2, 3])), ("height", OperationValue::Float(3.5)), ("age", OperationValue::Integer(28)), ("is_admin", OperationValue::Boolean(false)), diff --git a/aquadoggo/src/test_utils/mod.rs b/aquadoggo/src/test_utils/mod.rs index 19d444074..809354444 100644 --- a/aquadoggo/src/test_utils/mod.rs +++ b/aquadoggo/src/test_utils/mod.rs @@ -7,12 +7,12 @@ pub mod helpers; mod node; mod runner; -pub use client::{graphql_test_client, TestClient}; +pub use client::{http_test_client, TestClient}; pub use config::TestConfiguration; pub use db::{drop_database, initialize_db, initialize_sqlite_db}; pub use helpers::{build_document, doggo_fields, doggo_schema, schema_from_fields}; pub use node::{ - add_document, add_schema, add_schema_and_documents, populate_and_materialize, - populate_store_config, TestNode, + add_blob, add_document, add_schema, add_schema_and_documents, assert_query, + populate_and_materialize, populate_store_config, update_blob, update_document, TestNode, }; pub use runner::{test_runner, test_runner_with_manager, TestNodeManager}; diff --git a/aquadoggo/src/test_utils/node.rs b/aquadoggo/src/test_utils/node.rs index 85a05cd43..b1668c9ea 100644 --- a/aquadoggo/src/test_utils/node.rs +++ b/aquadoggo/src/test_utils/node.rs @@ -4,12 
+4,14 @@ use log::{debug, info}; use p2panda_rs::document::{DocumentId, DocumentViewId}; use p2panda_rs::entry::traits::AsEncodedEntry; use p2panda_rs::identity::KeyPair; -use p2panda_rs::operation::{OperationBuilder, OperationValue}; +use p2panda_rs::operation::{OperationAction, OperationBuilder, OperationId, OperationValue}; use p2panda_rs::schema::{FieldType, Schema, SchemaId, SchemaName}; +use p2panda_rs::storage_provider::traits::OperationStore; use p2panda_rs::test_utils::memory_store::helpers::{ populate_store, send_to_store, PopulateStoreConfig, }; use rstest::fixture; +use sqlx::query_scalar; use crate::context::Context; use crate::db::SqlStore; @@ -97,13 +99,18 @@ pub async fn populate_and_materialize( // Create reduce task input. let input = TaskInput::DocumentId(document_id); // Run reduce task and collect returned dependency tasks. - let dependency_tasks = reduce_task(node.context.clone(), input.clone()) + let next_tasks = reduce_task(node.context.clone(), input.clone()) .await .expect("Reduce document"); // Run dependency tasks. - if let Some(tasks) = dependency_tasks { - for task in tasks { + if let Some(tasks) = next_tasks { + // We only want to issue dependency tasks. + let dependency_tasks = tasks + .iter() + .filter(|task| task.worker_name() == "dependency"); + + for task in dependency_tasks { dependency_task(node.context.clone(), task.input().to_owned()) .await .expect("Run dependency task"); @@ -145,13 +152,18 @@ pub async fn add_document( .expect("Publish CREATE operation"); let input = TaskInput::DocumentId(DocumentId::from(entry_signed.hash())); - let dependency_tasks = reduce_task(node.context.clone(), input.clone()) + let next_tasks = reduce_task(node.context.clone(), input.clone()) .await .expect("Reduce document"); // Run dependency tasks - if let Some(tasks) = dependency_tasks { - for task in tasks { + if let Some(tasks) = next_tasks { + // We only want to issue dependency tasks. + let dependency_tasks = tasks + .iter() + .filter(|task| task.worker_name() == "dependency"); + + for task in dependency_tasks { dependency_task(node.context.clone(), task.input().to_owned()) .await .expect("Run dependency task"); @@ -264,3 +276,143 @@ pub async fn add_schema_and_documents( (schema, view_ids) } + +/// Helper method for updating documents. +pub async fn update_document( + node: &mut TestNode, + schema_id: &SchemaId, + fields: Vec<(&str, OperationValue)>, + previous: &DocumentViewId, + key_pair: &KeyPair, +) -> DocumentViewId { + // Get requested schema from store. + let schema = node + .context + .schema_provider + .get(schema_id) + .await + .expect("Schema not found"); + + // Build, publish and reduce an update operation for document. + let create_op = OperationBuilder::new(schema.id()) + .action(OperationAction::Update) + .fields(&fields) + .previous(previous) + .build() + .expect("Build operation"); + + let (entry_signed, _) = send_to_store(&node.context.store, &create_op, &schema, key_pair) + .await + .expect("Publish UPDATE operation"); + + let document_id = node + .context + .store + .get_document_id_by_operation_id(&OperationId::from(entry_signed.hash())) + .await + .expect("No db errors") + .expect("Can get document id"); + + let input = TaskInput::DocumentId(document_id); + let next_tasks = reduce_task(node.context.clone(), input.clone()) + .await + .expect("Reduce document"); + + // Run dependency tasks + if let Some(tasks) = next_tasks { + // We only want to issue dependency tasks.
+ let dependency_tasks = tasks + .iter() + .filter(|task| task.worker_name() == "dependency"); + + for task in dependency_tasks { + dependency_task(node.context.clone(), task.input().to_owned()) + .await + .expect("Run dependency task"); + } + } + DocumentViewId::from(entry_signed.hash()) +} + +/// Splits bytes into chunks with a defined maximum length (256 bytes is the specified maximum) and +/// publishes a blob_piece_v1 document for each chunk. +pub async fn add_blob_pieces( + node: &mut TestNode, + body: &[u8], + max_piece_length: usize, + key_pair: &KeyPair, +) -> Vec<DocumentViewId> { + let blob_pieces = body.chunks(max_piece_length); + + let mut blob_pieces_view_ids = Vec::with_capacity(blob_pieces.len()); + for piece in blob_pieces { + let view_id = add_document( + node, + &SchemaId::BlobPiece(1), + vec![("data", piece.into())], + &key_pair, + ) + .await; + + blob_pieces_view_ids.push(view_id); + } + + blob_pieces_view_ids +} + +pub async fn add_blob( + node: &mut TestNode, + body: &[u8], + max_piece_length: usize, + mime_type: &str, + key_pair: &KeyPair, +) -> DocumentViewId { + let blob_pieces_view_ids = add_blob_pieces(node, body, max_piece_length, key_pair).await; + + let blob_view_id = add_document( + node, + &SchemaId::Blob(1), + vec![ + ("length", { body.len() as i64 }.into()), + ("mime_type", mime_type.into()), + ("pieces", blob_pieces_view_ids.into()), + ], + &key_pair, + ) + .await; + + blob_view_id +} + +pub async fn update_blob( + node: &mut TestNode, + body: &[u8], + max_piece_length: usize, + previous: &DocumentViewId, + key_pair: &KeyPair, +) -> DocumentViewId { + let blob_pieces_view_ids = add_blob_pieces(node, body, max_piece_length, key_pair).await; + + let blob_view_id = update_document( + node, + &SchemaId::Blob(1), + vec![ + ("length", { body.len() as i64 }.into()), + ("pieces", blob_pieces_view_ids.into()), + ], + &previous, + &key_pair, + ) + .await; + + blob_view_id +} + +// Helper for asserting expected number of items yielded from a SQL query. +pub async fn assert_query(node: &TestNode, sql: &str, expected_len: usize) { + let result: Result<Vec<String>, _> = + query_scalar(sql).fetch_all(&node.context.store.pool).await; + + assert!(result.is_ok(), "{:#?}", result); + assert_eq!(result.unwrap().len(), expected_len, "{:?}", sql); +} diff --git a/aquadoggo/src/test_utils/runner.rs b/aquadoggo/src/test_utils/runner.rs index 634890fba..9f1080e71 100644 --- a/aquadoggo/src/test_utils/runner.rs +++ b/aquadoggo/src/test_utils/runner.rs @@ -1,6 +1,5 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::panic; use std::sync::Arc; use futures::Future; @@ -101,15 +100,20 @@ pub fn test_runner(test: F) { let (_config, pool) = initialize_db().await; let store = SqlStore::new(pool); - // Construct node config supporting any schema.
- let cfg = Configuration::default(); + // Construct temporary blobs directory for the test runner + let temp_dir = tempfile::TempDir::new() + .expect("Could not create temporary test directory for blobs storage"); + + // Construct node config supporting any schema + let mut config = Configuration::default(); + config.blobs_base_path = temp_dir.path().to_path_buf(); // Construct the actual test node let node = TestNode { context: Context::new( store.clone(), KeyPair::new(), - cfg, + config, SchemaProvider::default(), ), }; @@ -135,7 +139,7 @@ pub fn test_runner(test: F) { // there, we need to propagate it further to inform the test runtime about the result match result { Ok(_) => (), - Err(err) => panic::resume_unwind(err.into_panic()), + Err(err) => std::panic::resume_unwind(err.into_panic()), }; }); } @@ -156,7 +160,7 @@ pub fn test_runner_with_manager (), - Err(err) => panic::resume_unwind(err.into_panic()), + Err(err) => std::panic::resume_unwind(err.into_panic()), }; }); } diff --git a/aquadoggo/src/tests.rs b/aquadoggo/src/tests.rs index 2aea479f5..40a3b9b3d 100644 --- a/aquadoggo/src/tests.rs +++ b/aquadoggo/src/tests.rs @@ -156,7 +156,7 @@ async fn e2e() { // Query a document. // // Now that the cafe has been created and updated we can query it from the client. We do can do - // this using it's schema id and document or view id. + // this using its schema id and document or view id. let panda_cafe = query(&client, &panda_cafe_view_id, &cafe_schema_id).await; @@ -175,7 +175,7 @@ async fn e2e() { aquadoggo.shutdown().await; } -/// Publish an entry and it's operation to a node. +/// Publish an entry and its operation to a node. async fn publish(client: &Client, key_pair: &KeyPair, operation: &Operation) -> DocumentViewId { // Publishing operations. // diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 6bc76a973..339c51cc6 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -29,15 +29,13 @@ figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" libp2p = "0.52.0" log = "0.4.20" -p2panda-rs = "0.7.1" +p2panda-rs = { git = "https://github.com/p2panda/p2panda", rev = "be84d7c4e39c1b67125d80468ccf412cf25ae1d7" } path-clean = "1.0.1" serde = { version = "1.0.185", features = ["serde_derive"] } +tempfile = "3.7.0" tokio = { version = "1.28.2", features = ["full"] } toml = "0.7.6" [dependencies.aquadoggo] version = "~0.5.0" path = "../aquadoggo" - -[dev-dependencies] -tempfile = "3.4.0" diff --git a/aquadoggo_cli/README.md b/aquadoggo_cli/README.md index ac25175b8..0070d925f 100644 --- a/aquadoggo_cli/README.md +++ b/aquadoggo_cli/README.md @@ -85,7 +85,7 @@ depending on your needs. #### Support only certain schemas > "I want to run a node which only replicates and serves data from a limited -> set of schemas. In this case it's schemas required by a mushroom sighting +> set of schemas. In this case its schemas required by a mushroom sighting > app." ```toml @@ -156,8 +156,8 @@ direct_node_addresses = [ #### Persist node identity and database -> "I want my node to persist it's identity and database on the filesystem and -> retreive them whenever it runs again." +> "I want my node to persist its identity, uploaded files and database on the +> filesystem and retreive them whenever it runs again." 
```toml # Persist node private key at given location (using Linux XDG paths as an example) private_key = "$HOME/.local/share/aquadoggo/private-key.txt" # Persist SQLite database at given location database_url = "sqlite:$HOME/.local/share/aquadoggo/db.sqlite3" + +# Persist blobs (large binary files) at given location +blobs_base_path = "$HOME/.local/share/aquadoggo/blobs" ``` ### Configuration @@ -219,6 +222,13 @@ Options: QUIC port for node-node communication and data replication. Defaults to 2022 + -f, --blobs-base-path + Path to folder where blobs (large binary files) are persisted. + Defaults to a temporary directory. + + WARNING: By default your node will not persist any blobs after + shutdown. Set a path for production settings to not lose data. + -k, --private-key Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this current session. diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 20a9d0055..26ce05b03 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -88,6 +88,22 @@ http_port = 2020 # quic_port = 2022 +# ゚・。+☆ +# BLOBS +# ゚・。+☆ + +# Path to folder where blobs (large binary files) are persisted. Defaults to a +# temporary directory. +# +# WARNING: By default your node will not persist any blobs after shutdown. Set +# a path for production settings to not lose data. +# +# WARNING: This setting should reflect the `database_url` configuration. If the +# database is set to be stored somewhere permanently, you should do the same +# for blob files to not run into data inconsistencies. +# +# blobs_base_path = "$HOME/.local/share/aquadoggo/blobs" + # ゚・。+☆+。・ # IDENTITY # ゚・。+☆+。・ diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index f9b51a77d..375d16ee6 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -4,6 +4,7 @@ use std::convert::TryFrom; use std::net::{IpAddr, SocketAddr}; use std::path::PathBuf; use std::str::FromStr; +use std::sync::OnceLock; use anyhow::{anyhow, bail, Result}; use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration}; @@ -16,6 +17,7 @@ use libp2p::multiaddr::Protocol; use libp2p::{Multiaddr, PeerId}; use p2panda_rs::schema::SchemaId; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use tempfile::TempDir; use crate::utils::absolute_path; @@ -23,6 +25,8 @@ const WILDCARD: &str = "*"; const CONFIG_FILE_NAME: &str = "config.toml"; +static TMP_DIR: OnceLock<TempDir> = OnceLock::new(); + type ConfigFilePath = Option<PathBuf>; /// Get configuration from 1. .toml file, 2. environment variables and 3. command line arguments @@ -121,6 +125,15 @@ struct Cli { #[serde(skip_serializing_if = "Option::is_none")] quic_port: Option<u16>, + /// Path to folder where blobs (large binary files) are persisted. Defaults to a temporary + /// directory. + /// + /// WARNING: By default your node will not persist any blobs after shutdown. Set a path for + /// production settings to not lose data. + #[arg(short = 'f', long, value_name = "PATH")] + #[serde(skip_serializing_if = "Option::is_none")] + blobs_base_path: Option<PathBuf>, + /// Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this /// current session.
/// @@ -267,6 +280,7 @@ pub struct Configuration { pub database_max_connections: u32, pub http_port: u16, pub quic_port: u16, + pub blobs_base_path: Option, pub private_key: Option, pub mdns: bool, pub direct_node_addresses: Vec, @@ -286,6 +300,7 @@ impl Default for Configuration { database_max_connections: 32, http_port: 2020, quic_port: 2022, + blobs_base_path: None, mdns: true, private_key: None, direct_node_addresses: vec![], @@ -338,11 +353,26 @@ impl TryFrom for NodeConfiguration { } }; + // Create a temporary blobs directory when none was given + let blobs_base_path = match value.blobs_base_path { + Some(path) => path, + None => TMP_DIR + .get_or_init(|| { + // Initialise a `TempDir` instance globally to make sure it does not run out of + // scope and gets deleted before the end of the application runtime + tempfile::TempDir::new() + .expect("Could not create temporary directory to store blobs") + }) + .path() + .to_path_buf(), + }; + Ok(NodeConfiguration { allow_schema_ids, database_url: value.database_url, database_max_connections: value.database_max_connections, http_port: value.http_port, + blobs_base_path, worker_pool_size: value.worker_pool_size, network: NetworkConfiguration { quic_port: value.quic_port, diff --git a/aquadoggo_cli/src/key_pair.rs b/aquadoggo_cli/src/key_pair.rs index 4635d2b83..9585c5ca6 100644 --- a/aquadoggo_cli/src/key_pair.rs +++ b/aquadoggo_cli/src/key_pair.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::fs::{self, File}; +use std::fs::File; use std::io::{Read, Write}; #[cfg(target_os = "unix")] use std::os::unix::fs::PermissionsExt; @@ -39,8 +39,6 @@ pub fn generate_ephemeral_key_pair() -> KeyPair { fn save_key_pair_to_file(key_pair: &KeyPair, path: PathBuf) -> Result<()> { let private_key_hex = hex::encode(key_pair.private_key().as_bytes()); - // Make sure that directories exist and write file into it - fs::create_dir_all(path.parent().unwrap())?; let mut file = File::create(&path)?; file.write_all(private_key_hex.as_bytes())?; file.sync_all()?; @@ -57,9 +55,7 @@ fn save_key_pair_to_file(key_pair: &KeyPair, path: PathBuf) -> Result<()> { fn save_key_pair_to_file(key_pair: &KeyPair, path: PathBuf) -> Result<()> { let private_key_hex = hex::encode(key_pair.private_key().as_bytes()); - // Make sure that directories exist and write file into it - fs::create_dir_all(path.parent().unwrap())?; - let mut file = File::create(&path)?; + let mut file = File::create(path)?; file.write_all(private_key_hex.as_bytes())?; file.sync_all()?; diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index d02d22735..7bd87b836 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -20,7 +20,11 @@ async fn main() -> anyhow::Result<()> { // Load configuration from command line arguments, environment variables and .toml file let (config_file_path, config) = load_config().context("Could not load configuration")?; - // Set log verbosity based on config. By default scope it always to the "aquadoggo" module. + // Remember if user did not set a blobs directory path, which means that it will default to a + // temporary one + let is_temporary_blobs_path = config.blobs_base_path.is_none(); + + // Set log verbosity based on config. 
By default scope it always to the "aquadoggo" module let mut builder = env_logger::Builder::new(); let builder = match LevelFilter::from_str(&config.log_level) { Ok(log_level) => builder.filter(Some("aquadoggo"), log_level), @@ -50,7 +54,7 @@ async fn main() -> anyhow::Result<()> { "{}", print_config(key_pair_path, config_file_path, &node_config) ); - show_warnings(&node_config); + show_warnings(&node_config, is_temporary_blobs_path); // Start p2panda node in async runtime let node = Node::start(key_pair, node_config).await; @@ -68,19 +72,36 @@ async fn main() -> anyhow::Result<()> { } /// Show some hopefully helpful warnings around common configuration issues. -fn show_warnings(config: &Configuration) { +fn show_warnings(config: &Configuration, is_temporary_blobs_path: bool) { match &config.allow_schema_ids { AllowList::Set(values) => { if values.is_empty() && !config.network.relay_mode { - warn!("Your node was set to not allow any schema ids which is only useful in combination with enabling relay mode. With this setting you will not be able to interact with any client or node."); + warn!( + "Your node was set to not allow any schema ids which is only useful in + combination with enabling relay mode. With this setting you will not be able to + interact with any client or node." + ); } } AllowList::Wildcard => { - warn!("Allowed schema ids is set to wildcard. Your node will support _any_ schemas it will encounter on the network. This is useful for experimentation and local development but _not_ recommended for production settings."); + warn!( + "Allowed schema ids is set to wildcard. Your node will support _any_ schemas it + will encounter on the network. This is useful for experimentation and local + development but _not_ recommended for production settings." + ); } } if !config.network.relay_addresses.is_empty() && config.network.relay_mode { - warn!("Will not connect to given relay addresses when relay mode is enabled"); + warn!("Will not connect to given relay addresses when relay mode is enabled."); + } + + if config.database_url != "sqlite::memory:" && is_temporary_blobs_path { + warn!( + "Your database is persisted but blobs _are not_ which might result in unrecoverable + data inconsistency (blob operations are stored but the files themselves are _not_). It is + recommended to either set both values (`database_url` and `blobs_base_path`) to a + temporary value or set both to persist all data." + ); } }
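As the warning above spells out, a persistent `database_url` should be paired with a persistent `blobs_base_path` so that blob operations and the blob files themselves stay consistent. A minimal sketch of such a `config.toml`, reusing the example paths from the README hunk above (the paths are illustrative only and should be adapted to your setup):

```toml
# Persist SQLite database at given location
database_url = "sqlite:$HOME/.local/share/aquadoggo/db.sqlite3"

# Persist blobs (large binary files) at given location
blobs_base_path = "$HOME/.local/share/aquadoggo/blobs"
```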