From 13d39fa91ffb26e2c7ba6908c017ab7225276f4d Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 12:17:35 +0200 Subject: [PATCH 01/66] Add announce message type --- aquadoggo/src/replication/message.rs | 43 +++++++++++++++++++++------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/aquadoggo/src/replication/message.rs b/aquadoggo/src/replication/message.rs index 1360463f5..e38c4e104 100644 --- a/aquadoggo/src/replication/message.rs +++ b/aquadoggo/src/replication/message.rs @@ -13,32 +13,37 @@ use serde::{Deserialize, Serialize}; use crate::replication::{Mode, SessionId, TargetSet}; -pub const SYNC_REQUEST_TYPE: MessageType = 0; -pub const ENTRY_TYPE: MessageType = 8; -pub const SYNC_DONE_TYPE: MessageType = 9; +pub const ANNOUNCE_TYPE: MessageType = 0; +pub const SYNC_REQUEST_TYPE: MessageType = 1; +pub const SYNC_DONE_TYPE: MessageType = 2; +pub const ENTRY_TYPE: MessageType = 3; pub const HAVE_TYPE: MessageType = 10; pub type MessageType = u64; +pub type Timestamp = u64; + pub type LiveMode = bool; pub type LogHeights = (PublicKey, Vec<(LogId, SeqNum)>); #[derive(Debug, Clone, Eq, PartialEq)] pub enum Message { + Announce(Timestamp, TargetSet), SyncRequest(Mode, TargetSet), + Have(Vec), Entry(EncodedEntry, Option), SyncDone(LiveMode), - Have(Vec), } impl Message { pub fn message_type(&self) -> MessageType { match self { + Message::Announce(_, _) => ANNOUNCE_TYPE, Message::SyncRequest(_, _) => SYNC_REQUEST_TYPE, + Message::Have(_) => HAVE_TYPE, Message::Entry(_, _) => ENTRY_TYPE, Message::SyncDone(_) => SYNC_DONE_TYPE, - Message::Have(_) => HAVE_TYPE, } } } @@ -98,12 +103,23 @@ impl Serialize for SyncMessage { }; match self.message() { + Message::Announce(timestamp, target_set) => { + let mut seq = serialize_header(serializer.serialize_seq(Some(4))?)?; + seq.serialize_element(timestamp)?; + seq.serialize_element(target_set)?; + seq.end() + } Message::SyncRequest(mode, target_set) => { let mut seq = 
serialize_header(serializer.serialize_seq(Some(4))?)?; seq.serialize_element(mode)?; seq.serialize_element(target_set)?; seq.end() } + Message::Have(log_heights) => { + let mut seq = serialize_header(serializer.serialize_seq(Some(3))?)?; + seq.serialize_element(log_heights)?; + seq.end() + } Message::Entry(entry_bytes, operation_bytes) => { let mut seq = serialize_header(serializer.serialize_seq(Some(4))?)?; seq.serialize_element(entry_bytes)?; @@ -115,11 +131,6 @@ impl Serialize for SyncMessage { seq.serialize_element(live_mode)?; seq.end() } - Message::Have(log_heights) => { - let mut seq = serialize_header(serializer.serialize_seq(Some(3))?)?; - seq.serialize_element(log_heights)?; - seq.end() - } } } } @@ -150,7 +161,17 @@ impl<'de> Deserialize<'de> for SyncMessage { serde::de::Error::custom("missing session id in replication message") })?; - let message = if message_type == SYNC_REQUEST_TYPE { + let message = if message_type == ANNOUNCE_TYPE { + let timestamp: Timestamp = seq.next_element()?.ok_or_else(|| { + serde::de::Error::custom("missing timestamp in announce message") + })?; + + let target_set: TargetSet = seq.next_element()?.ok_or_else(|| { + serde::de::Error::custom("missing target set in announce message") + })?; + + Ok(Message::Announce(timestamp, target_set)) + } else if message_type == SYNC_REQUEST_TYPE { let mode: Mode = seq.next_element()?.ok_or_else(|| { serde::de::Error::custom("missing mode in sync request message") })?; From eaf853479d7e6b67270e6f68c9e956a13d9436be Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 12:25:32 +0200 Subject: [PATCH 02/66] Add announcement state to peer table --- aquadoggo/src/replication/service.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 66b5fade9..de7235e23 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -66,8 +66,21 @@ pub async fn 
replication_service( /// Statistics about successful and failed replication sessions for each connected peer. #[derive(Debug, Clone, PartialEq, Eq)] struct PeerStatus { + /// Connectivity information like libp2p peer id and connection id. peer: Peer, + + /// Last known announcement of this peer. This contains a list of schema ids this peer is + /// interested in. + announcement: Option, + + /// Timestamp of the last known announcement. Helps to understand if we can override the + /// previous announcement with a newer one. + announcement_timestamp: u64, + + /// Number of successful replication sessions. successful_count: usize, + + /// Number of failed replication sessions. failed_count: usize, } @@ -75,6 +88,8 @@ impl PeerStatus { pub fn new(peer: Peer) -> Self { Self { peer, + announcement: None, + announcement_timestamp: 0, successful_count: 0, failed_count: 0, } From 4455216e0931a9fb9446e47722f8078a6b46d5f4 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:08:40 +0200 Subject: [PATCH 03/66] Refactor schema provider as the list of schemas is already filtered --- aquadoggo/src/replication/ingest.rs | 13 ++-- aquadoggo/src/replication/service.rs | 80 +++++++++++++++---------- aquadoggo/src/schema/schema_provider.rs | 30 ++++++---- 3 files changed, 76 insertions(+), 47 deletions(-) diff --git a/aquadoggo/src/replication/ingest.rs b/aquadoggo/src/replication/ingest.rs index bfb52ba32..5b66af4bd 100644 --- a/aquadoggo/src/replication/ingest.rs +++ b/aquadoggo/src/replication/ingest.rs @@ -49,10 +49,15 @@ impl SyncIngest { let plain_operation = decode_operation(encoded_operation)?; - // If the node has been configured with supported_schema_ids, check that the sent - // operation follows one of our supported schema. 
- if let Some(supported_schema_ids) = self.schema_provider.supported_schema_ids() { - if supported_schema_ids.contains(plain_operation.schema_id()) { + // If the node has been configured with a whitelist of supported schema ids, check that the + // sent operation follows one of our supported schema + if self.schema_provider.is_whitelist_active() { + if self + .schema_provider + .supported_schema_ids() + .await + .contains(plain_operation.schema_id()) + { return Err(IngestError::UnsupportedSchema); } } diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index de7235e23..72230f301 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -63,19 +63,28 @@ pub async fn replication_service( Ok(()) } +#[derive(Debug, Clone, PartialEq, Eq)] +struct Announcement { + /// This contains a list of schema ids this peer is interested in. + target_set: TargetSet, + + /// Timestamp of this announcement. Helps to understand if we can override the previous + /// announcement with a newer one. + timestamp: u64, +} + /// Statistics about successful and failed replication sessions for each connected peer. #[derive(Debug, Clone, PartialEq, Eq)] struct PeerStatus { /// Connectivity information like libp2p peer id and connection id. peer: Peer, - /// Last known announcement of this peer. This contains a list of schema ids this peer is - /// interested in. - announcement: Option, + /// Last known announcement of this peer. + announcement: Option, - /// Timestamp of the last known announcement. Helps to understand if we can override the - /// previous announcement with a newer one. - announcement_timestamp: u64, + /// Last time we've announced our local target set with this peer. Helps to check if we need to + /// inform them about any updates from our side. + sent_our_announcement_timestamp: u64, /// Number of successful replication sessions. 
successful_count: usize, @@ -89,7 +98,7 @@ impl PeerStatus { Self { peer, announcement: None, - announcement_timestamp: 0, + sent_our_announcement_timestamp: 0, successful_count: 0, failed_count: 0, } @@ -100,12 +109,13 @@ impl PeerStatus { /// /// This entails: /// -/// 1. Handles incoming replication- and peer connection messages from other services -/// 2. Maintains a list of currently connected p2panda peers -/// 3. Routes messages to the right replication session with help of the `SyncManager` and returns +/// 1. Manages announcements of us and other peers about which schema ids are supported +/// 2. Handles incoming replication- and peer connection messages from other services +/// 3. Maintains a list of currently connected p2panda peers. +/// 4. Routes messages to the right replication session with help of the `SyncManager` and returns /// responses to other services -/// 4. Schedules new replication sessions -/// 5. Handles replication errors and informs other services about them +/// 5. Schedules new replication sessions +/// 6. Handles replication errors and informs other services about them struct ConnectionManager { /// List of peers the connection mananger knows about and are available for replication. peers: HashMap, @@ -125,6 +135,10 @@ struct ConnectionManager { /// Provider to retrieve our currently supported schema ids. schema_provider: SchemaProvider, + + /// Our latest announcement state we want to propagate to all current and future peers. It + /// contains a list of schema ids we're supporting as a node. + announcement: Announcement, } impl ConnectionManager { @@ -147,24 +161,17 @@ impl ConnectionManager { tx: tx.clone(), rx: BroadcastStream::new(tx.subscribe()), schema_provider: schema_provider.clone(), + announcement: Announcement { + target_set: TargetSet::new(&[]), + timestamp: 0, + }, } } /// Returns set of schema ids we are interested in and support on this node. 
async fn target_set(&self) -> TargetSet { - let supported_schema = match self.schema_provider.supported_schema_ids() { - // If supported_schema_ids is set return this list. - Some(supported_schema_ids) => supported_schema_ids.to_owned(), - // Otherwise return ids for all schema we know about on this node. - None => self - .schema_provider - .all() - .await - .iter() - .map(|schema| schema.id().to_owned()) - .collect(), - }; - TargetSet::new(&supported_schema) + let supported_schema_ids = self.schema_provider.supported_schema_ids().await; + TargetSet::new(&supported_schema_ids) } /// Register a new peer connection on the manager. @@ -202,14 +209,13 @@ impl ConnectionManager { async fn on_replication_message(&mut self, peer: Peer, message: SyncMessage) { let session_id = message.session_id(); - // If this is a SyncRequest message first we check if the contained TargetSet matches our - // own locally configured TargetSet. + // If this is a SyncRequest message first we check if the contained target set matches our + // own locally configured one. if let Message::SyncRequest(_, target_set) = message.message() { - // If this node has been configured with supported_schema_ids then we check the target - // set of the requests matches our own, otherwise we skip this step and accept any - // target set. - if self.schema_provider.supported_schema_ids().is_some() - && target_set != &self.target_set().await + // If this node has been configured with a whitelist of schema ids then we check the + // target set of the requests matches our own, otherwise we skip this step and accept + // any target set. + if self.schema_provider.is_whitelist_active() && target_set != &self.target_set().await { // If it doesn't match we signal that an error occurred and return at this point. self.on_replication_error(peer, session_id, ReplicationError::UnsupportedTargetSet) @@ -378,8 +384,12 @@ impl ConnectionManager { /// Main event loop running the async streams. 
pub async fn run(mut self) { + // Subscribe to updates when our target set got changed + let mut schema_provider_rx = self.schema_provider.on_schema_added(); + loop { tokio::select! { + // Service message arrived event = self.rx.next() => match event { Some(Ok(message)) => self.handle_service_message(message).await, Some(Err(err)) => { @@ -390,6 +400,12 @@ impl ConnectionManager { return }, }, + + // Target set got updated + Ok(_) = schema_provider_rx.recv() => { + }, + + // Replication schedule is due Some(_) = self.scheduler.next() => { self.update_sessions().await } diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index d548e68b5..3cde1704b 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use std::sync::Arc; -use anyhow::{anyhow, Result}; +use anyhow::{bail, Result}; use log::{debug, info}; use p2panda_rs::schema::{Schema, SchemaId, SYSTEM_SCHEMAS}; use p2panda_rs::Human; @@ -18,8 +18,8 @@ pub struct SchemaProvider { /// In-memory store of registered schemas. schemas: Arc>>, - /// Optional list of schema this provider supports. If set only these schema will be added to the schema - /// registry once materialized. + /// Optional list of whitelisted schema ids. When set, only these schema ids will be accepted + /// on this node, if not set _all_ schema ids are accepted. supported_schema_ids: Option>, /// Sender for broadcast channel informing subscribers about updated schemas. 
@@ -86,9 +86,7 @@ impl SchemaProvider { pub async fn update(&self, schema: Schema) -> Result { if let Some(supported_schema) = self.supported_schema_ids.as_ref() { if !supported_schema.contains(schema.id()) { - return Err(anyhow!( - "Attempted to add unsupported schema to schema provider" - )); + bail!("Attempted to add unsupported schema to schema provider"); } }; @@ -96,8 +94,8 @@ impl SchemaProvider { let schema_exists = schemas.get(schema.id()).is_some(); if schema_exists { - // Return true here as the schema already exists in it's current state so we don't - // need to mutate the schema store or announce any change. + // Return true here as the schema already exists in it's current state so we don't need + // to mutate the schema store or announce any change. return Ok(true); } @@ -114,9 +112,19 @@ impl SchemaProvider { Ok(is_update) } - // Return the configured supported schema. - pub fn supported_schema_ids(&self) -> Option<&Vec> { - self.supported_schema_ids.as_ref() + /// Returns a list of all supported schema ids. + pub async fn supported_schema_ids(&self) -> Vec { + self.all() + .await + .iter() + .map(|schema| schema.id().to_owned()) + .collect() + } + + /// Returns true if a whitelist of supported schema ids was provided through user + /// configuration. 
+ pub fn is_whitelist_active(&self) -> bool { + self.supported_schema_ids.is_some() } } From 467193c0a3bb11470563fcd52cd538a4e849da8b Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:18:11 +0200 Subject: [PATCH 04/66] Remove broken test, code will change anyhow soon --- aquadoggo_cli/src/schemas.rs | 30 +++--------------------------- 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/aquadoggo_cli/src/schemas.rs b/aquadoggo_cli/src/schemas.rs index d29170c7b..bdddc548d 100644 --- a/aquadoggo_cli/src/schemas.rs +++ b/aquadoggo_cli/src/schemas.rs @@ -1,9 +1,10 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use anyhow::{anyhow, Result}; -use p2panda_rs::schema::SchemaId; use std::fs::File; use std::io::Read; + +use anyhow::{anyhow, Result}; +use p2panda_rs::schema::SchemaId; use toml::Table; pub fn read_schema_ids_from_file(file: &mut File) -> Result> { @@ -15,28 +16,3 @@ pub fn read_schema_ids_from_file(file: &mut File) -> Result> { ))?; Ok(value.clone().try_into::>()?) 
} - -#[cfg(test)] -mod tests { - use std::fs::{create_dir_all, File}; - use std::io::Write; - - use tempfile::TempDir; - - use super::read_schema_ids_from_file; - - #[test] - fn reads_schema_ids_from_file() { - let tmp_dir = TempDir::new().unwrap(); - let mut tmp_path = tmp_dir.path().to_owned(); - tmp_path.push("schema.toml"); - - create_dir_all(tmp_path.parent().unwrap()).unwrap(); - let mut file = File::create(&tmp_path).unwrap(); - file.write_all("schemas = [\"plant_0020c11ab63099193a8c8516cc00f88bfc8cdd657b94a99fef2fce86aaaede84f87d\"]".as_bytes()).unwrap(); - - let result = read_schema_ids_from_file(&mut file); - println!("{result:?}"); - assert!(result.is_ok()); - } -} From 4c3b3c3ddc4b609e234dcaa6b8bb48b491c7f125 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:18:23 +0200 Subject: [PATCH 05/66] Fix test after changing message types --- aquadoggo/src/replication/message.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aquadoggo/src/replication/message.rs b/aquadoggo/src/replication/message.rs index e38c4e104..6c9aae924 100644 --- a/aquadoggo/src/replication/message.rs +++ b/aquadoggo/src/replication/message.rs @@ -247,7 +247,7 @@ mod tests { 51, Message::SyncRequest(Mode::SetReconciliation, target_set.clone()) )), - serialize_value(cbor!([0, 51, 1, target_set])) + serialize_value(cbor!([1, 51, 1, target_set])) ); assert_eq!( @@ -274,7 +274,7 @@ mod tests { #[rstest] fn deserialize(#[from(random_target_set)] target_set: TargetSet, public_key: PublicKey) { assert_eq!( - deserialize_into::(&serialize_value(cbor!([0, 12, 0, target_set]))) + deserialize_into::(&serialize_value(cbor!([1, 12, 0, target_set]))) .unwrap(), SyncMessage::new( 12, From 902771c07d2a3a0d35715580f07a8cc43ef0a810 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:18:34 +0200 Subject: [PATCH 06/66] Filter schema ids directly in provider --- aquadoggo/src/schema/schema_provider.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 
deletions(-) diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 3cde1704b..68aed3125 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -113,12 +113,19 @@ impl SchemaProvider { } /// Returns a list of all supported schema ids. + /// + /// If no whitelist was set it returns the list of all currently known schema ids. If a + /// whitelist was set it directly returns the list itself. pub async fn supported_schema_ids(&self) -> Vec { - self.all() - .await - .iter() - .map(|schema| schema.id().to_owned()) - .collect() + match &self.supported_schema_ids { + Some(schema_ids) => schema_ids.clone(), + None => self + .all() + .await + .iter() + .map(|schema| schema.id().to_owned()) + .collect(), + } } /// Returns true if a whitelist of supported schema ids was provided through user From 245693783835754dbfa414f025f43c10c3153c1d Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:20:46 +0200 Subject: [PATCH 07/66] Make distinction clearer between whitelisting and supporting schema ids --- aquadoggo/src/schema/schema_provider.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 68aed3125..7f67bd807 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -20,7 +20,7 @@ pub struct SchemaProvider { /// Optional list of whitelisted schema ids. When set, only these schema ids will be accepted /// on this node, if not set _all_ schema ids are accepted. - supported_schema_ids: Option>, + whitelisted_schema_ids: Option>, /// Sender for broadcast channel informing subscribers about updated schemas. tx: Sender, @@ -30,7 +30,7 @@ impl SchemaProvider { /// Returns a `SchemaProvider` containing the given application schemas and all system schemas. 
pub fn new( application_schemas: Vec, - supported_schema_ids: Option>, + whitelisted_schema_ids: Option>, ) -> Self { // Collect all system and application schemas. let mut schemas = SYSTEM_SCHEMAS.clone(); @@ -42,8 +42,8 @@ impl SchemaProvider { index.insert(schema.id().to_owned(), schema.to_owned()); } - if let Some(supported_schema_ids) = &supported_schema_ids { - index.retain(|schema_id, _| supported_schema_ids.contains(schema_id)); + if let Some(schema_ids) = &whitelisted_schema_ids { + index.retain(|id, _| schema_ids.contains(id)); }; let (tx, _) = channel(64); @@ -59,7 +59,7 @@ impl SchemaProvider { Self { schemas: Arc::new(Mutex::new(index)), - supported_schema_ids, + whitelisted_schema_ids, tx, } } @@ -84,8 +84,8 @@ impl SchemaProvider { /// Returns `true` if a schema was updated or it already existed in it's current state, and /// `false` if it was inserted. pub async fn update(&self, schema: Schema) -> Result { - if let Some(supported_schema) = self.supported_schema_ids.as_ref() { - if !supported_schema.contains(schema.id()) { + if let Some(whitelisted_ids) = self.whitelisted_schema_ids.as_ref() { + if !whitelisted_ids.contains(schema.id()) { bail!("Attempted to add unsupported schema to schema provider"); } }; @@ -117,7 +117,7 @@ impl SchemaProvider { /// If no whitelist was set it returns the list of all currently known schema ids. If a /// whitelist was set it directly returns the list itself. pub async fn supported_schema_ids(&self) -> Vec { - match &self.supported_schema_ids { + match &self.whitelisted_schema_ids { Some(schema_ids) => schema_ids.clone(), None => self .all() @@ -131,7 +131,7 @@ impl SchemaProvider { /// Returns true if a whitelist of supported schema ids was provided through user /// configuration. 
pub fn is_whitelist_active(&self) -> bool { - self.supported_schema_ids.is_some() + self.whitelisted_schema_ids.is_some() } } From da37007339bbb281984e1bdb6a03338954213544 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:22:05 +0200 Subject: [PATCH 08/66] Make clippy happy --- aquadoggo/src/replication/ingest.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/aquadoggo/src/replication/ingest.rs b/aquadoggo/src/replication/ingest.rs index 5b66af4bd..0aee97444 100644 --- a/aquadoggo/src/replication/ingest.rs +++ b/aquadoggo/src/replication/ingest.rs @@ -51,15 +51,14 @@ impl SyncIngest { // If the node has been configured with a whitelist of supported schema ids, check that the // sent operation follows one of our supported schema - if self.schema_provider.is_whitelist_active() { - if self + if self.schema_provider.is_whitelist_active() + && self .schema_provider .supported_schema_ids() .await .contains(plain_operation.schema_id()) - { - return Err(IngestError::UnsupportedSchema); - } + { + return Err(IngestError::UnsupportedSchema); } // Retrieve the schema if it has been materialized on the node. 
From 99ad83a8d4e7051f77df667a9cf38df5915cf6a6 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:32:18 +0200 Subject: [PATCH 09/66] Generate new announcement state on first run and when provider updates --- aquadoggo/src/replication/service.rs | 33 +++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 72230f301..3c90dc429 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: AGPL-3.0-or-later use std::collections::HashMap; -use std::time::Duration; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; use anyhow::Result; use libp2p::PeerId; @@ -73,6 +73,20 @@ struct Announcement { timestamp: u64, } +impl Announcement { + pub fn new(target_set: TargetSet) -> Self { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("System time invalid, operation system time configured before UNIX epoch") + .as_secs(); + + Self { + timestamp, + target_set, + } + } +} + /// Statistics about successful and failed replication sessions for each connected peer. #[derive(Debug, Clone, PartialEq, Eq)] struct PeerStatus { @@ -138,7 +152,7 @@ struct ConnectionManager { /// Our latest announcement state we want to propagate to all current and future peers. It /// contains a list of schema ids we're supporting as a node. - announcement: Announcement, + announcement: Option, } impl ConnectionManager { @@ -161,10 +175,7 @@ impl ConnectionManager { tx: tx.clone(), rx: BroadcastStream::new(tx.subscribe()), schema_provider: schema_provider.clone(), - announcement: Announcement { - target_set: TargetSet::new(&[]), - timestamp: 0, - }, + announcement: None, } } @@ -285,6 +296,12 @@ impl ConnectionManager { self.send_service_message(ServiceMessage::ReplicationFailed(peer)); } + /// Generates our new announcement state we can then propagate to all known and future peers. 
+ async fn update_announcement(&mut self) { + let target_set = self.target_set().await; + self.announcement = Some(Announcement::new(target_set)); + } + /// Determine if we can attempt new replication sessions with the peers we currently know /// about. async fn update_sessions(&mut self) { @@ -384,6 +401,9 @@ impl ConnectionManager { /// Main event loop running the async streams. pub async fn run(mut self) { + // Generate our own first announcement + self.update_announcement().await; + // Subscribe to updates when our target set got changed let mut schema_provider_rx = self.schema_provider.on_schema_added(); @@ -403,6 +423,7 @@ impl ConnectionManager { // Target set got updated Ok(_) = schema_provider_rx.recv() => { + self.update_announcement().await; }, // Replication schedule is due From 9cd83e8eff02bf755a5e89f19c6878f5cc823dae Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:38:50 +0200 Subject: [PATCH 10/66] Move Announcement struct into separate file --- aquadoggo/src/replication/announcement.rs | 29 +++++++++++++++++++++++ aquadoggo/src/replication/mod.rs | 2 ++ aquadoggo/src/replication/service.rs | 29 +++-------------------- 3 files changed, 34 insertions(+), 26 deletions(-) create mode 100644 aquadoggo/src/replication/announcement.rs diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs new file mode 100644 index 000000000..e3f785836 --- /dev/null +++ b/aquadoggo/src/replication/announcement.rs @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::time::{SystemTime, UNIX_EPOCH}; + +use crate::replication::TargetSet; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Announcement { + /// This contains a list of schema ids this peer is interested in. + target_set: TargetSet, + + /// Timestamp of this announcement. Helps to understand if we can override the previous + /// announcement with a newer one. 
+ timestamp: u64, +} + +impl Announcement { + pub fn new(target_set: TargetSet) -> Self { + let timestamp = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("System time invalid, operation system time configured before UNIX epoch") + .as_secs(); + + Self { + timestamp, + target_set, + } + } +} diff --git a/aquadoggo/src/replication/mod.rs b/aquadoggo/src/replication/mod.rs index 599a64fb3..e11bcb78c 100644 --- a/aquadoggo/src/replication/mod.rs +++ b/aquadoggo/src/replication/mod.rs @@ -1,5 +1,6 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +mod announcement; pub mod errors; mod ingest; mod manager; @@ -11,6 +12,7 @@ mod strategies; mod target_set; pub mod traits; +pub use announcement::Announcement; pub use ingest::SyncIngest; pub use manager::SyncManager; pub use message::{LiveMode, LogHeights, Message, SyncMessage}; diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 3c90dc429..c383a21a2 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: AGPL-3.0-or-later use std::collections::HashMap; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; +use std::time::Duration; use anyhow::Result; use libp2p::PeerId; @@ -22,7 +22,8 @@ use crate::network::identity::to_libp2p_peer_id; use crate::network::Peer; use crate::replication::errors::ReplicationError; use crate::replication::{ - Message, Mode, Session, SessionId, SyncIngest, SyncManager, SyncMessage, TargetSet, + Announcement, Message, Mode, Session, SessionId, SyncIngest, SyncManager, SyncMessage, + TargetSet, }; use crate::schema::SchemaProvider; @@ -63,30 +64,6 @@ pub async fn replication_service( Ok(()) } -#[derive(Debug, Clone, PartialEq, Eq)] -struct Announcement { - /// This contains a list of schema ids this peer is interested in. - target_set: TargetSet, - - /// Timestamp of this announcement. 
Helps to understand if we can override the previous - /// announcement with a newer one. - timestamp: u64, -} - -impl Announcement { - pub fn new(target_set: TargetSet) -> Self { - let timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("System time invalid, operation system time configured before UNIX epoch") - .as_secs(); - - Self { - timestamp, - target_set, - } - } -} - /// Statistics about successful and failed replication sessions for each connected peer. #[derive(Debug, Clone, PartialEq, Eq)] struct PeerStatus { From db44c983401b4141c2a8a406d847dcb1aa8a6c41 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 13:40:30 +0200 Subject: [PATCH 11/66] Our own target set should always be valid --- aquadoggo/src/replication/service.rs | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index c383a21a2..0c0e3c78b 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -6,7 +6,7 @@ use std::time::Duration; use anyhow::Result; use libp2p::PeerId; use log::{debug, info, trace, warn}; -use p2panda_rs::{Human, Validate}; +use p2panda_rs::Human; use rand::seq::SliceRandom; use rand::thread_rng; use tokio::task; @@ -285,11 +285,6 @@ impl ConnectionManager { // Determine the target set our node is interested in let target_set = self.target_set().await; - if let Err(err) = target_set.validate() { - warn!("Not initiating replication: {err}"); - return; - } - // Iterate through all currently connected peers let mut attempt_peers: Vec = self .peers From d631d0029a7470373668134a237dc7e65d14a370 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 14:40:25 +0200 Subject: [PATCH 12/66] Introduce a peer message type for messages outside of replication --- aquadoggo/src/bus.rs | 9 ++- aquadoggo/src/network/mod.rs | 2 +- aquadoggo/src/network/peers/behaviour.rs | 36 +++++++---- aquadoggo/src/network/peers/handler.rs | 11 
++-- aquadoggo/src/network/peers/message.rs | 16 +++++ aquadoggo/src/network/peers/mod.rs | 2 + aquadoggo/src/network/peers/protocol.rs | 4 +- aquadoggo/src/network/service.rs | 9 +-- aquadoggo/src/replication/announcement.rs | 4 +- aquadoggo/src/replication/message.rs | 45 ++++---------- aquadoggo/src/replication/service.rs | 75 +++++++++++++++++------ 11 files changed, 127 insertions(+), 86 deletions(-) create mode 100644 aquadoggo/src/network/peers/message.rs diff --git a/aquadoggo/src/bus.rs b/aquadoggo/src/bus.rs index addc9402c..a01344afc 100644 --- a/aquadoggo/src/bus.rs +++ b/aquadoggo/src/bus.rs @@ -3,8 +3,7 @@ use p2panda_rs::operation::OperationId; use crate::manager::Sender; -use crate::network::Peer; -use crate::replication::SyncMessage; +use crate::network::{Peer, PeerMessage}; /// Sender for cross-service communication bus. pub type ServiceSender = Sender; @@ -21,11 +20,11 @@ pub enum ServiceMessage { /// Node closed a connection to another node. PeerDisconnected(Peer), - /// Node sent a message to remote node for replication. - SentReplicationMessage(Peer, SyncMessage), + /// Node sent a message to remote node. + SentMessage(Peer, PeerMessage), /// Node received a message from remote node for replication. - ReceivedReplicationMessage(Peer, SyncMessage), + ReceivedMessage(Peer, PeerMessage), /// Replication protocol failed with an critical error. 
ReplicationFailed(Peer), diff --git a/aquadoggo/src/network/mod.rs b/aquadoggo/src/network/mod.rs index a5440ba00..514c5e9c7 100644 --- a/aquadoggo/src/network/mod.rs +++ b/aquadoggo/src/network/mod.rs @@ -10,6 +10,6 @@ mod swarm; mod transport; pub use config::NetworkConfiguration; -pub use peers::Peer; +pub use peers::{Peer, PeerMessage}; pub use service::network_service; pub use shutdown::ShutdownHandler; diff --git a/aquadoggo/src/network/peers/behaviour.rs b/aquadoggo/src/network/peers/behaviour.rs index 898915f25..d5559715a 100644 --- a/aquadoggo/src/network/peers/behaviour.rs +++ b/aquadoggo/src/network/peers/behaviour.rs @@ -12,13 +12,12 @@ use libp2p::swarm::{ use libp2p::{Multiaddr, PeerId}; use crate::network::peers::handler::{Handler, HandlerFromBehaviour, HandlerToBehaviour}; -use crate::network::peers::Peer; -use crate::replication::SyncMessage; +use crate::network::peers::{Peer, PeerMessage}; #[derive(Debug)] pub enum Event { /// Message received on the inbound stream. - MessageReceived(Peer, SyncMessage), + MessageReceived(Peer, PeerMessage), /// We established an inbound or outbound connection to a peer for the first time. 
PeerConnected(Peer), @@ -105,7 +104,7 @@ impl Behaviour { &mut self, peer_id: PeerId, connection_id: ConnectionId, - message: SyncMessage, + message: PeerMessage, ) { let peer = Peer::new(peer_id, connection_id); self.push_event(ToSwarm::GenerateEvent(Event::MessageReceived( @@ -130,7 +129,7 @@ impl Behaviour { self.enabled = true } - pub fn send_message(&mut self, peer: Peer, message: SyncMessage) { + pub fn send_message(&mut self, peer: Peer, message: PeerMessage) { self.push_event(ToSwarm::NotifyHandler { peer_id: peer.id(), event: HandlerFromBehaviour::Message(message), @@ -236,7 +235,7 @@ mod tests { use p2panda_rs::schema::SchemaId; use rstest::rstest; - use crate::network::Peer; + use crate::network::{Peer, PeerMessage}; use crate::replication::{Message, SyncMessage, TargetSet}; use crate::test_utils::helpers::random_target_set; @@ -306,7 +305,10 @@ mod tests { // Send a message from to swarm_1 local peer from swarm_2 local peer. swarm_1.behaviour_mut().send_message( Peer::new(swarm_2_peer_id, ConnectionId::new_unchecked(1)), - SyncMessage::new(0, Message::SyncRequest(0.into(), TargetSet::new(&vec![]))), + PeerMessage::SyncMessage(SyncMessage::new( + 0, + Message::SyncRequest(0.into(), TargetSet::new(&vec![])), + )), ); // Await a swarm event on swarm_2. 
@@ -370,13 +372,19 @@ mod tests { // Send a message from swarm_1 to swarm_2 swarm_1.behaviour_mut().send_message( peer_2, - SyncMessage::new(0, Message::SyncRequest(0.into(), target_set_1.clone())), + PeerMessage::SyncMessage(SyncMessage::new( + 0, + Message::SyncRequest(0.into(), target_set_1.clone()), + )), ); // Send a message from swarm_2 to swarm_1 swarm_2.behaviour_mut().send_message( peer_1, - SyncMessage::new(1, Message::SyncRequest(0.into(), target_set_2.clone())), + PeerMessage::SyncMessage(SyncMessage::new( + 1, + Message::SyncRequest(0.into(), target_set_2.clone()), + )), ); // And again add the next behaviour events which occur in either swarms @@ -395,7 +403,10 @@ mod tests { assert_eq!(peer.id(), swarm_2_peer_id); assert_eq!( message.unwrap(), - SyncMessage::new(1, Message::SyncRequest(0.into(), target_set_2.clone())) + PeerMessage::SyncMessage(SyncMessage::new( + 1, + Message::SyncRequest(0.into(), target_set_2.clone()) + )) ); // swarm_2 should have received the message from swarm_1 peer @@ -403,7 +414,10 @@ mod tests { assert_eq!(peer.id(), swarm_1_peer_id); assert_eq!( message.unwrap(), - SyncMessage::new(0, Message::SyncRequest(0.into(), target_set_1)) + PeerMessage::SyncMessage(SyncMessage::new( + 0, + Message::SyncRequest(0.into(), target_set_1) + )) ); } } diff --git a/aquadoggo/src/network/peers/handler.rs b/aquadoggo/src/network/peers/handler.rs index b45ba9d24..43cf8de2f 100644 --- a/aquadoggo/src/network/peers/handler.rs +++ b/aquadoggo/src/network/peers/handler.rs @@ -15,8 +15,7 @@ use libp2p::swarm::{ use log::warn; use thiserror::Error; -use crate::network::peers::{Codec, CodecError, Protocol}; -use crate::replication::SyncMessage; +use crate::network::peers::{Codec, CodecError, PeerMessage, Protocol}; /// The time a connection is maintained to a peer without being in live mode and without /// send/receiving a message from. Connections that idle beyond this timeout are disconnected. 
@@ -89,7 +88,7 @@ pub struct Handler { outbound_substream_establishing: bool, /// Queue of messages that we want to send to the remote. - send_queue: VecDeque, + send_queue: VecDeque, /// Last time we've observed inbound or outbound messaging activity. last_io_activity: Instant, @@ -140,7 +139,7 @@ impl Handler { #[derive(Debug)] pub enum HandlerFromBehaviour { /// Message to send on outbound stream. - Message(SyncMessage), + Message(PeerMessage), /// Protocol failed with a critical error. CriticalError, @@ -152,7 +151,7 @@ pub enum HandlerFromBehaviour { #[derive(Debug)] pub enum HandlerToBehaviour { /// Message received on the inbound stream. - Message(SyncMessage), + Message(PeerMessage), } #[derive(Debug, Error)] @@ -181,7 +180,7 @@ enum OutboundSubstreamState { WaitingOutput(Stream), /// Waiting to send a message to the remote. - PendingSend(Stream, SyncMessage), + PendingSend(Stream, PeerMessage), /// Waiting to flush the substream so that the data arrives to the remote. PendingFlush(Stream), diff --git a/aquadoggo/src/network/peers/message.rs b/aquadoggo/src/network/peers/message.rs new file mode 100644 index 000000000..1ce81969e --- /dev/null +++ b/aquadoggo/src/network/peers/message.rs @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use serde::{Deserialize, Serialize}; + +use crate::replication::SyncMessage; + +/// p2panda protocol messages which can be sent over the wire. +#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(untagged)] +pub enum PeerMessage { + /// Announcement of peers about the schema ids they are interest in. + // Announce(AnnounceMessage), + + /// Replication status and data exchange. 
+ SyncMessage(SyncMessage), +} diff --git a/aquadoggo/src/network/peers/mod.rs b/aquadoggo/src/network/peers/mod.rs index 579c75f3c..a2fdbfa27 100644 --- a/aquadoggo/src/network/peers/mod.rs +++ b/aquadoggo/src/network/peers/mod.rs @@ -2,10 +2,12 @@ mod behaviour; mod handler; +mod message; mod peer; mod protocol; pub use behaviour::{Behaviour, Event}; pub use handler::Handler; +pub use message::PeerMessage; pub use peer::Peer; pub use protocol::{Codec, CodecError, Protocol, PROTOCOL_NAME}; diff --git a/aquadoggo/src/network/peers/protocol.rs b/aquadoggo/src/network/peers/protocol.rs index ba06e0e55..17e670f50 100644 --- a/aquadoggo/src/network/peers/protocol.rs +++ b/aquadoggo/src/network/peers/protocol.rs @@ -7,13 +7,13 @@ use futures::{future, AsyncRead, AsyncWrite, Future}; use libp2p::core::UpgradeInfo; use libp2p::{InboundUpgrade, OutboundUpgrade}; -use crate::replication::SyncMessage; +use crate::network::peers::PeerMessage; pub const PROTOCOL_NAME: &str = "/p2p/p2panda/1.0.0"; pub type CodecError = CborCodecError; -pub type Codec = CborCodec; +pub type Codec = CborCodec; #[derive(Clone, Debug)] pub struct Protocol; diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index f62e69a19..3e7b76f6a 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -344,11 +344,11 @@ impl EventLoop { /// Handle an incoming message via the communication bus from other services. 
async fn handle_service_message(&mut self, message: ServiceMessage) { match message { - ServiceMessage::SentReplicationMessage(peer, sync_message) => self + ServiceMessage::SentMessage(peer, peer_message) => self .swarm .behaviour_mut() .peers - .send_message(peer, sync_message), + .send_message(peer, peer_message), ServiceMessage::ReplicationFailed(peer) => { self.swarm.behaviour_mut().peers.handle_critical_error(peer); } @@ -368,10 +368,7 @@ impl EventLoop { } peers::Event::MessageReceived(peer, message) => { // Inform other services about received messages from peer - self.send_service_message(ServiceMessage::ReceivedReplicationMessage( - *peer, - message.clone(), - )) + self.send_service_message(ServiceMessage::ReceivedMessage(*peer, message.clone())) } } } diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index e3f785836..24e28e026 100644 --- a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -7,11 +7,11 @@ use crate::replication::TargetSet; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Announcement { /// This contains a list of schema ids this peer is interested in. - target_set: TargetSet, + pub target_set: TargetSet, /// Timestamp of this announcement. Helps to understand if we can override the previous /// announcement with a newer one. 
- timestamp: u64, + pub timestamp: u64, } impl Announcement { diff --git a/aquadoggo/src/replication/message.rs b/aquadoggo/src/replication/message.rs index 6c9aae924..aa7386cf1 100644 --- a/aquadoggo/src/replication/message.rs +++ b/aquadoggo/src/replication/message.rs @@ -13,23 +13,19 @@ use serde::{Deserialize, Serialize}; use crate::replication::{Mode, SessionId, TargetSet}; -pub const ANNOUNCE_TYPE: MessageType = 0; -pub const SYNC_REQUEST_TYPE: MessageType = 1; -pub const SYNC_DONE_TYPE: MessageType = 2; -pub const ENTRY_TYPE: MessageType = 3; +pub const SYNC_REQUEST_TYPE: MessageType = 0; +pub const ENTRY_TYPE: MessageType = 8; +pub const SYNC_DONE_TYPE: MessageType = 9; pub const HAVE_TYPE: MessageType = 10; pub type MessageType = u64; -pub type Timestamp = u64; - pub type LiveMode = bool; pub type LogHeights = (PublicKey, Vec<(LogId, SeqNum)>); #[derive(Debug, Clone, Eq, PartialEq)] pub enum Message { - Announce(Timestamp, TargetSet), SyncRequest(Mode, TargetSet), Have(Vec), Entry(EncodedEntry, Option), @@ -39,11 +35,10 @@ pub enum Message { impl Message { pub fn message_type(&self) -> MessageType { match self { - Message::Announce(_, _) => ANNOUNCE_TYPE, Message::SyncRequest(_, _) => SYNC_REQUEST_TYPE, - Message::Have(_) => HAVE_TYPE, Message::Entry(_, _) => ENTRY_TYPE, Message::SyncDone(_) => SYNC_DONE_TYPE, + Message::Have(_) => HAVE_TYPE, } } } @@ -103,23 +98,12 @@ impl Serialize for SyncMessage { }; match self.message() { - Message::Announce(timestamp, target_set) => { - let mut seq = serialize_header(serializer.serialize_seq(Some(4))?)?; - seq.serialize_element(timestamp)?; - seq.serialize_element(target_set)?; - seq.end() - } Message::SyncRequest(mode, target_set) => { let mut seq = serialize_header(serializer.serialize_seq(Some(4))?)?; seq.serialize_element(mode)?; seq.serialize_element(target_set)?; seq.end() } - Message::Have(log_heights) => { - let mut seq = serialize_header(serializer.serialize_seq(Some(3))?)?; - 
seq.serialize_element(log_heights)?; - seq.end() - } Message::Entry(entry_bytes, operation_bytes) => { let mut seq = serialize_header(serializer.serialize_seq(Some(4))?)?; seq.serialize_element(entry_bytes)?; @@ -131,6 +115,11 @@ impl Serialize for SyncMessage { seq.serialize_element(live_mode)?; seq.end() } + Message::Have(log_heights) => { + let mut seq = serialize_header(serializer.serialize_seq(Some(3))?)?; + seq.serialize_element(log_heights)?; + seq.end() + } } } } @@ -161,17 +150,7 @@ impl<'de> Deserialize<'de> for SyncMessage { serde::de::Error::custom("missing session id in replication message") })?; - let message = if message_type == ANNOUNCE_TYPE { - let timestamp: Timestamp = seq.next_element()?.ok_or_else(|| { - serde::de::Error::custom("missing timestamp in announce message") - })?; - - let target_set: TargetSet = seq.next_element()?.ok_or_else(|| { - serde::de::Error::custom("missing target set in announce message") - })?; - - Ok(Message::Announce(timestamp, target_set)) - } else if message_type == SYNC_REQUEST_TYPE { + let message = if message_type == SYNC_REQUEST_TYPE { let mode: Mode = seq.next_element()?.ok_or_else(|| { serde::de::Error::custom("missing mode in sync request message") })?; @@ -247,7 +226,7 @@ mod tests { 51, Message::SyncRequest(Mode::SetReconciliation, target_set.clone()) )), - serialize_value(cbor!([1, 51, 1, target_set])) + serialize_value(cbor!([0, 51, 1, target_set])) ); assert_eq!( @@ -274,7 +253,7 @@ mod tests { #[rstest] fn deserialize(#[from(random_target_set)] target_set: TargetSet, public_key: PublicKey) { assert_eq!( - deserialize_into::(&serialize_value(cbor!([1, 12, 0, target_set]))) + deserialize_into::(&serialize_value(cbor!([0, 12, 0, target_set]))) .unwrap(), SyncMessage::new( 12, diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 0c0e3c78b..92d17e812 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -19,7 +19,7 @@ use 
crate::context::Context; use crate::db::SqlStore; use crate::manager::{ServiceReadySender, Shutdown}; use crate::network::identity::to_libp2p_peer_id; -use crate::network::Peer; +use crate::network::{Peer, PeerMessage}; use crate::replication::errors::ReplicationError; use crate::replication::{ Announcement, Message, Mode, Session, SessionId, SyncIngest, SyncManager, SyncMessage, @@ -216,8 +216,9 @@ impl ConnectionManager { match self.sync_manager.handle_message(&peer, &message).await { Ok(result) => { for message in result.messages { - self.send_service_message(ServiceMessage::SentReplicationMessage( - peer, message, + self.send_service_message(ServiceMessage::SentMessage( + peer, + PeerMessage::SyncMessage(message), )); } @@ -282,17 +283,27 @@ impl ConnectionManager { /// Determine if we can attempt new replication sessions with the peers we currently know /// about. async fn update_sessions(&mut self) { - // Determine the target set our node is interested in - let target_set = self.target_set().await; + let local_announcement = self + .announcement + .as_ref() + .expect("Our announcement needs to be set latest when we call 'update_sessions'"); // Iterate through all currently connected peers let mut attempt_peers: Vec = self .peers .clone() .into_iter() - .filter_map(|(peer, _)| { + .filter_map(|(peer, status)| { let sessions = self.sync_manager.get_sessions(&peer); + // 1. Did we already receive this peers announcement state? If not we can't do + // anything yet and need to wait. + let remote_target_set: TargetSet = if let Some(announcement) = status.announcement { + announcement.target_set + } else { + return None; + }; + // 1. Check if we're running too many sessions with that peer on this connection // already. This limit is configurable. let active_sessions: Vec<&Session> = sessions @@ -304,7 +315,7 @@ impl ConnectionManager { // set. If we would start that session again it would be considered an error. 
let has_active_target_set_session = active_sessions .iter() - .any(|session| session.target_set() == target_set); + .any(|session| session.target_set() == local_announcement.target_set); if active_sessions.len() < MAX_SESSIONS_PER_PEER && !has_active_target_set_session { Some(peer) @@ -323,7 +334,24 @@ impl ConnectionManager { attempt_peers.truncate(MAX_PEER_SAMPLE); for peer in attempt_peers { - self.initiate_replication(&peer, &target_set).await; + // @TODO + // self.initiate_replication(&peer, &target_set).await; + } + } + + async fn announce(&self) { + let local_announcement = self + .announcement + .as_ref() + .expect("Our announcement needs to be set latest when we call 'update_sessions'"); + + for (peer, status) in &self.peers { + if status.sent_our_announcement_timestamp > local_announcement.timestamp { + continue; + } + + // @TODO + // self.send_service_message(ServiceMessage::SentMessage(*peer, message)); } } @@ -336,8 +364,9 @@ impl ConnectionManager { { Ok(messages) => { for message in messages { - self.send_service_message(ServiceMessage::SentReplicationMessage( - *peer, message, + self.send_service_message(ServiceMessage::SentMessage( + *peer, + PeerMessage::SyncMessage(message), )); } } @@ -356,9 +385,11 @@ impl ConnectionManager { ServiceMessage::PeerDisconnected(peer) => { self.on_connection_closed(peer).await; } - ServiceMessage::ReceivedReplicationMessage(peer, message) => { - self.on_replication_message(peer, message).await; - } + ServiceMessage::ReceivedMessage(peer, message) => match message { + PeerMessage::SyncMessage(message) => { + self.on_replication_message(peer, message).await; + } + }, _ => (), // Ignore all other messages } } @@ -398,8 +429,9 @@ impl ConnectionManager { self.update_announcement().await; }, - // Replication schedule is due + // Announcement & replication schedule is due Some(_) = self.scheduler.next() => { + self.announce().await; self.update_sessions().await } } @@ -420,7 +452,7 @@ mod tests { use 
tokio::sync::broadcast; use crate::bus::ServiceMessage; - use crate::network::Peer; + use crate::network::{Peer, PeerMessage}; use crate::replication::service::PeerStatus; use crate::replication::{Message, Mode, SyncMessage, TargetSet}; use crate::schema::SchemaProvider; @@ -465,9 +497,12 @@ mod tests { assert_eq!(rx.len(), 1); assert_eq!( rx.recv().await, - Ok(ServiceMessage::SentReplicationMessage( + Ok(ServiceMessage::SentMessage( remote_peer, - SyncMessage::new(0, Message::SyncRequest(Mode::LogHeight, target_set)) + PeerMessage::SyncMessage(SyncMessage::new( + 0, + Message::SyncRequest(Mode::LogHeight, target_set) + )) )) ); assert_eq!(manager.sync_manager.get_sessions(&remote_peer).len(), 1); @@ -510,12 +545,12 @@ mod tests { ); let unsupported_target_set = TargetSet::new(&[unsupported_schema_id]); manager - .handle_service_message(ServiceMessage::ReceivedReplicationMessage( + .handle_service_message(ServiceMessage::ReceivedMessage( remote_peer, - SyncMessage::new( + PeerMessage::SyncMessage(SyncMessage::new( 0, Message::SyncRequest(Mode::LogHeight, unsupported_target_set), - ), + )), )) .await; From 713892def53a8b18a941266b18c25d45fa1fa495 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 14:40:59 +0200 Subject: [PATCH 13/66] Update doc string --- aquadoggo/src/bus.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aquadoggo/src/bus.rs b/aquadoggo/src/bus.rs index a01344afc..c7bdbc90c 100644 --- a/aquadoggo/src/bus.rs +++ b/aquadoggo/src/bus.rs @@ -23,7 +23,7 @@ pub enum ServiceMessage { /// Node sent a message to remote node. SentMessage(Peer, PeerMessage), - /// Node received a message from remote node for replication. + /// Node received a message from remote node. ReceivedMessage(Peer, PeerMessage), /// Replication protocol failed with an critical error. 
From 95174b778370e9f55f59e43767d4abad5818d15d Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 14:59:00 +0200 Subject: [PATCH 14/66] Struct and serde for announce messages --- aquadoggo/src/network/peers/message.rs | 4 +- aquadoggo/src/replication/announcement.rs | 71 +++++++++++++++++++++++ aquadoggo/src/replication/mod.rs | 2 +- aquadoggo/src/replication/service.rs | 1 + 4 files changed, 75 insertions(+), 3 deletions(-) diff --git a/aquadoggo/src/network/peers/message.rs b/aquadoggo/src/network/peers/message.rs index 1ce81969e..6f1c98ea6 100644 --- a/aquadoggo/src/network/peers/message.rs +++ b/aquadoggo/src/network/peers/message.rs @@ -2,14 +2,14 @@ use serde::{Deserialize, Serialize}; -use crate::replication::SyncMessage; +use crate::replication::{AnnouncementMessage, SyncMessage}; /// p2panda protocol messages which can be sent over the wire. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] #[serde(untagged)] pub enum PeerMessage { /// Announcement of peers about the schema ids they are interest in. - // Announce(AnnounceMessage), + Announce(AnnouncementMessage), /// Replication status and data exchange. SyncMessage(SyncMessage), diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index 24e28e026..177f5bb45 100644 --- a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -2,8 +2,15 @@ use std::time::{SystemTime, UNIX_EPOCH}; +use p2panda_rs::Validate; +use serde::de::Visitor; +use serde::ser::SerializeSeq; +use serde::{Deserialize, Serialize}; + use crate::replication::TargetSet; +const ANNOUNCEMENT_MESSAGE_VERSION: u64 = 1; + #[derive(Debug, Clone, PartialEq, Eq)] pub struct Announcement { /// This contains a list of schema ids this peer is interested in. @@ -27,3 +34,67 @@ impl Announcement { } } } + +/// Message which can be used to send announcements over the wire. 
+#[derive(Clone, Debug, PartialEq, Eq)] +pub struct AnnouncementMessage(Announcement); + +impl Serialize for AnnouncementMessage { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + let mut seq = serializer.serialize_seq(Some(3))?; + seq.serialize_element(&ANNOUNCEMENT_MESSAGE_VERSION)?; + seq.serialize_element(&self.0.timestamp)?; + seq.serialize_element(&self.0.target_set)?; + seq.end() + } +} + +impl<'de> Deserialize<'de> for AnnouncementMessage { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + struct MessageVisitor; + + impl<'de> Visitor<'de> for MessageVisitor { + type Value = AnnouncementMessage; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("p2panda announce message") + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: serde::de::SeqAccess<'de>, + { + let version: u64 = seq.next_element()?.ok_or_else(|| { + serde::de::Error::custom("missing version in announce message") + })?; + if version != ANNOUNCEMENT_MESSAGE_VERSION { + return Err(serde::de::Error::custom("invalid announce message version")); + } + + let timestamp: u64 = seq.next_element()?.ok_or_else(|| { + serde::de::Error::custom("missing timestamp in announce message") + })?; + + let target_set: TargetSet = seq.next_element()?.ok_or_else(|| { + serde::de::Error::custom("missing target set in announce message") + })?; + target_set.validate().map_err(|_| { + serde::de::Error::custom("invalid target set in announce message") + })?; + + Ok(AnnouncementMessage(Announcement { + target_set, + timestamp, + })) + } + } + + deserializer.deserialize_seq(MessageVisitor) + } +} diff --git a/aquadoggo/src/replication/mod.rs b/aquadoggo/src/replication/mod.rs index e11bcb78c..202ca7da2 100644 --- a/aquadoggo/src/replication/mod.rs +++ b/aquadoggo/src/replication/mod.rs @@ -12,7 +12,7 @@ mod strategies; mod target_set; pub mod traits; -pub use announcement::Announcement; 
+pub use announcement::{Announcement, AnnouncementMessage}; pub use ingest::SyncIngest; pub use manager::SyncManager; pub use message::{LiveMode, LogHeights, Message, SyncMessage}; diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 92d17e812..49df09c55 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -389,6 +389,7 @@ impl ConnectionManager { PeerMessage::SyncMessage(message) => { self.on_replication_message(peer, message).await; } + PeerMessage::Announce(announcement) => todo!(), }, _ => (), // Ignore all other messages } From 2ec3b959c10d115a9bf733f5b78427e724384570 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 14:59:18 +0200 Subject: [PATCH 15/66] It's actually okay to send an empty target set --- aquadoggo/src/replication/errors.rs | 3 --- aquadoggo/src/replication/target_set.rs | 5 ----- 2 files changed, 8 deletions(-) diff --git a/aquadoggo/src/replication/errors.rs b/aquadoggo/src/replication/errors.rs index b1a3204e3..cb2b737a8 100644 --- a/aquadoggo/src/replication/errors.rs +++ b/aquadoggo/src/replication/errors.rs @@ -56,9 +56,6 @@ pub enum IngestError { #[derive(Error, Debug)] pub enum TargetSetError { - #[error("Target set does not contain any schema ids")] - ZeroSchemaIds, - #[error("Target set contains unsorted or duplicate schema ids")] UnsortedSchemaIds, } diff --git a/aquadoggo/src/replication/target_set.rs b/aquadoggo/src/replication/target_set.rs index 95ca9afcd..aa068e0a1 100644 --- a/aquadoggo/src/replication/target_set.rs +++ b/aquadoggo/src/replication/target_set.rs @@ -60,11 +60,6 @@ impl Validate for TargetSet { type Error = TargetSetError; fn validate(&self) -> Result<(), Self::Error> { - // Check if at least one schema id is given - if self.0.is_empty() { - return Err(TargetSetError::ZeroSchemaIds); - }; - let mut prev_schema_id: Option<&SchemaId> = None; let mut initial_system_schema = true; From 4ca4a83e2a2dc5fe75796e63edf31300b4aaab25 
Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 15:01:40 +0200 Subject: [PATCH 16/66] Check if sync request message has a non empty target set --- aquadoggo/src/replication/message.rs | 12 +++++++++++- aquadoggo/src/replication/target_set.rs | 4 ++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/aquadoggo/src/replication/message.rs b/aquadoggo/src/replication/message.rs index aa7386cf1..6f1835f27 100644 --- a/aquadoggo/src/replication/message.rs +++ b/aquadoggo/src/replication/message.rs @@ -6,7 +6,7 @@ use p2panda_rs::entry::EncodedEntry; use p2panda_rs::entry::{LogId, SeqNum}; use p2panda_rs::identity::PublicKey; use p2panda_rs::operation::EncodedOperation; -use p2panda_rs::Human; +use p2panda_rs::{Human, Validate}; use serde::de::Visitor; use serde::ser::SerializeSeq; use serde::{Deserialize, Serialize}; @@ -159,6 +159,16 @@ impl<'de> Deserialize<'de> for SyncMessage { serde::de::Error::custom("missing target set in sync request message") })?; + target_set.validate().map_err(|_| { + serde::de::Error::custom("invalid target set in sync request message") + })?; + + if target_set.is_empty() { + return Err(serde::de::Error::custom( + "empty target set in sync request message", + )); + } + Ok(Message::SyncRequest(mode, target_set)) } else if message_type == ENTRY_TYPE { let entry_bytes: EncodedEntry = seq.next_element()?.ok_or_else(|| { diff --git a/aquadoggo/src/replication/target_set.rs b/aquadoggo/src/replication/target_set.rs index aa068e0a1..0686e8d1e 100644 --- a/aquadoggo/src/replication/target_set.rs +++ b/aquadoggo/src/replication/target_set.rs @@ -41,6 +41,10 @@ impl TargetSet { self.0.contains(schema_id) } + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + fn from_untrusted(schema_ids: Vec) -> Result { // Create target set with potentially invalid data let target_set = Self(schema_ids); From db001caf6c94f28ec84dbe901738818370583852 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 15:02:53 +0200 Subject: [PATCH 
17/66] Add a test for checking against empty target sets --- aquadoggo/src/replication/message.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/aquadoggo/src/replication/message.rs b/aquadoggo/src/replication/message.rs index 6f1835f27..8087239fa 100644 --- a/aquadoggo/src/replication/message.rs +++ b/aquadoggo/src/replication/message.rs @@ -307,6 +307,8 @@ mod tests { #[case::unknown_message_type(cbor!([122, 0]))] #[should_panic(expected = "missing session id in replication message")] #[case::only_message_type(cbor!([0]))] + #[should_panic(expected = "empty target set in sync request")] + #[case::only_message_type(cbor!([0, 0, 0, []]))] #[should_panic(expected = "too many fields for replication message")] #[case::too_many_fields(cbor!([0, 0, 0, ["schema_field_definition_v1"], "too much"]))] fn deserialize_invalid_messages(#[case] cbor: Result) { From 63456ee1636f829e67223d1ad1513631f623d3b5 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 15:06:06 +0200 Subject: [PATCH 18/66] Send announcement state to all peers which are not informed yet --- aquadoggo/src/replication/announcement.rs | 2 +- aquadoggo/src/replication/service.rs | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index 177f5bb45..471b41efe 100644 --- a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -37,7 +37,7 @@ impl Announcement { /// Message which can be used to send announcements over the wire. 
#[derive(Clone, Debug, PartialEq, Eq)] -pub struct AnnouncementMessage(Announcement); +pub struct AnnouncementMessage(pub Announcement); impl Serialize for AnnouncementMessage { fn serialize(&self, serializer: S) -> Result diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 49df09c55..7737b5298 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -22,8 +22,8 @@ use crate::network::identity::to_libp2p_peer_id; use crate::network::{Peer, PeerMessage}; use crate::replication::errors::ReplicationError; use crate::replication::{ - Announcement, Message, Mode, Session, SessionId, SyncIngest, SyncManager, SyncMessage, - TargetSet, + Announcement, AnnouncementMessage, Message, Mode, Session, SessionId, SyncIngest, SyncManager, + SyncMessage, TargetSet, }; use crate::schema::SchemaProvider; @@ -339,7 +339,8 @@ impl ConnectionManager { } } - async fn announce(&self) { + /// Send our local announcement state to all peers which are not informed yet. 
+ async fn announce(&mut self) { let local_announcement = self .announcement .as_ref() @@ -350,8 +351,10 @@ impl ConnectionManager { continue; } - // @TODO - // self.send_service_message(ServiceMessage::SentMessage(*peer, message)); + self.send_service_message(ServiceMessage::SentMessage( + *peer, + PeerMessage::Announce(AnnouncementMessage(local_announcement.clone())), + )); } } From d33582ca68a0dee203684e724f33a81d4305b9d7 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 15:11:28 +0200 Subject: [PATCH 19/66] Update peer status on incoming announcement --- aquadoggo/src/replication/service.rs | 30 +++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 7737b5298..c04a9d592 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -193,6 +193,28 @@ impl ConnectionManager { } } + /// Update announcement state of a remote peer. + async fn on_announcement_message(&mut self, peer: Peer, announcement: AnnouncementMessage) { + let incoming_announcement = announcement.0; + + match self.peers.get_mut(&peer) { + Some(status) => match &status.announcement { + Some(current) => { + // Only update peer status when incoming announcement has a newer timestamp + if current.timestamp < incoming_announcement.timestamp { + status.announcement = Some(incoming_announcement); + } + } + None => { + status.announcement = Some(incoming_announcement); + } + }, + None => { + trace!("Tried to update announcement state of unknown peer"); + } + } + } + /// Route incoming replication messages to the right session. async fn on_replication_message(&mut self, peer: Peer, message: SyncMessage) { let session_id = message.session_id(); @@ -304,14 +326,14 @@ impl ConnectionManager { return None; }; - // 1. Check if we're running too many sessions with that peer on this connection + // 2. 
Check if we're running too many sessions with that peer on this connection // already. This limit is configurable. let active_sessions: Vec<&Session> = sessions .iter() .filter(|session| !session.is_done()) .collect(); - // 2. Check if we're already having at least one session concerning the same target + // 3. Check if we're already having at least one session concerning the same target // set. If we would start that session again it would be considered an error. let has_active_target_set_session = active_sessions .iter() @@ -392,7 +414,9 @@ impl ConnectionManager { PeerMessage::SyncMessage(message) => { self.on_replication_message(peer, message).await; } - PeerMessage::Announce(announcement) => todo!(), + PeerMessage::Announce(announcement) => { + self.on_announcement_message(peer, announcement).await; + } }, _ => (), // Ignore all other messages } From ee5b901890e3ec23c55a71d2ca593324fce2dd43 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 15:12:22 +0200 Subject: [PATCH 20/66] Rename variable --- aquadoggo/src/replication/service.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index c04a9d592..325b83af1 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -194,8 +194,8 @@ impl ConnectionManager { } /// Update announcement state of a remote peer. 
- async fn on_announcement_message(&mut self, peer: Peer, announcement: AnnouncementMessage) { - let incoming_announcement = announcement.0; + async fn on_announcement_message(&mut self, peer: Peer, message: AnnouncementMessage) { + let incoming_announcement = message.0; match self.peers.get_mut(&peer) { Some(status) => match &status.announcement { @@ -414,8 +414,8 @@ impl ConnectionManager { PeerMessage::SyncMessage(message) => { self.on_replication_message(peer, message).await; } - PeerMessage::Announce(announcement) => { - self.on_announcement_message(peer, announcement).await; + PeerMessage::Announce(message) => { + self.on_announcement_message(peer, message).await; } }, _ => (), // Ignore all other messages From e9880332020a6208a0f6df33d685b30fdccbf38f Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 16:12:17 +0200 Subject: [PATCH 21/66] Calculate intersection between target sets of two peers --- aquadoggo/src/replication/service.rs | 27 ++++++++++------- aquadoggo/src/replication/target_set.rs | 40 +++++++++++++++++++++---- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 325b83af1..78a9fec9b 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -305,13 +305,14 @@ impl ConnectionManager { /// Determine if we can attempt new replication sessions with the peers we currently know /// about. 
async fn update_sessions(&mut self) { - let local_announcement = self + let local_target_set = &self .announcement .as_ref() - .expect("Our announcement needs to be set latest when we call 'update_sessions'"); + .expect("Our announcement needs to be set latest when we call 'update_sessions'") + .target_set; // Iterate through all currently connected peers - let mut attempt_peers: Vec = self + let mut attempt_peers: Vec<(Peer, TargetSet)> = self .peers .clone() .into_iter() @@ -326,21 +327,28 @@ impl ConnectionManager { return None; }; - // 2. Check if we're running too many sessions with that peer on this connection + // 2. Calculate intersection of local and remote target set. Do we have any + // supported schema id's in common? + let target_set = TargetSet::from_intersection(local_target_set, &remote_target_set); + if target_set.is_empty() { + return None; + } + + // 3. Check if we're running too many sessions with that peer on this connection // already. This limit is configurable. let active_sessions: Vec<&Session> = sessions .iter() .filter(|session| !session.is_done()) .collect(); - // 3. Check if we're already having at least one session concerning the same target + // 4. Check if we're already having at least one session concerning the same target // set. If we would start that session again it would be considered an error. 
let has_active_target_set_session = active_sessions .iter() - .any(|session| session.target_set() == local_announcement.target_set); + .any(|session| session.target_set() == target_set); if active_sessions.len() < MAX_SESSIONS_PER_PEER && !has_active_target_set_session { - Some(peer) + Some((peer, target_set)) } else { None } @@ -355,9 +363,8 @@ impl ConnectionManager { attempt_peers.shuffle(&mut thread_rng()); attempt_peers.truncate(MAX_PEER_SAMPLE); - for peer in attempt_peers { - // @TODO - // self.initiate_replication(&peer, &target_set).await; + for (peer, target_set) in &attempt_peers { + self.initiate_replication(peer, target_set).await; } } diff --git a/aquadoggo/src/replication/target_set.rs b/aquadoggo/src/replication/target_set.rs index 0686e8d1e..81d863790 100644 --- a/aquadoggo/src/replication/target_set.rs +++ b/aquadoggo/src/replication/target_set.rs @@ -37,12 +37,10 @@ impl TargetSet { Self(deduplicated_set) } - pub fn contains(&self, schema_id: &SchemaId) -> bool { - self.0.contains(schema_id) - } - - pub fn is_empty(&self) -> bool { - self.0.is_empty() + pub fn from_intersection(local_target_set: &TargetSet, remote_target_set: &TargetSet) -> Self { + let mut target_set = local_target_set.clone(); + target_set.0.retain(|id| remote_target_set.contains(id)); + target_set } fn from_untrusted(schema_ids: Vec) -> Result { @@ -55,6 +53,14 @@ impl TargetSet { Ok(target_set) } + pub fn contains(&self, schema_id: &SchemaId) -> bool { + self.0.contains(schema_id) + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + pub fn iter(&self) -> Iter { self.0.iter() } @@ -166,6 +172,28 @@ mod tests { ); } + #[rstest] + fn calculate_intersection( + #[from(random_document_view_id)] document_view_id_1: DocumentViewId, + #[from(random_document_view_id)] document_view_id_2: DocumentViewId, + #[from(random_document_view_id)] document_view_id_3: DocumentViewId, + ) { + let schema_id_1 = + SchemaId::new_application(&SchemaName::new("messages").unwrap(), 
&document_view_id_1); + let schema_id_2 = + SchemaId::new_application(&SchemaName::new("profiles").unwrap(), &document_view_id_2); + let schema_id_3 = + SchemaId::new_application(&SchemaName::new("events").unwrap(), &document_view_id_3); + + let set_1 = TargetSet::new(&[schema_id_1.clone(), schema_id_2.clone()]); + let set_2 = TargetSet::new(&[schema_id_3.clone(), schema_id_2.clone()]); + + assert_eq!( + TargetSet::from_intersection(&set_1, &set_2), + TargetSet::new(&[schema_id_2.clone()]) + ); + } + #[rstest] #[case(vec![ "venues_0020c13cdc58dfc6f4ebd32992ff089db79980363144bdb2743693a019636fa72ec8".to_string(), From 1bcb122f85f70e2ee8b2bf573325e34334631dff Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 16:13:55 +0200 Subject: [PATCH 22/66] Add entry to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb0983f26..8380081ef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Only replicate and materialize configured "supported schema" [#569](https://github.com/p2panda/aquadoggo/pull/469) - Parse supported schema ids from `config.toml` [#473](https://github.com/p2panda/aquadoggo/pull/473) - Fix relayed connections, add DCUtR Holepunching and reduce CLI args [#502](https://github.com/p2panda/aquadoggo/pull/502) +- Target set announcements in network [#515](https://github.com/p2panda/aquadoggo/pull/515) ### Changed From 3b123f3290d5b9b4181c9c02038406860391b298 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 16:16:29 +0200 Subject: [PATCH 23/66] Announcements contain the replication protocol version --- aquadoggo/src/replication/announcement.rs | 8 +++----- aquadoggo/src/replication/mod.rs | 3 +++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index 471b41efe..4e4b11874 100644 --- 
a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -7,9 +7,7 @@ use serde::de::Visitor; use serde::ser::SerializeSeq; use serde::{Deserialize, Serialize}; -use crate::replication::TargetSet; - -const ANNOUNCEMENT_MESSAGE_VERSION: u64 = 1; +use crate::replication::{TargetSet, REPLICATION_PROTOCOL_VERSION}; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Announcement { @@ -45,7 +43,7 @@ impl Serialize for AnnouncementMessage { S: serde::Serializer, { let mut seq = serializer.serialize_seq(Some(3))?; - seq.serialize_element(&ANNOUNCEMENT_MESSAGE_VERSION)?; + seq.serialize_element(&REPLICATION_PROTOCOL_VERSION)?; seq.serialize_element(&self.0.timestamp)?; seq.serialize_element(&self.0.target_set)?; seq.end() @@ -73,7 +71,7 @@ impl<'de> Deserialize<'de> for AnnouncementMessage { let version: u64 = seq.next_element()?.ok_or_else(|| { serde::de::Error::custom("missing version in announce message") })?; - if version != ANNOUNCEMENT_MESSAGE_VERSION { + if version != REPLICATION_PROTOCOL_VERSION { return Err(serde::de::Error::custom("invalid announce message version")); } diff --git a/aquadoggo/src/replication/mod.rs b/aquadoggo/src/replication/mod.rs index 202ca7da2..2cd769de0 100644 --- a/aquadoggo/src/replication/mod.rs +++ b/aquadoggo/src/replication/mod.rs @@ -21,3 +21,6 @@ pub use service::replication_service; pub use session::{Session, SessionId, SessionState}; pub use strategies::{LogHeightStrategy, SetReconciliationStrategy, StrategyResult}; pub use target_set::TargetSet; + +/// Currently supported p2panda replication protocol version. 
+pub const REPLICATION_PROTOCOL_VERSION: u64 = 1; From c2975a3a3e58f20298d6e7c205ae0f8d1b3d4370 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 16:22:17 +0200 Subject: [PATCH 24/66] Silently ignore peers with unsupported protocol version --- aquadoggo/src/replication/announcement.rs | 42 ++++++++++++++++------- aquadoggo/src/replication/service.rs | 9 +++-- 2 files changed, 36 insertions(+), 15 deletions(-) diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index 4e4b11874..bc2442684 100644 --- a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -33,9 +33,25 @@ impl Announcement { } } +pub type ProtocolVersion = u64; + /// Message which can be used to send announcements over the wire. #[derive(Clone, Debug, PartialEq, Eq)] -pub struct AnnouncementMessage(pub Announcement); +pub struct AnnouncementMessage(ProtocolVersion, Announcement); + +impl AnnouncementMessage { + pub fn new(announcement: Announcement) -> Self { + Self(REPLICATION_PROTOCOL_VERSION, announcement) + } + + pub fn announcement(&self) -> Announcement { + self.1.clone() + } + + pub fn is_version_supported(&self) -> bool { + self.0 == REPLICATION_PROTOCOL_VERSION + } +} impl Serialize for AnnouncementMessage { fn serialize(&self, serializer: S) -> Result @@ -43,9 +59,9 @@ impl Serialize for AnnouncementMessage { S: serde::Serializer, { let mut seq = serializer.serialize_seq(Some(3))?; - seq.serialize_element(&REPLICATION_PROTOCOL_VERSION)?; - seq.serialize_element(&self.0.timestamp)?; - seq.serialize_element(&self.0.target_set)?; + seq.serialize_element(&self.0)?; + seq.serialize_element(&self.1.timestamp)?; + seq.serialize_element(&self.1.target_set)?; seq.end() } } @@ -68,12 +84,9 @@ impl<'de> Deserialize<'de> for AnnouncementMessage { where A: serde::de::SeqAccess<'de>, { - let version: u64 = seq.next_element()?.ok_or_else(|| { - serde::de::Error::custom("missing version in announce message") + let 
protocol_version: ProtocolVersion = seq.next_element()?.ok_or_else(|| { + serde::de::Error::custom("missing protocol version in announce message") })?; - if version != REPLICATION_PROTOCOL_VERSION { - return Err(serde::de::Error::custom("invalid announce message version")); - } let timestamp: u64 = seq.next_element()?.ok_or_else(|| { serde::de::Error::custom("missing timestamp in announce message") @@ -86,10 +99,13 @@ impl<'de> Deserialize<'de> for AnnouncementMessage { serde::de::Error::custom("invalid target set in announce message") })?; - Ok(AnnouncementMessage(Announcement { - target_set, - timestamp, - })) + Ok(AnnouncementMessage( + protocol_version, + Announcement { + target_set, + timestamp, + }, + )) } } diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 78a9fec9b..7ae8e39e3 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -195,7 +195,12 @@ impl ConnectionManager { /// Update announcement state of a remote peer. 
async fn on_announcement_message(&mut self, peer: Peer, message: AnnouncementMessage) { - let incoming_announcement = message.0; + // Check if this node supports our replication protocol version + if !message.is_version_supported() { + return; + } + + let incoming_announcement = message.announcement(); match self.peers.get_mut(&peer) { Some(status) => match &status.announcement { @@ -382,7 +387,7 @@ impl ConnectionManager { self.send_service_message(ServiceMessage::SentMessage( *peer, - PeerMessage::Announce(AnnouncementMessage(local_announcement.clone())), + PeerMessage::Announce(AnnouncementMessage::new(local_announcement.clone())), )); } } From 6da51271cde91663940f0037f1e2a20f187efb7b Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 16:34:05 +0200 Subject: [PATCH 25/66] Test serde for announcement messages --- aquadoggo/src/replication/announcement.rs | 59 +++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index bc2442684..b5b417b89 100644 --- a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -99,6 +99,14 @@ impl<'de> Deserialize<'de> for AnnouncementMessage { serde::de::Error::custom("invalid target set in announce message") })?; + if let Some(items_left) = seq.size_hint() { + if items_left > 0 { + return Err(serde::de::Error::custom( + "too many fields for announce message", + )); + } + }; + Ok(AnnouncementMessage( protocol_version, Announcement { @@ -112,3 +120,54 @@ impl<'de> Deserialize<'de> for AnnouncementMessage { deserializer.deserialize_seq(MessageVisitor) } } + +#[cfg(test)] +mod tests { + use ciborium::cbor; + use ciborium::value::{Error, Value}; + use p2panda_rs::serde::{deserialize_into, serialize_from, serialize_value}; + use rstest::rstest; + + use crate::replication::TargetSet; + use crate::test_utils::helpers::random_target_set; + + use super::{Announcement, AnnouncementMessage}; + + 
#[rstest] + fn serialize(#[from(random_target_set)] target_set: TargetSet) { + let announcement = Announcement::new(target_set.clone()); + assert_eq!( + serialize_from(AnnouncementMessage::new(announcement.clone())), + serialize_value(cbor!([1, announcement.timestamp, target_set])) + ); + } + + #[rstest] + fn deserialize(#[from(random_target_set)] target_set: TargetSet) { + assert_eq!( + deserialize_into::(&serialize_value(cbor!([ + 1, 12345678, target_set + ]))) + .unwrap(), + AnnouncementMessage::new(Announcement { + timestamp: 12345678, + target_set, + }) + ); + } + + #[rstest] + #[should_panic(expected = "missing protocol version in announce message")] + #[case::missing_version(cbor!([]))] + #[should_panic(expected = "missing timestamp in announce message")] + #[case::missing_timestamp(cbor!([122]))] + #[should_panic(expected = "too many fields for announce message")] + #[case::too_many_fields(cbor!([1, 0, ["schema_field_definition_v1"], "too much"]))] + fn deserialize_invalid_messages(#[case] cbor: Result) { + // Check the cbor is valid + assert!(cbor.is_ok()); + + // We unwrap here to cause a panic and then test for expected error stings + deserialize_into::(&serialize_value(cbor)).unwrap(); + } +} From c25fdcc166110eae402f1979d83b648c827b7501 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 21:55:54 +0200 Subject: [PATCH 26/66] Update routine which gets executed on every established new connection and scheduler beat --- aquadoggo/src/replication/service.rs | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 7ae8e39e3..2e6351397 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -172,11 +172,20 @@ impl ConnectionManager { } None => { self.peers.insert(peer, PeerStatus::new(peer)); - self.update_sessions().await; + self.on_update().await; } } } + /// Routines which get executed on every scheduler 
beat and newly established connection. + async fn on_update(&mut self) { + // Inform new peers about our supported protocol version and schema ids + self.announce().await; + + // Check if we can establish replication sessions with peers + self.update_sessions().await; + } + /// Handle a peer connection closing. async fn on_connection_closed(&mut self, peer: Peer) { info!("Closed connection with peer: {}", peer.display()); @@ -471,8 +480,7 @@ impl ConnectionManager { // Announcement & replication schedule is due Some(_) = self.scheduler.next() => { - self.announce().await; - self.update_sessions().await + self.on_update().await; } } } From 4865adf31ef02733d5f82ff9a6ca9b6ed348bd84 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 22:37:05 +0200 Subject: [PATCH 27/66] Update timestamp when sending announcement to peer --- aquadoggo/src/replication/announcement.rs | 15 +++++++++------ aquadoggo/src/replication/mod.rs | 2 +- aquadoggo/src/replication/service.rs | 20 ++++++++++++-------- 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index b5b417b89..ca016cf90 100644 --- a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -9,6 +9,14 @@ use serde::{Deserialize, Serialize}; use crate::replication::{TargetSet, REPLICATION_PROTOCOL_VERSION}; +/// u64 timestamp from UNIX epoch until now. +pub fn now() -> u64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("System time invalid, operation system time configured before UNIX epoch") + .as_secs() +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct Announcement { /// This contains a list of schema ids this peer is interested in. 
@@ -21,13 +29,8 @@ pub struct Announcement { impl Announcement { pub fn new(target_set: TargetSet) -> Self { - let timestamp = SystemTime::now() - .duration_since(UNIX_EPOCH) - .expect("System time invalid, operation system time configured before UNIX epoch") - .as_secs(); - Self { - timestamp, + timestamp: now(), target_set, } } diff --git a/aquadoggo/src/replication/mod.rs b/aquadoggo/src/replication/mod.rs index 2cd769de0..79a1ce1a3 100644 --- a/aquadoggo/src/replication/mod.rs +++ b/aquadoggo/src/replication/mod.rs @@ -12,7 +12,7 @@ mod strategies; mod target_set; pub mod traits; -pub use announcement::{Announcement, AnnouncementMessage}; +pub use announcement::{now, Announcement, AnnouncementMessage}; pub use ingest::SyncIngest; pub use manager::SyncManager; pub use message::{LiveMode, LogHeights, Message, SyncMessage}; diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 2e6351397..44307d4ce 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -22,8 +22,8 @@ use crate::network::identity::to_libp2p_peer_id; use crate::network::{Peer, PeerMessage}; use crate::replication::errors::ReplicationError; use crate::replication::{ - Announcement, AnnouncementMessage, Message, Mode, Session, SessionId, SyncIngest, SyncManager, - SyncMessage, TargetSet, + now, Announcement, AnnouncementMessage, Message, Mode, Session, SessionId, SyncIngest, + SyncManager, SyncMessage, TargetSet, }; use crate::schema::SchemaProvider; @@ -390,14 +390,18 @@ impl ConnectionManager { .expect("Our announcement needs to be set latest when we call 'update_sessions'"); for (peer, status) in &self.peers { - if status.sent_our_announcement_timestamp > local_announcement.timestamp { - continue; + if status.sent_our_announcement_timestamp < local_announcement.timestamp { + self.send_service_message(ServiceMessage::SentMessage( + *peer, + PeerMessage::Announce(AnnouncementMessage::new(local_announcement.clone())), + 
)); } + } - self.send_service_message(ServiceMessage::SentMessage( - *peer, - PeerMessage::Announce(AnnouncementMessage::new(local_announcement.clone())), - )); + for (_, status) in self.peers.iter_mut() { + if status.sent_our_announcement_timestamp < local_announcement.timestamp { + status.sent_our_announcement_timestamp = now(); + } } } From 55673cd30e987c56875e75f0a8b4ba465c4eb1f5 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 23:25:57 +0200 Subject: [PATCH 28/66] Correctly check if remote target set is valid in sync request --- aquadoggo/src/replication/service.rs | 19 +++++++++++++++++-- aquadoggo/src/replication/target_set.rs | 16 ++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 44307d4ce..2ed9d75ac 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -216,10 +216,18 @@ impl ConnectionManager { Some(current) => { // Only update peer status when incoming announcement has a newer timestamp if current.timestamp < incoming_announcement.timestamp { + trace!( + "Received updated announcement state from peer {}", + peer.display() + ); status.announcement = Some(incoming_announcement); } } None => { + trace!( + "Received first announcement state from peer {}", + peer.display() + ); status.announcement = Some(incoming_announcement); } }, @@ -235,11 +243,18 @@ impl ConnectionManager { // If this is a SyncRequest message first we check if the contained target set matches our // own locally configured one. 
- if let Message::SyncRequest(_, target_set) = message.message() { + if let Message::SyncRequest(_, remote_target_set) = message.message() { + let local_target_set = &self + .announcement + .as_ref() + .expect("Our announcement needs to be set latest when we call 'update_sessions'") + .target_set; + // If this node has been configured with a whitelist of schema ids then we check the // target set of the requests matches our own, otherwise we skip this step and accept // any target set. - if self.schema_provider.is_whitelist_active() && target_set != &self.target_set().await + if self.schema_provider.is_whitelist_active() + && !local_target_set.is_valid_set(remote_target_set) { // If it doesn't match we signal that an error occurred and return at this point. self.on_replication_error(peer, session_id, ReplicationError::UnsupportedTargetSet) diff --git a/aquadoggo/src/replication/target_set.rs b/aquadoggo/src/replication/target_set.rs index 81d863790..9f199ecaa 100644 --- a/aquadoggo/src/replication/target_set.rs +++ b/aquadoggo/src/replication/target_set.rs @@ -57,6 +57,13 @@ impl TargetSet { self.0.contains(schema_id) } + /// Returns true if both target sets know about the same elements. 
+ pub fn is_valid_set(&self, target_set: &TargetSet) -> bool { + self.iter() + .find(|schema_id| !target_set.contains(schema_id)) + .is_none() + } + pub fn is_empty(&self) -> bool { self.0.is_empty() } @@ -178,6 +185,7 @@ mod tests { #[from(random_document_view_id)] document_view_id_2: DocumentViewId, #[from(random_document_view_id)] document_view_id_3: DocumentViewId, ) { + // Correctly calculates intersections let schema_id_1 = SchemaId::new_application(&SchemaName::new("messages").unwrap(), &document_view_id_1); let schema_id_2 = @@ -192,6 +200,14 @@ mod tests { TargetSet::from_intersection(&set_1, &set_2), TargetSet::new(&[schema_id_2.clone()]) ); + + // Correctly verifies if both target sets know about all given elements + assert!(TargetSet::new(&[schema_id_2.clone()]) + .is_valid_set(&TargetSet::new(&[schema_id_2.clone()]))); + assert!(TargetSet::new(&[schema_id_3.clone(), schema_id_2.clone()]) + .is_valid_set(&TargetSet::new(&[schema_id_2.clone()]))); + assert!(!TargetSet::new(&[schema_id_1.clone()]) + .is_valid_set(&TargetSet::new(&[schema_id_2.clone(), schema_id_1.clone()]))); } #[rstest] From dace66dd9d57e08cba29d5109a51ea1ece13c68f Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 23:30:35 +0200 Subject: [PATCH 29/66] Temporary fix to account for different message types --- aquadoggo/src/network/peers/message.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/aquadoggo/src/network/peers/message.rs b/aquadoggo/src/network/peers/message.rs index 6f1c98ea6..d7e99f46e 100644 --- a/aquadoggo/src/network/peers/message.rs +++ b/aquadoggo/src/network/peers/message.rs @@ -6,7 +6,6 @@ use crate::replication::{AnnouncementMessage, SyncMessage}; /// p2panda protocol messages which can be sent over the wire. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] -#[serde(untagged)] pub enum PeerMessage { /// Announcement of peers about the schema ids they are interest in. 
Announce(AnnouncementMessage), From 331f9c16b3651b41955cd6ee0d3e984c50e80842 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 23:36:45 +0200 Subject: [PATCH 30/66] Fix validating target sets --- aquadoggo/src/replication/target_set.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/aquadoggo/src/replication/target_set.rs b/aquadoggo/src/replication/target_set.rs index 9f199ecaa..f58e0b675 100644 --- a/aquadoggo/src/replication/target_set.rs +++ b/aquadoggo/src/replication/target_set.rs @@ -57,11 +57,9 @@ impl TargetSet { self.0.contains(schema_id) } - /// Returns true if both target sets know about the same elements. + /// Returns true if there are no unknown elements in external target set. pub fn is_valid_set(&self, target_set: &TargetSet) -> bool { - self.iter() - .find(|schema_id| !target_set.contains(schema_id)) - .is_none() + !target_set.iter().any(|schema_id| !self.contains(schema_id)) } pub fn is_empty(&self) -> bool { From 24af91a237876ab1d384fb70815ee8ef7b5acbe6 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 23:42:08 +0200 Subject: [PATCH 31/66] Update test --- aquadoggo/src/replication/service.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 2ed9d75ac..77aaaa33f 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -521,7 +521,9 @@ mod tests { use crate::bus::ServiceMessage; use crate::network::{Peer, PeerMessage}; use crate::replication::service::PeerStatus; - use crate::replication::{Message, Mode, SyncMessage, TargetSet}; + use crate::replication::{ + Announcement, AnnouncementMessage, Message, Mode, SyncMessage, TargetSet, + }; use crate::schema::SchemaProvider; use crate::test_utils::{test_runner, TestNode}; @@ -545,6 +547,7 @@ mod tests { ); let target_set = manager.target_set().await; + manager.update_announcement().await; // Inform connection 
manager about new peer let remote_peer = Peer::new(remote_peer_id, ConnectionId::new_unchecked(1)); @@ -560,19 +563,17 @@ mod tests { assert_eq!(manager.peers.len(), 1); assert_eq!(status.peer, remote_peer); - // Manager attempts a replication session with that peer + // Manager announces target set with peer assert_eq!(rx.len(), 1); assert_eq!( rx.recv().await, Ok(ServiceMessage::SentMessage( remote_peer, - PeerMessage::SyncMessage(SyncMessage::new( - 0, - Message::SyncRequest(Mode::LogHeight, target_set) - )) + PeerMessage::Announce(AnnouncementMessage::new(Announcement::new( + target_set.clone() + ))) )) ); - assert_eq!(manager.sync_manager.get_sessions(&remote_peer).len(), 1); // Inform manager about peer disconnected manager From 40a21a26e3024a6a977b9881087c3f94d0fb5d08 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 23:52:08 +0200 Subject: [PATCH 32/66] Also test receiving external announcement --- aquadoggo/src/replication/service.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 77aaaa33f..bed6036eb 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -559,9 +559,11 @@ mod tests { let status = manager .peers .get(&remote_peer) - .expect("Peer to be registered in connection manager"); + .expect("Peer to be registered in connection manager") + .clone(); assert_eq!(manager.peers.len(), 1); assert_eq!(status.peer, remote_peer); + assert!(status.sent_our_announcement_timestamp > 0); // Manager announces target set with peer assert_eq!(rx.len(), 1); @@ -575,6 +577,22 @@ mod tests { )) ); + // Peer informs us about its target set + assert_eq!(status.announcement, None); + let announcement = Announcement::new(target_set.clone()); + manager + .handle_service_message(ServiceMessage::ReceivedMessage( + remote_peer.clone(), + PeerMessage::Announce(AnnouncementMessage::new(announcement.clone())), + 
)) + .await; + let status = manager + .peers + .get(&remote_peer) + .expect("Peer to be registered in connection manager") + .clone(); + assert_eq!(status.announcement, Some(announcement.clone())); + // Inform manager about peer disconnected manager .handle_service_message(ServiceMessage::PeerDisconnected(remote_peer)) From 5799686d2266e22856ed0218514dc11f64832765 Mon Sep 17 00:00:00 2001 From: adz Date: Sun, 20 Aug 2023 23:55:08 +0200 Subject: [PATCH 33/66] Improve panic message, fix another test --- aquadoggo/src/replication/service.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index bed6036eb..087ddfec4 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -247,7 +247,7 @@ impl ConnectionManager { let local_target_set = &self .announcement .as_ref() - .expect("Our announcement needs to be set latest when we call 'update_sessions'") + .expect("Announcement state needs to be set with 'update_announcement'") .target_set; // If this node has been configured with a whitelist of schema ids then we check the @@ -337,7 +337,7 @@ impl ConnectionManager { let local_target_set = &self .announcement .as_ref() - .expect("Our announcement needs to be set latest when we call 'update_sessions'") + .expect("Announcement state needs to be set with 'update_announcement'") .target_set; // Iterate through all currently connected peers @@ -402,7 +402,7 @@ impl ConnectionManager { let local_announcement = self .announcement .as_ref() - .expect("Our announcement needs to be set latest when we call 'update_sessions'"); + .expect("Announcement state needs to be set with 'update_announcement'"); for (peer, status) in &self.peers { if status.sent_our_announcement_timestamp < local_announcement.timestamp { @@ -618,6 +618,7 @@ mod tests { let schema_provider = SchemaProvider::new(vec![], Some(vec![])); let mut manager = 
ConnectionManager::new(&schema_provider, &node.context.store, &tx, local_peer_id); + manager.update_announcement().await; let remote_peer = Peer::new(remote_peer_id, ConnectionId::new_unchecked(1)); From 1e6a86c20e16f729bff758c3ce6f0b6951ac8cbe Mon Sep 17 00:00:00 2001 From: adz Date: Mon, 21 Aug 2023 11:25:48 +0200 Subject: [PATCH 34/66] Better CHANGELOG.md entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8380081ef..a248669a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,7 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Only replicate and materialize configured "supported schema" [#569](https://github.com/p2panda/aquadoggo/pull/469) - Parse supported schema ids from `config.toml` [#473](https://github.com/p2panda/aquadoggo/pull/473) - Fix relayed connections, add DCUtR Holepunching and reduce CLI args [#502](https://github.com/p2panda/aquadoggo/pull/502) -- Target set announcements in network [#515](https://github.com/p2panda/aquadoggo/pull/515) +- Announce supported schema ids in network before replication [#515](https://github.com/p2panda/aquadoggo/pull/515) ### Changed From 4f8eb1645a3e12c967e5426be7d057ce0a645599 Mon Sep 17 00:00:00 2001 From: adz Date: Tue, 22 Aug 2023 12:07:00 +0200 Subject: [PATCH 35/66] Give announce messages a type as well so serde can distinct the variants --- aquadoggo/src/network/peers/message.rs | 1 + aquadoggo/src/replication/announcement.rs | 15 +++++++++++++-- aquadoggo/src/replication/manager.rs | 4 ++-- aquadoggo/src/replication/message.rs | 12 ++++-------- aquadoggo/src/replication/mod.rs | 9 +++++++++ 5 files changed, 29 insertions(+), 12 deletions(-) diff --git a/aquadoggo/src/network/peers/message.rs b/aquadoggo/src/network/peers/message.rs index d7e99f46e..6f1c98ea6 100644 --- a/aquadoggo/src/network/peers/message.rs +++ b/aquadoggo/src/network/peers/message.rs @@ -6,6 +6,7 @@ use 
crate::replication::{AnnouncementMessage, SyncMessage}; /// p2panda protocol messages which can be sent over the wire. #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[serde(untagged)] pub enum PeerMessage { /// Announcement of peers about the schema ids they are interest in. Announce(AnnouncementMessage), diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index ca016cf90..73ce4e2d9 100644 --- a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -7,7 +7,7 @@ use serde::de::Visitor; use serde::ser::SerializeSeq; use serde::{Deserialize, Serialize}; -use crate::replication::{TargetSet, REPLICATION_PROTOCOL_VERSION}; +use crate::replication::{MessageType, TargetSet, ANNOUNCE_TYPE, REPLICATION_PROTOCOL_VERSION}; /// u64 timestamp from UNIX epoch until now. pub fn now() -> u64 { @@ -61,7 +61,8 @@ impl Serialize for AnnouncementMessage { where S: serde::Serializer, { - let mut seq = serializer.serialize_seq(Some(3))?; + let mut seq = serializer.serialize_seq(Some(4))?; + seq.serialize_element(&ANNOUNCE_TYPE)?; seq.serialize_element(&self.0)?; seq.serialize_element(&self.1.timestamp)?; seq.serialize_element(&self.1.target_set)?; @@ -87,6 +88,16 @@ impl<'de> Deserialize<'de> for AnnouncementMessage { where A: serde::de::SeqAccess<'de>, { + let message_type: MessageType = seq.next_element()?.ok_or_else(|| { + serde::de::Error::custom("missing message type in announce message") + })?; + + if message_type != ANNOUNCE_TYPE { + return Err(serde::de::Error::custom( + "invalid message type for announce message", + )); + } + let protocol_version: ProtocolVersion = seq.next_element()?.ok_or_else(|| { serde::de::Error::custom("missing protocol version in announce message") })?; diff --git a/aquadoggo/src/replication/manager.rs b/aquadoggo/src/replication/manager.rs index 675b41801..73586c899 100644 --- a/aquadoggo/src/replication/manager.rs +++ 
b/aquadoggo/src/replication/manager.rs @@ -520,8 +520,8 @@ mod tests { use tokio::sync::broadcast; use crate::replication::errors::{DuplicateSessionRequestError, ReplicationError}; - use crate::replication::message::{Message, HAVE_TYPE, SYNC_DONE_TYPE}; - use crate::replication::{Mode, SyncIngest, SyncMessage, TargetSet}; + use crate::replication::message::Message; + use crate::replication::{Mode, SyncIngest, SyncMessage, TargetSet, HAVE_TYPE, SYNC_DONE_TYPE}; use crate::schema::SchemaProvider; use crate::test_utils::helpers::random_target_set; use crate::test_utils::{ diff --git a/aquadoggo/src/replication/message.rs b/aquadoggo/src/replication/message.rs index 8087239fa..f2e8223c0 100644 --- a/aquadoggo/src/replication/message.rs +++ b/aquadoggo/src/replication/message.rs @@ -11,14 +11,10 @@ use serde::de::Visitor; use serde::ser::SerializeSeq; use serde::{Deserialize, Serialize}; -use crate::replication::{Mode, SessionId, TargetSet}; - -pub const SYNC_REQUEST_TYPE: MessageType = 0; -pub const ENTRY_TYPE: MessageType = 8; -pub const SYNC_DONE_TYPE: MessageType = 9; -pub const HAVE_TYPE: MessageType = 10; - -pub type MessageType = u64; +use crate::replication::{ + MessageType, Mode, SessionId, TargetSet, ENTRY_TYPE, HAVE_TYPE, SYNC_DONE_TYPE, + SYNC_REQUEST_TYPE, +}; pub type LiveMode = bool; diff --git a/aquadoggo/src/replication/mod.rs b/aquadoggo/src/replication/mod.rs index 79a1ce1a3..ddddfbb6a 100644 --- a/aquadoggo/src/replication/mod.rs +++ b/aquadoggo/src/replication/mod.rs @@ -22,5 +22,14 @@ pub use session::{Session, SessionId, SessionState}; pub use strategies::{LogHeightStrategy, SetReconciliationStrategy, StrategyResult}; pub use target_set::TargetSet; +pub type MessageType = u64; + +// Integers indicating message type for wire message format. 
+pub const ANNOUNCE_TYPE: MessageType = 0; +pub const SYNC_REQUEST_TYPE: MessageType = 1; +pub const SYNC_DONE_TYPE: MessageType = 2; +pub const ENTRY_TYPE: MessageType = 3; +pub const HAVE_TYPE: MessageType = 10; + /// Currently supported p2panda replication protocol version. pub const REPLICATION_PROTOCOL_VERSION: u64 = 1; From 5dda8235b1356ea26cee7920a7210f3ff0dd448a Mon Sep 17 00:00:00 2001 From: adz Date: Tue, 22 Aug 2023 12:17:54 +0200 Subject: [PATCH 36/66] Update tests --- aquadoggo/src/replication/announcement.rs | 12 +++++++----- aquadoggo/src/replication/message.rs | 12 ++++++------ aquadoggo/src/replication/mod.rs | 4 ++-- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/aquadoggo/src/replication/announcement.rs b/aquadoggo/src/replication/announcement.rs index 73ce4e2d9..c7b235b51 100644 --- a/aquadoggo/src/replication/announcement.rs +++ b/aquadoggo/src/replication/announcement.rs @@ -152,7 +152,7 @@ mod tests { let announcement = Announcement::new(target_set.clone()); assert_eq!( serialize_from(AnnouncementMessage::new(announcement.clone())), - serialize_value(cbor!([1, announcement.timestamp, target_set])) + serialize_value(cbor!([0, 1, announcement.timestamp, target_set])) ); } @@ -160,7 +160,7 @@ mod tests { fn deserialize(#[from(random_target_set)] target_set: TargetSet) { assert_eq!( deserialize_into::(&serialize_value(cbor!([ - 1, 12345678, target_set + 0, 1, 12345678, target_set ]))) .unwrap(), AnnouncementMessage::new(Announcement { @@ -171,12 +171,14 @@ mod tests { } #[rstest] - #[should_panic(expected = "missing protocol version in announce message")] + #[should_panic(expected = "missing message type in announce message")] #[case::missing_version(cbor!([]))] + #[should_panic(expected = "missing protocol version in announce message")] + #[case::missing_version(cbor!([0]))] #[should_panic(expected = "missing timestamp in announce message")] - #[case::missing_timestamp(cbor!([122]))] + #[case::missing_timestamp(cbor!([0, 122]))] 
#[should_panic(expected = "too many fields for announce message")] - #[case::too_many_fields(cbor!([1, 0, ["schema_field_definition_v1"], "too much"]))] + #[case::too_many_fields(cbor!([0, 1, 0, ["schema_field_definition_v1"], "too much"]))] fn deserialize_invalid_messages(#[case] cbor: Result) { // Check the cbor is valid assert!(cbor.is_ok()); diff --git a/aquadoggo/src/replication/message.rs b/aquadoggo/src/replication/message.rs index f2e8223c0..e4d7dbfcf 100644 --- a/aquadoggo/src/replication/message.rs +++ b/aquadoggo/src/replication/message.rs @@ -23,9 +23,9 @@ pub type LogHeights = (PublicKey, Vec<(LogId, SeqNum)>); #[derive(Debug, Clone, Eq, PartialEq)] pub enum Message { SyncRequest(Mode, TargetSet), - Have(Vec), Entry(EncodedEntry, Option), SyncDone(LiveMode), + Have(Vec), } impl Message { @@ -232,7 +232,7 @@ mod tests { 51, Message::SyncRequest(Mode::SetReconciliation, target_set.clone()) )), - serialize_value(cbor!([0, 51, 1, target_set])) + serialize_value(cbor!([1, 51, 1, target_set])) ); assert_eq!( @@ -259,7 +259,7 @@ mod tests { #[rstest] fn deserialize(#[from(random_target_set)] target_set: TargetSet, public_key: PublicKey) { assert_eq!( - deserialize_into::(&serialize_value(cbor!([0, 12, 0, target_set]))) + deserialize_into::(&serialize_value(cbor!([1, 12, 0, target_set]))) .unwrap(), SyncMessage::new( 12, @@ -302,11 +302,11 @@ mod tests { #[should_panic(expected = "unknown message type 122 in replication message")] #[case::unknown_message_type(cbor!([122, 0]))] #[should_panic(expected = "missing session id in replication message")] - #[case::only_message_type(cbor!([0]))] + #[case::only_message_type(cbor!([1]))] #[should_panic(expected = "empty target set in sync request")] - #[case::only_message_type(cbor!([0, 0, 0, []]))] + #[case::only_message_type(cbor!([1, 0, 0, []]))] #[should_panic(expected = "too many fields for replication message")] - #[case::too_many_fields(cbor!([0, 0, 0, ["schema_field_definition_v1"], "too much"]))] + 
#[case::too_many_fields(cbor!([1, 0, 0, ["schema_field_definition_v1"], "too much"]))] fn deserialize_invalid_messages(#[case] cbor: Result) { // Check the cbor is valid assert!(cbor.is_ok()); diff --git a/aquadoggo/src/replication/mod.rs b/aquadoggo/src/replication/mod.rs index ddddfbb6a..ba79445e7 100644 --- a/aquadoggo/src/replication/mod.rs +++ b/aquadoggo/src/replication/mod.rs @@ -27,8 +27,8 @@ pub type MessageType = u64; // Integers indicating message type for wire message format. pub const ANNOUNCE_TYPE: MessageType = 0; pub const SYNC_REQUEST_TYPE: MessageType = 1; -pub const SYNC_DONE_TYPE: MessageType = 2; -pub const ENTRY_TYPE: MessageType = 3; +pub const ENTRY_TYPE: MessageType = 2; +pub const SYNC_DONE_TYPE: MessageType = 3; pub const HAVE_TYPE: MessageType = 10; /// Currently supported p2panda replication protocol version. From 0b8bea2e2234b146e48ef064e0ac334321c1271b Mon Sep 17 00:00:00 2001 From: adz Date: Tue, 22 Aug 2023 22:54:50 +0200 Subject: [PATCH 37/66] Remove data dir, rename fields, introduce schema id enum in config --- Cargo.lock | 145 +++++++++++++++++------- aquadoggo/Cargo.toml | 4 +- aquadoggo/src/config.rs | 130 ++++++++------------- aquadoggo/src/network/behaviour.rs | 27 +++-- aquadoggo/src/network/config.rs | 89 ++++++++------- aquadoggo/src/network/service.rs | 18 ++- aquadoggo/src/node.rs | 8 +- aquadoggo/src/schema/schema_provider.rs | 36 +++--- aquadoggo/src/tests.rs | 8 +- aquadoggo_cli/Cargo.toml | 2 + aquadoggo_cli/src/key_pair.rs | 12 +- aquadoggo_cli/src/main.rs | 85 +++++++------- 12 files changed, 288 insertions(+), 276 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 502cb6cc1..14d042c6a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -170,11 +170,9 @@ dependencies = [ "ciborium", "ctor", "deadqueue", - "directories", "dynamic-graphql", "env_logger", "envy", - "exponential-backoff", "futures", "hex", "http", @@ -215,9 +213,11 @@ dependencies = [ "aquadoggo", "clap", "env_logger", + "figment", "hex", "libp2p", 
"p2panda-rs", + "serde", "tempfile", "tokio", "toml", @@ -495,7 +495,7 @@ checksum = "0e97ce7de6cf12de5d7226c73f5ba9811622f4db3a5b91b55c53e987e5f91cba" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -517,7 +517,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -534,7 +534,7 @@ checksum = "a564d521dd56509c4c47480d00b80ee55f7e385ae48db5744c67ad50c92d2ebf" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -561,6 +561,12 @@ dependencies = [ "num-traits", ] +[[package]] +name = "atomic" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c59bdb34bc650a32731b31bd8f0829cc15d24a708ee31559e0bb34f2bc320cba" + [[package]] name = "atomic-waker" version = "1.1.1" @@ -931,7 +937,7 @@ dependencies = [ "heck", "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -1246,15 +1252,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "directories" -version = "4.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f51c5d4ddabd36886dd3e1438cb358cdcb0d7c499cb99cb4ac2e38e18b5cb210" -dependencies = [ - "dirs-sys", -] - [[package]] name = "dirs" version = "4.0.0" @@ -1283,7 +1280,7 @@ checksum = "487585f4d0c6655fe74905e2504d8ad6908e4db67f744eb140876906c2f3175d" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -1438,15 +1435,6 @@ version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" -[[package]] -name = "exponential-backoff" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"47f78d87d930eee4b5686a2ab032de499c72bd1e954b84262bb03492a0f932cd" -dependencies = [ - "rand 0.8.5", -] - [[package]] name = "fast_chemail" version = "0.9.6" @@ -1471,6 +1459,20 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e825f6987101665dea6ec934c09ec6d721de7bc1bf92248e1d5810c8cd636b77" +[[package]] +name = "figment" +version = "0.10.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4547e226f4c9ab860571e070a9034192b3175580ecea38da34fcdb53a018c9a5" +dependencies = [ + "atomic", + "pear", + "serde", + "toml", + "uncased", + "version_check", +] + [[package]] name = "flume" version = "0.10.14" @@ -1581,7 +1583,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -2005,6 +2007,12 @@ dependencies = [ "hashbrown 0.14.0", ] +[[package]] +name = "inlinable_string" +version = "0.1.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" + [[package]] name = "instant" version = "0.1.12" @@ -2575,7 +2583,7 @@ dependencies = [ "proc-macro-warning", "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -3153,6 +3161,29 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "pear" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61a386cd715229d399604b50d1361683fe687066f42d56f54be995bc6868f71c" +dependencies = [ + "inlinable_string", + "pear_codegen", + "yansi", +] + +[[package]] +name = "pear_codegen" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da9f0f13dac8069c139e8300a6510e3f4143ecf5259c60b116a9b271b4ca0d54" 
+dependencies = [ + "proc-macro2 1.0.66", + "proc-macro2-diagnostics", + "quote 1.0.31", + "syn 2.0.29", +] + [[package]] name = "pem" version = "1.1.1" @@ -3198,7 +3229,7 @@ dependencies = [ "pest_meta", "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -3229,7 +3260,7 @@ checksum = "ec2e072ecce94ec471b13398d5402c188e76ac03cf74dd1a975161b23a3f6d9c" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -3319,7 +3350,7 @@ checksum = "70550716265d1ec349c41f70dd4f964b4fd88394efe4405f0c1da679c4799a07" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -3340,6 +3371,19 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "proc-macro2-diagnostics" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" +dependencies = [ + "proc-macro2 1.0.66", + "quote 1.0.31", + "syn 2.0.29", + "version_check", + "yansi", +] + [[package]] name = "prometheus-client" version = "0.21.2" @@ -3976,9 +4020,9 @@ checksum = "b0293b4b29daaf487284529cc2f5675b8e57c61f70167ba415a463651fd6a918" [[package]] name = "serde" -version = "1.0.171" +version = "1.0.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30e27d1e4fd7659406c492fd6cfaf2066ba8773de45ca75e855590f856dc34a9" +checksum = "be9b6f69f1dfd54c3b568ffa45c310d6973a5e5148fd40cf515acaf38cf5bc31" dependencies = [ "serde_derive", ] @@ -4015,13 +4059,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.171" +version = "1.0.185" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "389894603bd18c46fa56231694f8d827779c0951a667087194cf9de94ed24682" +checksum = "dc59dfdcbad1437773485e0367fea4b090a2e0a16d9ffc46af47764536a298ec" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] 
@@ -4396,9 +4440,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.26" +version = "2.0.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45c3457aacde3c65315de5031ec191ce46604304d2446e803d71ade03308d970" +checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", @@ -4484,7 +4528,7 @@ checksum = "f1728216d3244de4f14f14f8c15c79be1a7c67867d28d69b719690e2a19fb445" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -4557,7 +4601,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -4711,7 +4755,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] [[package]] @@ -4830,6 +4874,15 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" +[[package]] +name = "uncased" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b9bc53168a4be7402ab86c3aad243a84dd7381d09be0eddc81280c1da95ca68" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-bidi" version = "0.3.13" @@ -5014,7 +5067,7 @@ dependencies = [ "once_cell", "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", "wasm-bindgen-shared", ] @@ -5048,7 +5101,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5328,6 +5381,12 @@ dependencies = [ "static_assertions 1.1.0", ] +[[package]] +name = "yansi" +version = "1.0.0-rc.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1367295b8f788d371ce2dbc842c7b709c73ee1364d30351dd300ec2203b12377" + [[package]] name = "yasmf-hash" version = "0.1.1" @@ -5370,5 +5429,5 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.31", - "syn 2.0.26", + "syn 2.0.29", ] diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 022c034ed..d7a3a2a6b 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -30,10 +30,7 @@ bs58 = "0.4.0" deadqueue = { version = "0.2.3", default-features = false, features = [ "unlimited", ] } -directories = "4.0.1" dynamic-graphql = "0.7.3" -envy = "0.4.2" -exponential-backoff = "1.2.0" futures = "0.3.23" hex = "0.4.3" http = "0.2.9" @@ -87,6 +84,7 @@ async-recursion = "1.0.4" ciborium = "0.2.0" ctor = "0.1.23" env_logger = "0.9.0" +envy = "0.4.2" http = "0.2.9" hyper = "0.14.19" libp2p-swarm-test = "0.2.0" diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 1712a3fed..a7b363a31 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -1,121 +1,87 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::fs; -use std::path::PathBuf; - use anyhow::Result; -use directories::ProjectDirs; use p2panda_rs::schema::SchemaId; use serde::Deserialize; use crate::network::NetworkConfiguration; -/// Data directory name. -const DATA_DIR_NAME: &str = "aquadoggo"; - -/// Filename of default sqlite database. -const DEFAULT_SQLITE_NAME: &str = "aquadoggo-node.sqlite3"; - /// Configuration object holding all important variables throughout the application. -/// -/// Each configuration also assures that a data directory exists on the host machine where database -/// files or private keys get persisted. 
-/// -/// When no custom directory path is set it reads the process environment $XDG_DATA_HOME variable -/// to determine the XDG data directory path which is $HOME/.local/share/aquadoggo on Linux by -/// default. -#[derive(Deserialize, Debug, Clone)] +#[derive(Debug, Clone, Deserialize)] #[serde(default)] pub struct Configuration { - /// Path to data directory. - pub base_path: Option, + /// URL / connection string to PostgreSQL or SQLite database. + pub database_url: String, - /// Database url (SQLite or PostgreSQL). - pub database_url: Option, - - /// Maximum number of database connections in pool. + /// Maximum number of connections that the database pool should maintain. + /// + /// Be mindful of the connection limits for the database as well as other applications which + /// may want to connect to the same database (or even multiple instances of the same + /// application in high-availability deployments). pub database_max_connections: u32, - /// RPC API HTTP server port. + /// HTTP port, serving the GraphQL API (for example hosted under + /// http://localhost:2020/graphql). This API is used for client-node communication. Defaults to + /// 2020. pub http_port: u16, - /// Network configuration. - pub network: NetworkConfiguration, - - /// Materializer worker pool size. + /// Number of concurrent workers which defines the maximum of materialization tasks which can + /// be worked on simultaneously. + /// + /// Use a higher number if you run your node on a powerful machine with many CPU cores. Lower + /// number for low-energy devices with limited resources. pub worker_pool_size: u32, - /// The ids of schema this node supports. + /// List of schema ids which a node will replicate and expose on the GraphQL API. /// - /// If `None` then the node will support all system schema and any new schema it discovers. 
- pub supported_schema_ids: Option>, + /// When whitelisting a schema you automatically opt into announcing, replicating and + /// materializing documents connected to it, supporting applications which are dependent on + /// this data. + pub supported_schema_ids: SupportedSchemaIds, + + /// Network configuration. + pub network: NetworkConfiguration, } impl Default for Configuration { fn default() -> Self { Self { - base_path: None, - database_url: None, + database_url: "sqlite::memory:".into(), database_max_connections: 32, http_port: 2020, - network: NetworkConfiguration::default(), worker_pool_size: 16, - supported_schema_ids: None, + supported_schema_ids: SupportedSchemaIds::Wildcard, + network: NetworkConfiguration::default(), } } } -impl Configuration { - /// Returns the data directory path and creates the folders when not existing. - fn create_data_directory(path: Option) -> Result { - // Use custom data directory path or determine one from host - let base_path = path.unwrap_or_else(|| { - ProjectDirs::from("", "", DATA_DIR_NAME) - .ok_or("Can not determine data directory") - .unwrap() - .data_dir() - .to_path_buf() - }); - - // Create folders when they don't exist yet - fs::create_dir_all(&base_path)?; - - Ok(base_path) - } +#[derive(Debug, Clone)] +pub enum SupportedSchemaIds { + /// Support all schema ids. + Wildcard, - /// Create a new configuration object pulling in the variables from the process environment. - /// This method also assures a data directory exists on the host machine. 
- pub fn new(path: Option) -> Result { - // Make sure data directory exists - let base_path = Self::create_data_directory(path)?; - - // Create configuration based on defaults and populate with environment variables - let mut config = envy::from_env::()?; - - // Store data directory path in object - config.base_path = Some(base_path); - - // Set default database url (sqlite) when not given - config.database_url = match config.database_url { - Some(url) => Some(url), - None => { - let mut path = config.base_path.clone().unwrap(); - path.push(DEFAULT_SQLITE_NAME); - Some(format!("sqlite:{}", path.to_str().unwrap())) - } - }; + /// Support only a certain list of schema ids. + List(Vec), +} - Ok(config) +impl Default for SupportedSchemaIds { + fn default() -> Self { + Self::Wildcard } } -#[cfg(test)] -impl Configuration { - /// Returns a new configuration object for a node which stores all data temporarily in memory. - pub fn new_ephemeral() -> Self { - Configuration { - database_url: Some("sqlite::memory:".to_string()), - ..Default::default() +impl<'de> Deserialize<'de> for SupportedSchemaIds { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let supported_schema_ids: Vec = Vec::deserialize(deserializer)?; + + if supported_schema_ids.is_empty() { + Ok(Self::Wildcard) + } else { + Ok(Self::List(supported_schema_ids)) } } } diff --git a/aquadoggo/src/network/behaviour.rs b/aquadoggo/src/network/behaviour.rs index c30a312b1..b8a6d5357 100644 --- a/aquadoggo/src/network/behaviour.rs +++ b/aquadoggo/src/network/behaviour.rs @@ -87,16 +87,15 @@ impl P2pandaBehaviour { // Create an identify server behaviour with default configuration if a rendezvous server // address has been provided or the rendezvous server flag is set - let identify = - if network_config.relay_address.is_some() || network_config.relay_server_enabled { - debug!("Identify network behaviour enabled"); - Some(identify::Behaviour::new(identify::Config::new( - 
format!("{NODE_NAMESPACE}/1.0.0"), - key_pair.public(), - ))) - } else { - None - }; + let identify = if network_config.relay_node.is_some() || network_config.relay { + debug!("Identify network behaviour enabled"); + Some(identify::Behaviour::new(identify::Config::new( + format!("{NODE_NAMESPACE}/1.0.0"), + key_pair.public(), + ))) + } else { + None + }; // Create an mDNS behaviour with default configuration if the mDNS flag is set let mdns = if network_config.mdns { @@ -117,7 +116,7 @@ impl P2pandaBehaviour { // Create a rendezvous client behaviour with default configuration if a rendezvous server // address has been provided - let rendezvous_client = if network_config.relay_address.is_some() { + let rendezvous_client = if network_config.relay_node.is_some() { debug!("Rendezvous client network behaviour enabled"); Some(rendezvous::client::Behaviour::new(key_pair)) } else { @@ -126,7 +125,7 @@ impl P2pandaBehaviour { // Create a rendezvous server behaviour with default configuration if the rendezvous server // flag is set - let rendezvous_server = if network_config.relay_server_enabled { + let rendezvous_server = if network_config.relay { debug!("Rendezvous server network behaviour enabled"); Some(rendezvous::server::Behaviour::new( rendezvous::server::Config::default(), @@ -141,7 +140,7 @@ impl P2pandaBehaviour { // Create a relay server behaviour with default configuration if the relay server flag is // set - let relay_server = if network_config.relay_server_enabled { + let relay_server = if network_config.relay { debug!("Relay server network behaviour enabled"); Some(relay::Behaviour::new( peer_id, @@ -156,7 +155,7 @@ impl P2pandaBehaviour { }; // Create UDP holepunching behaviour (DCUtR) if the flag is set - let dcutr = if network_config.relay_server_enabled || relay_client.is_some() { + let dcutr = if network_config.relay || relay_client.is_some() { Some(dcutr::Behaviour::new(peer_id)) } else { None diff --git a/aquadoggo/src/network/config.rs 
b/aquadoggo/src/network/config.rs index 214c35217..4cf1c2e76 100644 --- a/aquadoggo/src/network/config.rs +++ b/aquadoggo/src/network/config.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: AGPL-3.0-or-later use libp2p::connection_limits::ConnectionLimits; -use libp2p::{Multiaddr, PeerId}; +use libp2p::Multiaddr; use serde::{Deserialize, Serialize}; /// The namespace used by the `identify` network behaviour. @@ -10,35 +10,37 @@ pub const NODE_NAMESPACE: &str = "aquadoggo"; /// Network config for the node. #[derive(Debug, Clone, Deserialize, Serialize)] pub struct NetworkConfiguration { - /// Dial concurrency factor. - /// - /// Number of addresses concurrently dialed for an outbound connection attempt with a single - /// peer. - pub dial_concurrency_factor: u8, - - /// Maximum incoming connections. - pub max_connections_in: u32, + /// QUIC port for node-to-node communication. + pub quic_port: u16, - /// Maximum outgoing connections. - pub max_connections_out: u32, + /// Discover peers on the local network via mDNS (over IPv4 only, using port 5353). + pub mdns: bool, - /// Maximum pending incoming connections. + /// List of addresses of trusted and known nodes. /// - /// A pending connection is one which has been initiated but has not yet received a response. - pub max_connections_pending_in: u32, - - /// Maximum pending outgoing connections. + /// Nodes mentioned in this list can be connected directly (for example when they are hosted + /// with a static IP Address). If the node needs to connect to other ndoes with changing, + /// dynamic IP addresses or even with nodes behind a firewall or NAT, at least one Relay will + /// be required. + pub trusted_nodes: Vec, + + /// Set to true if node should also function as a relay. Other nodes can use relays to aid + /// discovery and establishing connectivity. /// - /// A pending connection is one which has been initiated but has not yet received a response. 
- pub max_connections_pending_out: u32, + /// Relays _need_ to be hosted in a way where they can be reached directly, for example with a + /// static IP address through an VPS. + pub relay: bool, - /// Maximum connections per peer (includes outgoing and incoming). - pub max_connections_per_peer: u32, - - /// mDNS discovery enabled. + /// Address of a peer which can act as a relay/rendezvous server. /// - /// Automatically discover peers on the local network (over IPv4 only, using port 5353). - pub mdns: bool, + /// Relays help discover other nodes on the internet (also known as "rendesvouz" or "bootstrap" + /// server) and help establishing direct p2p connections when node is behind a firewall or NAT + /// (also known as "holepunching"). + /// + /// When a direct connection is not possible the relay will help to redirect the (encrypted) + /// traffic as an intermediary between us and other nodes. The node will contact each server + /// and register our IP address for other peers. + pub relay_node: Option, /// Notify handler buffer size. /// @@ -55,22 +57,30 @@ pub struct NetworkConfiguration { /// manager will sleep. pub per_connection_event_buffer_size: usize, - /// The addresses of remote peers to replicate from. - pub remote_peers: Vec, + /// Dial concurrency factor. + /// + /// Number of addresses concurrently dialed for an outbound connection attempt with a single + /// peer. + pub dial_concurrency_factor: u8, - /// QUIC transport port. - pub quic_port: u16, + /// Maximum incoming connections. + pub max_connections_in: u32, - /// Relay server behaviour enabled. + /// Maximum outgoing connections. + pub max_connections_out: u32, + + /// Maximum pending incoming connections. /// - /// Serve as a relay point for peer connections. - pub relay_server_enabled: bool, + /// A pending connection is one which has been initiated but has not yet received a response. + pub max_connections_pending_in: u32, - /// Address of a peer which can act as a relay/rendezvous server. 
- pub relay_address: Option, + /// Maximum pending outgoing connections. + /// + /// A pending connection is one which has been initiated but has not yet received a response. + pub max_connections_pending_out: u32, - /// Peer id of the relay if known. - pub relay_peer_id: Option, + /// Maximum connections per peer (includes outgoing and incoming). + pub max_connections_per_peer: u32, } impl Default for NetworkConfiguration { @@ -82,14 +92,13 @@ impl Default for NetworkConfiguration { max_connections_pending_in: 8, max_connections_pending_out: 8, max_connections_per_peer: 8, - mdns: false, + mdns: true, notify_handler_buffer_size: 128, per_connection_event_buffer_size: 8, quic_port: 2022, - relay_address: None, - relay_peer_id: None, - remote_peers: Vec::new(), - relay_server_enabled: false, + relay: false, + relay_node: None, + trusted_nodes: Vec::new(), } } } diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index 3e7b76f6a..a905e4d91 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -44,7 +44,7 @@ pub async fn network_service( info!("Local peer id: {local_peer_id}"); // The swarm can be initiated with or without "relay" capabilities. - let mut swarm = if network_config.relay_server_enabled { + let mut swarm = if network_config.relay { info!("Networking service initializing with relay capabilities..."); swarm::build_relay_swarm(&network_config, key_pair).await? } else { @@ -79,7 +79,7 @@ pub async fn network_service( // If a relay node address was provided, then connect and performing necessary setup before we // run the main event loop. 
- if let Some(relay_address) = network_config.relay_address.clone() { + if let Some(relay_address) = network_config.relay_node.clone() { info!("Connecting to relay node at: {relay_address}"); connect_to_relay(&mut swarm, &mut network_config, relay_address).await?; } @@ -104,14 +104,14 @@ pub async fn connect_to_relay( swarm.behaviour_mut().peers.disable(); // Connect to the relay server. Not for the reservation or relayed connection, but to (a) learn - // our local public address and (b) enable a freshly started relay to learn its public - // address. + // our local public address and (b) enable a freshly started relay to learn its public address. swarm.dial(relay_address.clone())?; // Wait to get confirmation that we told the relay node it's public address and that they told // us ours. let mut learned_observed_addr = false; let mut told_relay_observed_addr = false; + let mut learned_relay_peer_id: Option = None; loop { match swarm.next().await.unwrap() { @@ -138,8 +138,8 @@ pub async fn connect_to_relay( relay_address.push(Protocol::P2p(peer_id)); // Update values on the config. - network_config.relay_peer_id = Some(peer_id); - network_config.relay_address = Some(relay_address.clone()); + learned_relay_peer_id = Some(peer_id); + network_config.relay_node = Some(relay_address.clone()); // All done, we've learned our external address successfully. learned_observed_addr = true; @@ -153,9 +153,7 @@ pub async fn connect_to_relay( } // We know the relays peer address was learned in the above step so we unwrap it here. - let relay_peer_id = network_config - .relay_peer_id - .expect("Received relay peer id"); + let relay_peer_id = learned_relay_peer_id.expect("Received relay peer id"); // Now we have received our external address, and we know the relay has too, listen on our // relay circuit address. 
@@ -382,7 +380,7 @@ impl EventLoop { for address in registration.record.addresses() { let peer_id = registration.record.peer_id(); if peer_id != self.local_peer_id { - if let Some(relay_address) = &self.network_config.relay_address { + if let Some(relay_address) = &self.network_config.relay_node { info!("Add new peer to address book: {} {}", peer_id, address); let peer_circuit_address = relay_address diff --git a/aquadoggo/src/node.rs b/aquadoggo/src/node.rs index 027466ad3..77d85bedb 100644 --- a/aquadoggo/src/node.rs +++ b/aquadoggo/src/node.rs @@ -24,14 +24,10 @@ async fn initialize_db(config: &Configuration) -> Result { openssl_probe::init_ssl_cert_env_vars(); // Create database when not existing - create_database(&config.database_url.clone().unwrap()).await?; + create_database(&config.database_url).await?; // Create connection pool - let pool = connection_pool( - &config.database_url.clone().unwrap(), - config.database_max_connections, - ) - .await?; + let pool = connection_pool(&config.database_url, config.database_max_connections).await?; // Run pending migrations run_pending_migrations(&pool).await?; diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 7f67bd807..83afbbb71 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -10,17 +10,19 @@ use p2panda_rs::Human; use tokio::sync::broadcast::{channel, Receiver, Sender}; use tokio::sync::Mutex; -/// Provides fast thread-safe access to system and application schemas. +use crate::config::SupportedSchemaIds; + +/// Provides fast access to system and application schemas. /// /// Application schemas can be added and updated. #[derive(Clone, Debug)] pub struct SchemaProvider { - /// In-memory store of registered schemas. + /// In-memory store of registered and materialized schemas. schemas: Arc>>, - /// Optional list of whitelisted schema ids. 
When set, only these schema ids will be accepted - /// on this node, if not set _all_ schema ids are accepted. - whitelisted_schema_ids: Option>, + /// Optional list of whitelisted schema ids. When not empty, only these schema ids will be + /// accepted on this node, if not set _all_ schema ids are accepted (wildcard). + supported_schema_ids: SupportedSchemaIds, /// Sender for broadcast channel informing subscribers about updated schemas. tx: Sender, @@ -28,10 +30,7 @@ pub struct SchemaProvider { impl SchemaProvider { /// Returns a `SchemaProvider` containing the given application schemas and all system schemas. - pub fn new( - application_schemas: Vec, - whitelisted_schema_ids: Option>, - ) -> Self { + pub fn new(application_schemas: Vec, supported_schema_ids: SupportedSchemaIds) -> Self { // Collect all system and application schemas. let mut schemas = SYSTEM_SCHEMAS.clone(); schemas.extend(&application_schemas); @@ -42,7 +41,8 @@ impl SchemaProvider { index.insert(schema.id().to_owned(), schema.to_owned()); } - if let Some(schema_ids) = &whitelisted_schema_ids { + // Filter out all unsupported schema ids when list was set + if let SupportedSchemaIds::List(schema_ids) = &supported_schema_ids { index.retain(|id, _| schema_ids.contains(id)); }; @@ -59,7 +59,7 @@ impl SchemaProvider { Self { schemas: Arc::new(Mutex::new(index)), - whitelisted_schema_ids, + supported_schema_ids, tx, } } @@ -84,8 +84,8 @@ impl SchemaProvider { /// Returns `true` if a schema was updated or it already existed in it's current state, and /// `false` if it was inserted. 
pub async fn update(&self, schema: Schema) -> Result { - if let Some(whitelisted_ids) = self.whitelisted_schema_ids.as_ref() { - if !whitelisted_ids.contains(schema.id()) { + if let SupportedSchemaIds::List(supported_schema_ids) = &self.supported_schema_ids { + if !supported_schema_ids.contains(schema.id()) { bail!("Attempted to add unsupported schema to schema provider"); } }; @@ -117,9 +117,9 @@ impl SchemaProvider { /// If no whitelist was set it returns the list of all currently known schema ids. If a /// whitelist was set it directly returns the list itself. pub async fn supported_schema_ids(&self) -> Vec { - match &self.whitelisted_schema_ids { - Some(schema_ids) => schema_ids.clone(), - None => self + match &self.supported_schema_ids { + SupportedSchemaIds::List(schema_ids) => schema_ids.clone(), + SupportedSchemaIds::Wildcard => self .all() .await .iter() @@ -131,13 +131,13 @@ impl SchemaProvider { /// Returns true if a whitelist of supported schema ids was provided through user /// configuration. pub fn is_whitelist_active(&self) -> bool { - self.whitelisted_schema_ids.is_some() + matches!(self.supported_schema_ids, SupportedSchemaIds::List(_)) } } impl Default for SchemaProvider { fn default() -> Self { - Self::new(Vec::new(), None) + Self::new(Vec::new(), SupportedSchemaIds::Wildcard) } } diff --git a/aquadoggo/src/tests.rs b/aquadoggo/src/tests.rs index 00ba33122..2aea479f5 100644 --- a/aquadoggo/src/tests.rs +++ b/aquadoggo/src/tests.rs @@ -37,13 +37,7 @@ async fn e2e() { // designed to be "local first" which means they are fine if there is currently no internet // connection on your computer. - // Node configuration. - // - // Before even starting the node, we need to configure it a little. We mostly go for the - // default options. The only thing we want to do change is the database config. We want an - // in-memory sqlite database for this test. 
- - let config = Configuration::new_ephemeral(); + let config = Configuration::default(); let key_pair = KeyPair::new(); // Start the node. diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 959217228..057bfb9ae 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -23,9 +23,11 @@ doc = false anyhow = "1.0.62" clap = { version = "4.1.8", features = ["derive"] } env_logger = "0.9.0" +figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" libp2p = "0.52.0" p2panda-rs = "0.7.1" +serde = { version = "1.0.185", features = ["serde_derive"] } tokio = { version = "1.28.2", features = ["full"] } toml = "0.7.6" diff --git a/aquadoggo_cli/src/key_pair.rs b/aquadoggo_cli/src/key_pair.rs index 68e9f467a..279600cd7 100644 --- a/aquadoggo_cli/src/key_pair.rs +++ b/aquadoggo_cli/src/key_pair.rs @@ -13,15 +13,12 @@ const KEY_PAIR_FILE_NAME: &str = "private-key"; /// Returns a new instance of `KeyPair` by either loading the private key from a path or generating /// a new one and saving it in the file system. -pub fn generate_or_load_key_pair(base_path: PathBuf) -> Result { - let mut key_pair_path = base_path; - key_pair_path.push(KEY_PAIR_FILE_NAME); - - let key_pair = if key_pair_path.is_file() { - load_key_pair_from_file(key_pair_path)? +pub fn generate_or_load_key_pair(path: PathBuf) -> Result { + let key_pair = if path.is_file() { + load_key_pair_from_file(path)? } else { let key_pair = KeyPair::new(); - save_key_pair_to_file(&key_pair, key_pair_path)?; + save_key_pair_to_file(&key_pair, path)?; key_pair }; @@ -32,7 +29,6 @@ pub fn generate_or_load_key_pair(base_path: PathBuf) -> Result { /// file system. /// /// This method is useful to run nodes for testing purposes. 
-#[allow(dead_code)] pub fn generate_ephemeral_key_pair() -> KeyPair { KeyPair::new() } diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 2c20c011a..d023c91ba 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -4,26 +4,25 @@ mod key_pair; mod schemas; -use std::convert::{TryFrom, TryInto}; -use std::fs::File; +use std::convert::TryFrom; use std::net::IpAddr; use anyhow::Result; use aquadoggo::{Configuration, NetworkConfiguration, Node}; use clap::Parser; +use figment::providers::{Env, Format, Serialized, Toml}; +use figment::Figment; use libp2p::multiaddr::Protocol; use libp2p::Multiaddr; +use serde::{Deserialize, Serialize}; -const CONFIG_FILE_PATH: &str = "config.toml"; +const CONFIG_FILE_NAME: &str = "config.toml"; +const CONFIG_ENV_VAR_PREFIX: &str = "DOGGO_"; -#[derive(Parser, Debug)] +#[derive(Parser, Debug, Serialize, Deserialize)] #[command(name = "aquadoggo Node", version)] /// Node server for the p2panda network. struct Cli { - /// Path to data folder, $HOME/.local/share/aquadoggo by default on Linux. - #[arg(short, long)] - data_dir: Option, - /// Port for the http server, 2020 by default. 
#[arg(short = 'P', long)] http_port: Option, @@ -61,9 +60,9 @@ impl TryFrom for Configuration { type Error = anyhow::Error; fn try_from(cli: Cli) -> Result { - let mut config = Configuration::new(cli.data_dir)?; + let mut config = Configuration::default(); - let relay_address = if let Some(relay_address) = cli.relay_address { + let relay_node = if let Some(relay_address) = cli.relay_address { let mut multiaddr = match relay_address { IpAddr::V4(ip) => Multiaddr::from(Protocol::Ip4(ip)), IpAddr::V6(ip) => Multiaddr::from(Protocol::Ip6(ip)), @@ -82,9 +81,9 @@ impl TryFrom for Configuration { config.network = NetworkConfiguration { mdns: cli.mdns.unwrap_or(false), - relay_server_enabled: cli.enable_relay_server, - relay_address, - remote_peers: cli.remote_node_addresses, + relay: cli.enable_relay_server, + relay_node, + trusted_nodes: cli.remote_node_addresses, ..config.network }; @@ -101,37 +100,33 @@ async fn main() { env_logger::init(); // Parse command line arguments - let cli = Cli::parse(); - - // Load configuration parameters and apply defaults - let mut config: Configuration = cli.try_into().expect("Could not load configuration"); - - // Read schema ids from config.toml file or - let supported_schemas = match File::open(CONFIG_FILE_PATH) { - Ok(mut file) => Some( - schemas::read_schema_ids_from_file(&mut file) - .expect("Reading schema ids from config.toml failed"), - ), - Err(_) => None, - }; - config.supported_schema_ids = supported_schemas; - - // We unwrap the path as we know it has been initialised during the conversion step before - let base_path = config.base_path.clone().unwrap(); - - // Generate new key pair or load it from file - let key_pair = - key_pair::generate_or_load_key_pair(base_path).expect("Could not load key pair from file"); - - // Start p2panda node in async runtime - let node = Node::start(key_pair, config).await; - - // Run this until [CTRL] + [C] got pressed or something went wrong - tokio::select! 
{ - _ = tokio::signal::ctrl_c() => (), - _ = node.on_exit() => (), + let config: Result = Figment::new() + .merge(Toml::file(CONFIG_FILE_NAME)) + .merge(Env::prefixed(CONFIG_ENV_VAR_PREFIX)) + .merge(Serialized::defaults(Cli::parse())) + .extract(); + + match config { + Ok(config) => { + let key_pair = key_pair::generate_ephemeral_key_pair(); + + // Start p2panda node in async runtime + let node = Node::start(key_pair, config).await; + + // Run this until [CTRL] + [C] got pressed or something went wrong + tokio::select! { + _ = tokio::signal::ctrl_c() => (), + _ = node.on_exit() => (), + } + + // Wait until all tasks are gracefully shut down and exit + node.shutdown().await; + } + Err(error) => { + println!("Error: Could not load configuration:"); + for error in error { + println!("- {}", error); + } + } } - - // Wait until all tasks are gracefully shut down and exit - node.shutdown().await; } From 2b8c6ea57ec2d895d36d59e5261f81c7a5b9d4c1 Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 00:28:22 +0200 Subject: [PATCH 38/66] Introduce new config.toml file --- .gitignore | 3 + aquadoggo_cli/config.toml | 166 ++++++++++++++++++++++++++++++ aquadoggo_cli/example_config.toml | 19 ---- 3 files changed, 169 insertions(+), 19 deletions(-) create mode 100644 aquadoggo_cli/config.toml delete mode 100644 aquadoggo_cli/example_config.toml diff --git a/.gitignore b/.gitignore index 7783ef8b4..462c846f4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ target/ debug/ + +# Config files config.toml +!aquadoggo_cli/config.toml # IDE .vscode diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml new file mode 100644 index 000000000..69e69b9ef --- /dev/null +++ b/aquadoggo_cli/config.toml @@ -0,0 +1,166 @@ +# aquadoggo configuration file +# +# Copy this file to the location where aquadoggo will be run or refer to it +# using the -c command line argument when running aquadoggo. Replace the +# example values with your own desired configuration. 
+# +# NOTE: Paths in this file follow the XDG Base Directory Specification for +# Linux. You might want to adjust these values for your respective operating +# system. + +# ~~~~~~~ +# Schemas +# ~~~~~~~ + +# List of schema ids which a node will replicate and expose on the GraphQL API. +# +# When whitelisting a schema you automatically opt into announcing, replicating +# and materializing documents connected to it, supporting applications which +# are dependent on this data. +# +# It is recommended to set this list to all schema ids your own application +# should support. Defaults to an empty list. +# +# WARNING: When set empty, your node will support _all_ schema ids. This is +# useful for experimentation and local development but _not_ recommended for +# production settings. +# +supported_schema_ids = [ + # To discover new schema, set your node to replicate schema definition + # documents by including these two built-in schema ids. Your node will now + # search for and replicate schemas which have been published to the + # network. + # "schema_field_definition_v1", + # "schema_definition_v1", + + # Once you discover new schemas and want to start replicating their + # documents, then add their schema ids to this list as well. It's also + # possible to load schema directly onto your node using the tool `fishy`: + # https://github.com/p2panda/fishy + # "example_0020a01fe...", +] + +# ~~~~~~~~ +# Database +# ~~~~~~~~ + +# URL / connection string to PostgreSQL or SQLite database. +# +# When commented out it will default to an in-memory SQLite database URL. +# +# WARNING: When commented out, no data will be persisted after the node shuts +# down. Uncomment this value when running on production as you will otherwise +# lose data. +# +# database_url = "sqlite:$HOME/.local/share/aquadoggo/db.sqlite3" + +# Maximum number of connections that the database pool should maintain. 
+# +# Be mindful of the connection limits for your database as well as other +# applications which may want to connect to the same database (or even multiple +# instances of the same application in high-availability deployments). +# +database_max_connections = 32 + +# ~~~~~~~ +# Workers +# ~~~~~~~ + +# Number of concurrent workers which defines the maximum of materialization +# tasks which can be worked on simultaneously. +# +# Use a higher number if you run your node on a powerful machine with many CPU +# cores. Lower number for low-energy devices with limited resources. +# +worker_pool_size = 16 + +# ~~~~~ +# Ports +# ~~~~~ + +# HTTP port, serving the GraphQL API (for example hosted under +# http://localhost:2020/graphql). This API is used for client-node +# communication. Defaults to 2020. +# +# When port is taken the node will automatically pick a random, free port. +# +http_port = 2020 + +# QUIC port for node-node communication and data replication. Defaults to 2022. +# +# When port is taken the node will automatically pick a random, free port. +# +quic_port = 2022 + +# ~~~~~~~~ +# Identity +# ~~~~~~~~ + +# Path to persist your ed25519 private key file. The key is used to identify +# you towards other nodes during network discovery and replication. This key is +# _not_ used to create and sign data. +# +# Will be generated newly and stored under this path when node starts for the +# first time. +# +# When commented out, your node will generate an ephemeral private key on every +# start up and _not_ persist it. +# +# private_key = "$HOME/.local/share/aquadoggo/private-key.txt" + +# ~~~~~~~~~~~~~~ +# Local networks +# ~~~~~~~~~~~~~~ + +# mDNS to discover other peers on the local network. Enabled by default. +# +mdns = true + +# ~~~~~ +# Nodes +# ~~~~~ + +# List of addresses of trusted and known nodes. +# +# NOTE: Make sure that nodes mentioned in this list can be connected directly +# (for example when they are hosted with a static IP Address). 
If you need to +# connect to nodes with changing, dynamic IP addresses or even with nodes +# behind a firewall or NAT, you will need at least one Relay. +# +node_addresses = [ + # "192.0.2.0:2022", + # "192.0.2.2:3000", +] + +# ~~~~~~ +# Relays +# ~~~~~~ + +# Address of relay. +# +# Relays help discover other nodes on the internet (also known as "rendezvous" +# or "bootstrap" server) and help establish direct p2p connections when node +# is behind a firewall or NAT (also known as "holepunching"). +# +# When a direct connection is not possible the relay will help to redirect the +# (encrypted) traffic as an intermediary between us and other nodes. The node +# will contact each server and register our IP address for other peers. +# +# WARNING: This will potentially expose your IP address on the network. Do only +# connect to trusted relays or make sure your IP address is hidden via a VPN or +# proxy. +# +# WARNING: Using relays will potentially connect you to untrusted / unknown +# nodes with which you will then exchange data. If in doubt, use the list +# of known node addresses instead and only connect to trusted nodes. +# +# relay_address = "192.0.2.16:2022" + +# Set to true if our node should also function as a relay. Defaults to false. +# +# Other nodes can use relays to aid discovery and establishing connectivity. +# +# NOTE: Relays _need_ to be hosted in a way where they can be reached directly, +# for example with a static IP address through a VPS. +# +relay = false diff --git a/aquadoggo_cli/example_config.toml b/aquadoggo_cli/example_config.toml deleted file mode 100644 index 82c127373..000000000 --- a/aquadoggo_cli/example_config.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Example config.toml file -# -# Copy this file to the location where aquadoggo will be run and rename to `config.toml`. Replace -# the example values with your own desired configuration. 
- -# List of schema ids which will configure which documents a node will replicate and expose on the -# GraphQL API. -supported_schema_ids = [ - # To discover new schema, set your node to replicate schema definition documents by including these - # two built-in schema ids. Your node will now search for and replicate schemas which have been - # published to the network. - "schema_field_definition_v1", - "schema_definition_v1", - - # Once you discover new schemas and want to start replicating their documents, then add their - # schema ids to this list as well. It's also possible to load schema directly onto your node - # using the tool `fishy`: https://github.com/p2panda/fishy - "my_interesting_schema_0020a01f72a5f28f6a559b4942e3525de2bb2413d05897526fe2250e3b57384983a2", -] From 3c52f61ea64c6852f09a7b72697ccbade27c312d Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 00:29:06 +0200 Subject: [PATCH 39/66] Better config field names, rename struct to WildcardOption --- aquadoggo/src/config.rs | 35 +++++++------------------ aquadoggo/src/lib.rs | 2 +- aquadoggo/src/network/behaviour.rs | 4 +-- aquadoggo/src/network/config.rs | 8 +++--- aquadoggo/src/network/service.rs | 6 ++--- aquadoggo/src/schema/schema_provider.rs | 21 ++++++++------- 6 files changed, 31 insertions(+), 45 deletions(-) diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index a7b363a31..973ea470e 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -1,14 +1,11 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use anyhow::Result; use p2panda_rs::schema::SchemaId; -use serde::Deserialize; use crate::network::NetworkConfiguration; /// Configuration object holding all important variables throughout the application. -#[derive(Debug, Clone, Deserialize)] -#[serde(default)] +#[derive(Debug, Clone)] pub struct Configuration { /// URL / connection string to PostgreSQL or SQLite database. 
pub database_url: String, @@ -37,7 +34,7 @@ pub struct Configuration { /// When whitelisting a schema you automatically opt into announcing, replicating and /// materializing documents connected to it, supporting applications which are dependent on /// this data. - pub supported_schema_ids: SupportedSchemaIds, + pub supported_schema_ids: WildcardOption, /// Network configuration. pub network: NetworkConfiguration, @@ -50,38 +47,24 @@ impl Default for Configuration { database_max_connections: 32, http_port: 2020, worker_pool_size: 16, - supported_schema_ids: SupportedSchemaIds::Wildcard, + supported_schema_ids: WildcardOption::Wildcard, network: NetworkConfiguration::default(), } } } +/// Set a configuration value to either a concrete set of elements or to a wildcard (*). #[derive(Debug, Clone)] -pub enum SupportedSchemaIds { - /// Support all schema ids. +pub enum WildcardOption { + /// Support all possible items. Wildcard, - /// Support only a certain list of schema ids. - List(Vec), + /// Support only a certain set of items. 
+ Set(Vec), } -impl Default for SupportedSchemaIds { +impl Default for WildcardOption { fn default() -> Self { Self::Wildcard } } - -impl<'de> Deserialize<'de> for SupportedSchemaIds { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - let supported_schema_ids: Vec = Vec::deserialize(deserializer)?; - - if supported_schema_ids.is_empty() { - Ok(Self::Wildcard) - } else { - Ok(Self::List(supported_schema_ids)) - } - } -} diff --git a/aquadoggo/src/lib.rs b/aquadoggo/src/lib.rs index 2da7bdb3c..22a8bdc86 100644 --- a/aquadoggo/src/lib.rs +++ b/aquadoggo/src/lib.rs @@ -63,7 +63,7 @@ mod test_utils; #[cfg(test)] mod tests; -pub use crate::config::Configuration; +pub use crate::config::{Configuration, WildcardOption}; pub use crate::network::NetworkConfiguration; pub use node::Node; diff --git a/aquadoggo/src/network/behaviour.rs b/aquadoggo/src/network/behaviour.rs index b8a6d5357..a4760e1bf 100644 --- a/aquadoggo/src/network/behaviour.rs +++ b/aquadoggo/src/network/behaviour.rs @@ -87,7 +87,7 @@ impl P2pandaBehaviour { // Create an identify server behaviour with default configuration if a rendezvous server // address has been provided or the rendezvous server flag is set - let identify = if network_config.relay_node.is_some() || network_config.relay { + let identify = if network_config.relay_address.is_some() || network_config.relay { debug!("Identify network behaviour enabled"); Some(identify::Behaviour::new(identify::Config::new( format!("{NODE_NAMESPACE}/1.0.0"), @@ -116,7 +116,7 @@ impl P2pandaBehaviour { // Create a rendezvous client behaviour with default configuration if a rendezvous server // address has been provided - let rendezvous_client = if network_config.relay_node.is_some() { + let rendezvous_client = if network_config.relay_address.is_some() { debug!("Rendezvous client network behaviour enabled"); Some(rendezvous::client::Behaviour::new(key_pair)) } else { diff --git a/aquadoggo/src/network/config.rs 
b/aquadoggo/src/network/config.rs index 4cf1c2e76..f1675414b 100644 --- a/aquadoggo/src/network/config.rs +++ b/aquadoggo/src/network/config.rs @@ -22,7 +22,7 @@ pub struct NetworkConfiguration { /// with a static IP Address). If the node needs to connect to other ndoes with changing, /// dynamic IP addresses or even with nodes behind a firewall or NAT, at least one Relay will /// be required. - pub trusted_nodes: Vec, + pub node_addresses: Vec, /// Set to true if node should also function as a relay. Other nodes can use relays to aid /// discovery and establishing connectivity. @@ -40,7 +40,7 @@ pub struct NetworkConfiguration { /// When a direct connection is not possible the relay will help to redirect the (encrypted) /// traffic as an intermediary between us and other nodes. The node will contact each server /// and register our IP address for other peers. - pub relay_node: Option, + pub relay_address: Option, /// Notify handler buffer size. /// @@ -93,12 +93,12 @@ impl Default for NetworkConfiguration { max_connections_pending_out: 8, max_connections_per_peer: 8, mdns: true, + node_addresses: Vec::new(), notify_handler_buffer_size: 128, per_connection_event_buffer_size: 8, quic_port: 2022, relay: false, - relay_node: None, - trusted_nodes: Vec::new(), + relay_address: None, } } } diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index a905e4d91..4e2875357 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -79,7 +79,7 @@ pub async fn network_service( // If a relay node address was provided, then connect and performing necessary setup before we // run the main event loop. 
- if let Some(relay_address) = network_config.relay_node.clone() { + if let Some(relay_address) = network_config.relay_address.clone() { info!("Connecting to relay node at: {relay_address}"); connect_to_relay(&mut swarm, &mut network_config, relay_address).await?; } @@ -139,7 +139,7 @@ pub async fn connect_to_relay( // Update values on the config. learned_relay_peer_id = Some(peer_id); - network_config.relay_node = Some(relay_address.clone()); + network_config.relay_address = Some(relay_address.clone()); // All done, we've learned our external address successfully. learned_observed_addr = true; @@ -380,7 +380,7 @@ impl EventLoop { for address in registration.record.addresses() { let peer_id = registration.record.peer_id(); if peer_id != self.local_peer_id { - if let Some(relay_address) = &self.network_config.relay_node { + if let Some(relay_address) = &self.network_config.relay_address { info!("Add new peer to address book: {} {}", peer_id, address); let peer_circuit_address = relay_address diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 83afbbb71..78cc3fd70 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -10,7 +10,7 @@ use p2panda_rs::Human; use tokio::sync::broadcast::{channel, Receiver, Sender}; use tokio::sync::Mutex; -use crate::config::SupportedSchemaIds; +use crate::config::WildcardOption; /// Provides fast access to system and application schemas. /// @@ -22,7 +22,7 @@ pub struct SchemaProvider { /// Optional list of whitelisted schema ids. When not empty, only these schema ids will be /// accepted on this node, if not set _all_ schema ids are accepted (wildcard). - supported_schema_ids: SupportedSchemaIds, + supported_schema_ids: WildcardOption, /// Sender for broadcast channel informing subscribers about updated schemas. 
tx: Sender, @@ -30,7 +30,10 @@ pub struct SchemaProvider { impl SchemaProvider { /// Returns a `SchemaProvider` containing the given application schemas and all system schemas. - pub fn new(application_schemas: Vec, supported_schema_ids: SupportedSchemaIds) -> Self { + pub fn new( + application_schemas: Vec, + supported_schema_ids: WildcardOption, + ) -> Self { // Collect all system and application schemas. let mut schemas = SYSTEM_SCHEMAS.clone(); schemas.extend(&application_schemas); @@ -42,7 +45,7 @@ impl SchemaProvider { } // Filter out all unsupported schema ids when list was set - if let SupportedSchemaIds::List(schema_ids) = &supported_schema_ids { + if let WildcardOption::Set(schema_ids) = &supported_schema_ids { index.retain(|id, _| schema_ids.contains(id)); }; @@ -84,7 +87,7 @@ impl SchemaProvider { /// Returns `true` if a schema was updated or it already existed in it's current state, and /// `false` if it was inserted. pub async fn update(&self, schema: Schema) -> Result { - if let SupportedSchemaIds::List(supported_schema_ids) = &self.supported_schema_ids { + if let WildcardOption::Set(supported_schema_ids) = &self.supported_schema_ids { if !supported_schema_ids.contains(schema.id()) { bail!("Attempted to add unsupported schema to schema provider"); } @@ -118,8 +121,8 @@ impl SchemaProvider { /// whitelist was set it directly returns the list itself. pub async fn supported_schema_ids(&self) -> Vec { match &self.supported_schema_ids { - SupportedSchemaIds::List(schema_ids) => schema_ids.clone(), - SupportedSchemaIds::Wildcard => self + WildcardOption::Set(schema_ids) => schema_ids.clone(), + WildcardOption::Wildcard => self .all() .await .iter() @@ -131,13 +134,13 @@ impl SchemaProvider { /// Returns true if a whitelist of supported schema ids was provided through user /// configuration. 
pub fn is_whitelist_active(&self) -> bool { - matches!(self.supported_schema_ids, SupportedSchemaIds::List(_)) + matches!(self.supported_schema_ids, WildcardOption::Set(_)) } } impl Default for SchemaProvider { fn default() -> Self { - Self::new(Vec::new(), SupportedSchemaIds::Wildcard) + Self::new(Vec::new(), WildcardOption::Wildcard) } } From 6b65fc34ff602bf42a0be55caa7ee11074944b5d Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 00:29:32 +0200 Subject: [PATCH 40/66] New command line arguments, find config file automatically --- Cargo.lock | 30 ++++- aquadoggo_cli/Cargo.toml | 1 + aquadoggo_cli/src/key_pair.rs | 3 - aquadoggo_cli/src/main.rs | 214 +++++++++++++++++++++------------- aquadoggo_cli/src/schemas.rs | 18 --- 5 files changed, 166 insertions(+), 100 deletions(-) delete mode 100644 aquadoggo_cli/src/schemas.rs diff --git a/Cargo.lock b/Cargo.lock index 14d042c6a..e071f9464 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -212,6 +212,7 @@ dependencies = [ "anyhow", "aquadoggo", "clap", + "directories", "env_logger", "figment", "hex", @@ -1252,13 +1253,22 @@ dependencies = [ "subtle", ] +[[package]] +name = "directories" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a49173b84e034382284f27f1af4dcbbd231ffa358c0fe316541a7337f376a35" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs" version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059" dependencies = [ - "dirs-sys", + "dirs-sys 0.3.7", ] [[package]] @@ -1272,6 +1282,18 @@ dependencies = [ "winapi", ] +[[package]] +name = "dirs-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" +dependencies = [ + "libc", + "option-ext", + "redox_users", + "windows-sys", +] + [[package]] name = "displaydoc" version = "0.2.4" @@ 
-3060,6 +3082,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" +[[package]] +name = "option-ext" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" + [[package]] name = "p2panda-rs" version = "0.7.1" diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 057bfb9ae..425074731 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -22,6 +22,7 @@ doc = false [dependencies] anyhow = "1.0.62" clap = { version = "4.1.8", features = ["derive"] } +directories = "5.0.1" env_logger = "0.9.0" figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" diff --git a/aquadoggo_cli/src/key_pair.rs b/aquadoggo_cli/src/key_pair.rs index 279600cd7..d18775d9f 100644 --- a/aquadoggo_cli/src/key_pair.rs +++ b/aquadoggo_cli/src/key_pair.rs @@ -8,9 +8,6 @@ use std::path::PathBuf; use anyhow::Result; use p2panda_rs::identity::KeyPair; -/// File of the name where the private key will be stored inside. -const KEY_PAIR_FILE_NAME: &str = "private-key"; - /// Returns a new instance of `KeyPair` by either loading the private key from a path or generating /// a new one and saving it in the file system. 
pub fn generate_or_load_key_pair(path: PathBuf) -> Result { diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index d023c91ba..87bbc813f 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -1,117 +1,174 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -#![allow(clippy::uninlined_format_args)] mod key_pair; -mod schemas; -use std::convert::TryFrom; -use std::net::IpAddr; +use std::net::{IpAddr, SocketAddr}; +use std::path::PathBuf; use anyhow::Result; -use aquadoggo::{Configuration, NetworkConfiguration, Node}; +use aquadoggo::{Configuration as NodeConfiguration, NetworkConfiguration, Node, WildcardOption}; use clap::Parser; +use directories::ProjectDirs; use figment::providers::{Env, Format, Serialized, Toml}; use figment::Figment; use libp2p::multiaddr::Protocol; use libp2p::Multiaddr; +use p2panda_rs::schema::SchemaId; use serde::{Deserialize, Serialize}; const CONFIG_FILE_NAME: &str = "config.toml"; const CONFIG_ENV_VAR_PREFIX: &str = "DOGGO_"; +/// Node server for the p2panda network. #[derive(Parser, Debug, Serialize, Deserialize)] #[command(name = "aquadoggo Node", version)] -/// Node server for the p2panda network. -struct Cli { - /// Port for the http server, 2020 by default. - #[arg(short = 'P', long)] - http_port: Option, - - /// Port for the QUIC transport, 2022 by default for a relay/rendezvous node. - #[arg(short, long)] - quic_port: Option, - - /// URLs of remote nodes to replicate with. - #[arg(short, long)] - remote_node_addresses: Vec, - - /// Enable mDNS for peer discovery over LAN (using port 5353), false by default. - #[arg(short, long)] - mdns: Option, - - /// Enable relay server to facilitate peer connectivity, false by default. - #[arg(long)] - enable_relay_server: bool, - - /// IP address for the relay peer. - /// - /// eg. --relay-address "127.0.0.1" - #[arg(long)] - relay_address: Option, - - /// Port for the relay peer, defaults to expected relay port 2022. - /// - /// eg. 
--relay-port "1234" - #[arg(long)] - relay_port: Option, +struct Configuration { + /// Path to config.toml file. + #[arg(short = 'c', long)] + config: Option, + + /// List of schema ids which a node will replicate and expose on the GraphQL API. + #[arg(short = 's', long)] + supported_schema_ids: Vec, + + /// URL / connection string to PostgreSQL or SQLite database. + #[arg(short = 'd', long, default_value = "sqlite::memory:")] + database_url: String, + + /// Maximum number of connections that the database pool should maintain. + #[arg(long, default_value_t = 32)] + database_max_connections: u32, + + /// Number of concurrent workers which defines the maximum of materialization + /// tasks which can be worked on simultaneously. + #[arg(long, default_value_t = 16)] + worker_pool_size: u32, + + /// HTTP port, serving the GraphQL API. + #[arg(short = 'p', long, default_value_t = 2020)] + http_port: u16, + + /// QUIC port for node-node communication and data replication. + #[arg(short = 'q', long, default_value_t = 2022)] + quic_port: u16, + + /// Path to persist your ed25519 private key file. + #[arg(short = 'k', long)] + private_key: Option, + + /// mDNS to discover other peers on the local network. + #[arg(short = 'm', long, default_value_t = true)] + mdns: bool, + + /// List of addresses of trusted and known nodes. + #[arg(short = 'n', long)] + node_addresses: Vec, + + /// Address of relay. + #[arg(short = 'r', long)] + relay_address: Option, + + /// Set to true if our node should also function as a relay. Defaults to false. 
+ #[arg(short = 'e', long, default_value_t = false)] + relay: bool, } -impl TryFrom for Configuration { - type Error = anyhow::Error; - - fn try_from(cli: Cli) -> Result { - let mut config = Configuration::default(); - - let relay_node = if let Some(relay_address) = cli.relay_address { - let mut multiaddr = match relay_address { - IpAddr::V4(ip) => Multiaddr::from(Protocol::Ip4(ip)), - IpAddr::V6(ip) => Multiaddr::from(Protocol::Ip6(ip)), - }; - multiaddr.push(Protocol::Udp(cli.relay_port.unwrap_or(2022))); - multiaddr.push(Protocol::QuicV1); - - Some(multiaddr) +impl From for NodeConfiguration { + fn from(cli: Configuration) -> Self { + let supported_schema_ids = if cli.supported_schema_ids.is_empty() { + WildcardOption::Wildcard } else { - None + WildcardOption::Set(cli.supported_schema_ids) }; - if let Some(http_port) = cli.http_port { - config.http_port = http_port; + NodeConfiguration { + database_url: cli.database_url, + database_max_connections: cli.database_max_connections, + http_port: cli.http_port, + worker_pool_size: cli.worker_pool_size, + supported_schema_ids, + network: NetworkConfiguration { + quic_port: cli.quic_port, + mdns: cli.mdns, + node_addresses: cli.relay_address.into_iter().map(to_multiaddress).collect(), + relay: cli.relay, + relay_address: cli.relay_address.map(to_multiaddress), + ..Default::default() + }, } + } +} - config.network = NetworkConfiguration { - mdns: cli.mdns.unwrap_or(false), - relay: cli.enable_relay_server, - relay_node, - trusted_nodes: cli.remote_node_addresses, - ..config.network - }; +fn to_multiaddress(socket_address: SocketAddr) -> Multiaddr { + let mut multiaddr = match socket_address.ip() { + IpAddr::V4(ip) => Multiaddr::from(Protocol::Ip4(ip)), + IpAddr::V6(ip) => Multiaddr::from(Protocol::Ip6(ip)), + }; + multiaddr.push(Protocol::Udp(socket_address.port())); + multiaddr.push(Protocol::QuicV1); + multiaddr +} - if let Some(quic_port) = cli.quic_port { - config.network.quic_port = quic_port; - } +fn 
try_determine_config_file_path() -> Option { + // Find config file in current folder + let mut current_dir = std::env::current_dir().expect("Could not determine current directory"); + current_dir.push(CONFIG_FILE_NAME); + + // Find config file in XDG config folder + let mut xdg_config_dir: PathBuf = ProjectDirs::from("", "", "aquadoggo") + .expect("Could not determine valid config directory path from operating system") + .config_dir() + .to_path_buf(); + xdg_config_dir.push(CONFIG_FILE_NAME); + + [current_dir, xdg_config_dir] + .iter() + .find(|path| path.exists()) + .cloned() +} + +fn load_config() -> Result { + // Parse command line arguments first + let cli = Configuration::parse(); + + // Determine if a config file path was provided or if we should look for it in common locations + let config_file_path = if cli.config.is_some() { + cli.config.clone() + } else { + try_determine_config_file_path() + }; - Ok(config) + // Get configuration from .toml file (optional), environment variable and command line + // arguments + let mut figment = Figment::new(); + + if let Some(path) = config_file_path { + figment = figment.merge(Toml::file(path)); } + + figment + .merge(Env::prefixed(CONFIG_ENV_VAR_PREFIX)) + .merge(Serialized::defaults(cli)) + .extract() } #[tokio::main] async fn main() { env_logger::init(); - // Parse command line arguments - let config: Result = Figment::new() - .merge(Toml::file(CONFIG_FILE_NAME)) - .merge(Env::prefixed(CONFIG_ENV_VAR_PREFIX)) - .merge(Serialized::defaults(Cli::parse())) - .extract(); - - match config { + match load_config() { Ok(config) => { - let key_pair = key_pair::generate_ephemeral_key_pair(); + // @TODO: Nicer print + println!("{:?}", config); + + let key_pair = match &config.private_key { + Some(path) => key_pair::generate_or_load_key_pair(path.clone()) + .expect("Could not load private key from file"), + None => key_pair::generate_ephemeral_key_pair(), + }; // Start p2panda node in async runtime - let node = 
Node::start(key_pair, config).await; + let node = Node::start(key_pair, config.into()).await; // Run this until [CTRL] + [C] got pressed or something went wrong tokio::select! { @@ -123,7 +180,8 @@ async fn main() { node.shutdown().await; } Err(error) => { - println!("Error: Could not load configuration:"); + println!("Failed loading configuration:"); + for error in error { println!("- {}", error); } diff --git a/aquadoggo_cli/src/schemas.rs b/aquadoggo_cli/src/schemas.rs deleted file mode 100644 index bdddc548d..000000000 --- a/aquadoggo_cli/src/schemas.rs +++ /dev/null @@ -1,18 +0,0 @@ -// SPDX-License-Identifier: AGPL-3.0-or-later - -use std::fs::File; -use std::io::Read; - -use anyhow::{anyhow, Result}; -use p2panda_rs::schema::SchemaId; -use toml::Table; - -pub fn read_schema_ids_from_file(file: &mut File) -> Result> { - let mut buf = String::new(); - file.read_to_string(&mut buf)?; - let table = buf.parse::().unwrap(); - let value = table.get("supported_schema_ids").ok_or(anyhow!( - "No \"supported_schema_ids\" field found config file" - ))?; - Ok(value.clone().try_into::>()?) 
-} From f90765ac128e51362552bf54f80c08d620de3b3f Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 00:42:51 +0200 Subject: [PATCH 41/66] Fix tests --- aquadoggo/src/replication/service.rs | 3 ++- aquadoggo/src/schema/schema_provider.rs | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 087ddfec4..3d4466a02 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -526,6 +526,7 @@ mod tests { }; use crate::schema::SchemaProvider; use crate::test_utils::{test_runner, TestNode}; + use crate::WildcardOption; use super::ConnectionManager; @@ -615,7 +616,7 @@ mod tests { test_runner(move |node: TestNode| async move { let (tx, mut rx) = broadcast::channel::(10); - let schema_provider = SchemaProvider::new(vec![], Some(vec![])); + let schema_provider = SchemaProvider::new(vec![], WildcardOption::Set(vec![])); let mut manager = ConnectionManager::new(&schema_provider, &node.context.store, &tx, local_peer_id); manager.update_announcement().await; diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 78cc3fd70..dcc7c5584 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -149,6 +149,8 @@ mod test { use p2panda_rs::schema::{FieldType, Schema, SchemaId, SchemaName}; use p2panda_rs::test_utils::fixtures::random_document_view_id; + use crate::WildcardOption; + use super::SchemaProvider; #[tokio::test] @@ -197,7 +199,8 @@ mod test { &[("test_field", FieldType::String)], ) .unwrap(); - let provider = SchemaProvider::new(vec![], Some(vec![new_schema_id.clone()])); + let provider = + SchemaProvider::new(vec![], WildcardOption::Set(vec![new_schema_id.clone()])); let result = provider.update(new_schema).await; assert!(result.is_ok()); assert!(!result.unwrap()); @@ -207,7 +210,7 @@ mod test { #[tokio::test] async fn 
update_unsupported_schemas() { - let provider = SchemaProvider::new(vec![], Some(vec![])); + let provider = SchemaProvider::new(vec![], WildcardOption::Set(vec![])); let new_schema_id = SchemaId::Application( SchemaName::new("test_schema").unwrap(), random_document_view_id(), From 47b35454b83bc98637060449d715a1777f91b31e Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 00:43:17 +0200 Subject: [PATCH 42/66] Pandada --- aquadoggo_cli/src/main.rs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 87bbc813f..662a93e60 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -152,6 +152,35 @@ fn load_config() -> Result { .extract() } +fn panda_da() -> String { + r#" + ██████ ███████ ████ + ████████ ██████ + ██████ ███ + █████ ██ + █ ████ █████ + █ ██████ █ █████ + ██ ████ ███ █████ + █████ ██████ █ + ███████ ██ + █████████ █████████████ + ███████████ █████████ + █████████████████ ████ + ██████ ███████████ ██ + ██████████ █████ █ + █████████ ██ ███ ██ + ██████ █ █ ██ + ██ ██ ███████ ██ + ███████████ ██████ + ████████ ████████████ ██████ + ████ ██████ ██████████ █ ████ + █████████ ████████ ███ ███████ + ████████ ██████ ████████ + █████████ ████████████████████████ ███ + █████████ ██"# + .into() +} + #[tokio::main] async fn main() { env_logger::init(); @@ -159,7 +188,7 @@ async fn main() { match load_config() { Ok(config) => { // @TODO: Nicer print - println!("{:?}", config); + println!("{}\n\n{:#?}", panda_da(), config); let key_pair = match &config.private_key { Some(path) => key_pair::generate_or_load_key_pair(path.clone()) From 3938a6c664dffe068e371977daca2edd9b1c064b Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 09:41:47 +0200 Subject: [PATCH 43/66] Fix naming of allow lists and database url --- aquadoggo/src/config.rs | 19 ++++++------- aquadoggo/src/lib.rs | 2 +- aquadoggo/src/replication/ingest.rs | 6 ++-- 
aquadoggo/src/replication/service.rs | 8 +++--- aquadoggo/src/schema/schema_provider.rs | 37 ++++++++++++------------- aquadoggo_cli/config.toml | 8 +++--- aquadoggo_cli/src/main.rs | 6 ++-- 7 files changed, 42 insertions(+), 44 deletions(-) diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 973ea470e..655c11a72 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -31,10 +31,9 @@ pub struct Configuration { /// List of schema ids which a node will replicate and expose on the GraphQL API. /// - /// When whitelisting a schema you automatically opt into announcing, replicating and - /// materializing documents connected to it, supporting applications which are dependent on - /// this data. - pub supported_schema_ids: WildcardOption, + /// When allowing a schema you automatically opt into announcing, replicating and materializing + /// documents connected to it, supporting applications which are dependent on this data. + pub supported_schema_ids: AllowList, /// Network configuration. pub network: NetworkConfiguration, @@ -47,23 +46,23 @@ impl Default for Configuration { database_max_connections: 32, http_port: 2020, worker_pool_size: 16, - supported_schema_ids: WildcardOption::Wildcard, + supported_schema_ids: AllowList::Wildcard, network: NetworkConfiguration::default(), } } } -/// Set a configuration value to either a concrete set of elements or to a wildcard (*). +/// Set a configuration value to either allow a defined set of elements or to a wildcard (*). #[derive(Debug, Clone)] -pub enum WildcardOption { - /// Support all possible items. +pub enum AllowList { + /// Allow all possible items. Wildcard, - /// Support only a certain set of items. + /// Allow only a certain set of items. 
Set(Vec), } -impl Default for WildcardOption { +impl Default for AllowList { fn default() -> Self { Self::Wildcard } diff --git a/aquadoggo/src/lib.rs b/aquadoggo/src/lib.rs index 22a8bdc86..2c4eb447f 100644 --- a/aquadoggo/src/lib.rs +++ b/aquadoggo/src/lib.rs @@ -63,7 +63,7 @@ mod test_utils; #[cfg(test)] mod tests; -pub use crate::config::{Configuration, WildcardOption}; +pub use crate::config::{AllowList, Configuration}; pub use crate::network::NetworkConfiguration; pub use node::Node; diff --git a/aquadoggo/src/replication/ingest.rs b/aquadoggo/src/replication/ingest.rs index 0aee97444..52bd2b5b9 100644 --- a/aquadoggo/src/replication/ingest.rs +++ b/aquadoggo/src/replication/ingest.rs @@ -49,9 +49,9 @@ impl SyncIngest { let plain_operation = decode_operation(encoded_operation)?; - // If the node has been configured with a whitelist of supported schema ids, check that the - // sent operation follows one of our supported schema - if self.schema_provider.is_whitelist_active() + // If the node has been configured with an allow-list of supported schema ids, check that + // the sent operation follows one of our supported schema + if self.schema_provider.is_allow_list_active() && self .schema_provider .supported_schema_ids() diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index 3d4466a02..d3e05c2c9 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -250,10 +250,10 @@ impl ConnectionManager { .expect("Announcement state needs to be set with 'update_announcement'") .target_set; - // If this node has been configured with a whitelist of schema ids then we check the + // If this node has been configured with an allow-list of schema ids then we check the // target set of the requests matches our own, otherwise we skip this step and accept // any target set. 
- if self.schema_provider.is_whitelist_active() + if self.schema_provider.is_allow_list_active() && !local_target_set.is_valid_set(remote_target_set) { // If it doesn't match we signal that an error occurred and return at this point. @@ -526,7 +526,7 @@ mod tests { }; use crate::schema::SchemaProvider; use crate::test_utils::{test_runner, TestNode}; - use crate::WildcardOption; + use crate::AllowList; use super::ConnectionManager; @@ -616,7 +616,7 @@ mod tests { test_runner(move |node: TestNode| async move { let (tx, mut rx) = broadcast::channel::(10); - let schema_provider = SchemaProvider::new(vec![], WildcardOption::Set(vec![])); + let schema_provider = SchemaProvider::new(vec![], AllowList::Set(vec![])); let mut manager = ConnectionManager::new(&schema_provider, &node.context.store, &tx, local_peer_id); manager.update_announcement().await; diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index dcc7c5584..754165298 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -10,7 +10,7 @@ use p2panda_rs::Human; use tokio::sync::broadcast::{channel, Receiver, Sender}; use tokio::sync::Mutex; -use crate::config::WildcardOption; +use crate::config::AllowList; /// Provides fast access to system and application schemas. /// @@ -20,9 +20,9 @@ pub struct SchemaProvider { /// In-memory store of registered and materialized schemas. schemas: Arc>>, - /// Optional list of whitelisted schema ids. When not empty, only these schema ids will be - /// accepted on this node, if not set _all_ schema ids are accepted (wildcard). - supported_schema_ids: WildcardOption, + /// Optional list of allowed schema ids. When not empty, only these schema ids will be accepted + /// on this node, if not set _all_ schema ids are accepted (wildcard). + supported_schema_ids: AllowList, /// Sender for broadcast channel informing subscribers about updated schemas. 
tx: Sender, @@ -32,7 +32,7 @@ impl SchemaProvider { /// Returns a `SchemaProvider` containing the given application schemas and all system schemas. pub fn new( application_schemas: Vec, - supported_schema_ids: WildcardOption, + supported_schema_ids: AllowList, ) -> Self { // Collect all system and application schemas. let mut schemas = SYSTEM_SCHEMAS.clone(); @@ -45,7 +45,7 @@ impl SchemaProvider { } // Filter out all unsupported schema ids when list was set - if let WildcardOption::Set(schema_ids) = &supported_schema_ids { + if let AllowList::Set(schema_ids) = &supported_schema_ids { index.retain(|id, _| schema_ids.contains(id)); }; @@ -87,7 +87,7 @@ impl SchemaProvider { /// Returns `true` if a schema was updated or it already existed in it's current state, and /// `false` if it was inserted. pub async fn update(&self, schema: Schema) -> Result { - if let WildcardOption::Set(supported_schema_ids) = &self.supported_schema_ids { + if let AllowList::Set(supported_schema_ids) = &self.supported_schema_ids { if !supported_schema_ids.contains(schema.id()) { bail!("Attempted to add unsupported schema to schema provider"); } @@ -117,12 +117,12 @@ impl SchemaProvider { /// Returns a list of all supported schema ids. /// - /// If no whitelist was set it returns the list of all currently known schema ids. If a - /// whitelist was set it directly returns the list itself. + /// If no allow-list was set it returns the list of all currently known schema ids. If an + /// allo-wlist was set it directly returns the list itself. 
pub async fn supported_schema_ids(&self) -> Vec { match &self.supported_schema_ids { - WildcardOption::Set(schema_ids) => schema_ids.clone(), - WildcardOption::Wildcard => self + AllowList::Set(schema_ids) => schema_ids.clone(), + AllowList::Wildcard => self .all() .await .iter() @@ -131,16 +131,16 @@ impl SchemaProvider { } } - /// Returns true if a whitelist of supported schema ids was provided through user + /// Returns true if an allow-list of supported schema ids was provided through user /// configuration. - pub fn is_whitelist_active(&self) -> bool { - matches!(self.supported_schema_ids, WildcardOption::Set(_)) + pub fn is_allow_list_active(&self) -> bool { + matches!(self.supported_schema_ids, AllowList::Set(_)) } } impl Default for SchemaProvider { fn default() -> Self { - Self::new(Vec::new(), WildcardOption::Wildcard) + Self::new(Vec::new(), AllowList::Wildcard) } } @@ -149,7 +149,7 @@ mod test { use p2panda_rs::schema::{FieldType, Schema, SchemaId, SchemaName}; use p2panda_rs::test_utils::fixtures::random_document_view_id; - use crate::WildcardOption; + use crate::AllowList; use super::SchemaProvider; @@ -199,8 +199,7 @@ mod test { &[("test_field", FieldType::String)], ) .unwrap(); - let provider = - SchemaProvider::new(vec![], WildcardOption::Set(vec![new_schema_id.clone()])); + let provider = SchemaProvider::new(vec![], AllowList::Set(vec![new_schema_id.clone()])); let result = provider.update(new_schema).await; assert!(result.is_ok()); assert!(!result.unwrap()); @@ -210,7 +209,7 @@ mod test { #[tokio::test] async fn update_unsupported_schemas() { - let provider = SchemaProvider::new(vec![], WildcardOption::Set(vec![])); + let provider = SchemaProvider::new(vec![], AllowList::Set(vec![])); let new_schema_id = SchemaId::Application( SchemaName::new("test_schema").unwrap(), random_document_view_id(), diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 69e69b9ef..49a740222 100644 --- a/aquadoggo_cli/config.toml +++ 
b/aquadoggo_cli/config.toml @@ -14,9 +14,9 @@ # List of schema ids which a node will replicate and expose on the GraphQL API. # -# When whitelisting a schema you automatically opt into announcing, replicating -# and materializing documents connected to it, supporting applications which -# are dependent on this data. +# When allowing a schema you automatically opt into announcing, replicating and +# materializing documents connected to it, supporting applications which are +# dependent on this data. # # It is recommended to set this list to all schema ids your own application # should support. Defaults to an empty list. @@ -52,7 +52,7 @@ supported_schema_ids = [ # down. Uncomment this value when running on production as you will otherwise # loose data. # -# database_uri = "sqlite:$HOME/.local/share/aquadoggo/db.sqlite3" +# database_url = "sqlite:$HOME/.local/share/aquadoggo/db.sqlite3" # Maximum number of connections that the database pool should maintain. # diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 662a93e60..ca2f09b2c 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -6,7 +6,7 @@ use std::net::{IpAddr, SocketAddr}; use std::path::PathBuf; use anyhow::Result; -use aquadoggo::{Configuration as NodeConfiguration, NetworkConfiguration, Node, WildcardOption}; +use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration, Node}; use clap::Parser; use directories::ProjectDirs; use figment::providers::{Env, Format, Serialized, Toml}; @@ -76,9 +76,9 @@ struct Configuration { impl From for NodeConfiguration { fn from(cli: Configuration) -> Self { let supported_schema_ids = if cli.supported_schema_ids.is_empty() { - WildcardOption::Wildcard + AllowList::Wildcard } else { - WildcardOption::Set(cli.supported_schema_ids) + AllowList::Set(cli.supported_schema_ids) }; NodeConfiguration { From 075ee5145c5f6543a57abc0d3364dff0c6d6e611 Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 09:45:36 
+0200 Subject: [PATCH 44/66] Rename to im_a_relay --- aquadoggo/src/network/behaviour.rs | 8 ++++---- aquadoggo/src/network/config.rs | 4 ++-- aquadoggo/src/network/service.rs | 2 +- aquadoggo_cli/config.toml | 8 ++++---- aquadoggo_cli/src/main.rs | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/aquadoggo/src/network/behaviour.rs b/aquadoggo/src/network/behaviour.rs index a4760e1bf..b7ec2a24d 100644 --- a/aquadoggo/src/network/behaviour.rs +++ b/aquadoggo/src/network/behaviour.rs @@ -87,7 +87,7 @@ impl P2pandaBehaviour { // Create an identify server behaviour with default configuration if a rendezvous server // address has been provided or the rendezvous server flag is set - let identify = if network_config.relay_address.is_some() || network_config.relay { + let identify = if network_config.relay_address.is_some() || network_config.im_a_relay { debug!("Identify network behaviour enabled"); Some(identify::Behaviour::new(identify::Config::new( format!("{NODE_NAMESPACE}/1.0.0"), @@ -125,7 +125,7 @@ impl P2pandaBehaviour { // Create a rendezvous server behaviour with default configuration if the rendezvous server // flag is set - let rendezvous_server = if network_config.relay { + let rendezvous_server = if network_config.im_a_relay { debug!("Rendezvous server network behaviour enabled"); Some(rendezvous::server::Behaviour::new( rendezvous::server::Config::default(), @@ -140,7 +140,7 @@ impl P2pandaBehaviour { // Create a relay server behaviour with default configuration if the relay server flag is // set - let relay_server = if network_config.relay { + let relay_server = if network_config.im_a_relay { debug!("Relay server network behaviour enabled"); Some(relay::Behaviour::new( peer_id, @@ -155,7 +155,7 @@ impl P2pandaBehaviour { }; // Create UDP holepunching behaviour (DCUtR) if the flag is set - let dcutr = if network_config.relay || relay_client.is_some() { + let dcutr = if network_config.im_a_relay || relay_client.is_some() { 
Some(dcutr::Behaviour::new(peer_id)) } else { None diff --git a/aquadoggo/src/network/config.rs b/aquadoggo/src/network/config.rs index f1675414b..6be523df0 100644 --- a/aquadoggo/src/network/config.rs +++ b/aquadoggo/src/network/config.rs @@ -29,7 +29,7 @@ pub struct NetworkConfiguration { /// /// Relays _need_ to be hosted in a way where they can be reached directly, for example with a /// static IP address through an VPS. - pub relay: bool, + pub im_a_relay: bool, /// Address of a peer which can act as a relay/rendezvous server. /// @@ -97,7 +97,7 @@ impl Default for NetworkConfiguration { notify_handler_buffer_size: 128, per_connection_event_buffer_size: 8, quic_port: 2022, - relay: false, + im_a_relay: false, relay_address: None, } } diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index 4e2875357..7c803fc25 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -44,7 +44,7 @@ pub async fn network_service( info!("Local peer id: {local_peer_id}"); // The swarm can be initiated with or without "relay" capabilities. - let mut swarm = if network_config.relay { + let mut swarm = if network_config.im_a_relay { info!("Networking service initializing with relay capabilities..."); swarm::build_relay_swarm(&network_config, key_pair).await? } else { diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 49a740222..9b8824530 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -132,9 +132,9 @@ node_addresses = [ # "192.0.2.2:3000", ] -# ~~~~~~ -# Relays -# ~~~~~~ +# ~~~~~ +# Relay +# ~~~~~ # Address of relay. # @@ -163,4 +163,4 @@ node_addresses = [ # NOTE: Relays _need_ to be hosted in a way where they can be reached directly, # for example with a static IP address through an VPS. 
# -relay = false +im_a_relay = false diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index ca2f09b2c..4de1d295e 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -70,7 +70,7 @@ struct Configuration { /// Set to true if our node should also function as a relay. Defaults to false. #[arg(short = 'e', long, default_value_t = false)] - relay: bool, + im_a_relay: bool, } impl From for NodeConfiguration { @@ -91,7 +91,7 @@ impl From for NodeConfiguration { quic_port: cli.quic_port, mdns: cli.mdns, node_addresses: cli.relay_address.into_iter().map(to_multiaddress).collect(), - relay: cli.relay, + im_a_relay: cli.im_a_relay, relay_address: cli.relay_address.map(to_multiaddress), ..Default::default() }, From 3f5cc8a7599d3c2e254f454e6912ca4036e8f4d6 Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 09:48:09 +0200 Subject: [PATCH 45/66] Make relay singular --- aquadoggo_cli/config.toml | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 9b8824530..7652f2a2e 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -138,19 +138,20 @@ node_addresses = [ # Address of relay. # -# Relays help discover other nodes on the internet (also known as "rendesvouz" -# or "bootstrap" server) and help establishing direct p2p connections when node -# is behind a firewall or NAT (also known as "holepunching"). +# A relay helps discover other nodes on the internet (also known as +# "rendesvouz" or "bootstrap" server) and helps establishing direct p2p +# connections when node is behind a firewall or NAT (also known as +# "holepunching"). # # When a direct connection is not possible the relay will help to redirect the # (encrypted) traffic as an intermediary between us and other nodes. The node -# will contact each server and register our IP address for other peers. +# will contact the relay and register our IP address for other peers. 
# # WARNING: This will potentially expose your IP address on the network. Do only # connect to trusted relays or make sure your IP address is hidden via a VPN or -# proxy. +# proxy if you're concerned about leaking your IP. # -# WARNING: Using relays will potentially connect you to untrusted / unknown +# WARNING: Using a relay will potentially connect you to untrusted / unknown # nodes with which you will then exchange data with. If in doubt, use the list # of known node addresses instead and only connect to trusted nodes. # From d51304fd38fcf024699cba7d775a7441df984e98 Mon Sep 17 00:00:00 2001 From: adz Date: Wed, 23 Aug 2023 10:10:32 +0200 Subject: [PATCH 46/66] Add some todos --- Cargo.lock | 1 + aquadoggo_cli/Cargo.toml | 2 +- aquadoggo_cli/src/main.rs | 74 +++++++++++++++++++++------------------ 3 files changed, 41 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e071f9464..6f32aabe2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -926,6 +926,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex", + "once_cell", "strsim", ] diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 425074731..b616a8116 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -21,7 +21,7 @@ doc = false [dependencies] anyhow = "1.0.62" -clap = { version = "4.1.8", features = ["derive"] } +clap = { version = "4.1.8", features = ["derive", "cargo"] } directories = "5.0.1" env_logger = "0.9.0" figment = { version = "0.10.10", features = ["toml", "env"] } diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 4de1d295e..c6f0b266d 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -2,18 +2,20 @@ mod key_pair; +use std::fmt::Display; use std::net::{IpAddr, SocketAddr}; use std::path::PathBuf; use anyhow::Result; use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration, Node}; -use clap::Parser; +use clap::{crate_version, Parser}; use directories::ProjectDirs; use 
figment::providers::{Env, Format, Serialized, Toml}; use figment::Figment; use libp2p::multiaddr::Protocol; use libp2p::Multiaddr; use p2panda_rs::schema::SchemaId; +use p2panda_rs::Human; use serde::{Deserialize, Serialize}; const CONFIG_FILE_NAME: &str = "config.toml"; @@ -73,6 +75,36 @@ struct Configuration { im_a_relay: bool, } +impl Display for Configuration { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!( + f, + "{}", + self.config + .as_ref() + .map_or("No config file provided".into(), |ref path| format!( + "Loading config file from {}", + path.display() + )) + )?; + + write!(f, "\n\n")?; + + // @TODO: Nicer printing of all values + write!(f, "Schemas\n")?; + write!( + f, + "{:<20} {:<20}\n", + "supported_schema_ids", + self.supported_schema_ids + .iter() + .map(|id| id.display()) + .collect::>() + .join(", ") + ) + } +} + impl From for NodeConfiguration { fn from(cli: Configuration) -> Self { let supported_schema_ids = if cli.supported_schema_ids.is_empty() { @@ -129,10 +161,10 @@ fn try_determine_config_file_path() -> Option { fn load_config() -> Result { // Parse command line arguments first - let cli = Configuration::parse(); + let mut cli = Configuration::parse(); // Determine if a config file path was provided or if we should look for it in common locations - let config_file_path = if cli.config.is_some() { + cli.config = if cli.config.is_some() { cli.config.clone() } else { try_determine_config_file_path() @@ -142,54 +174,26 @@ fn load_config() -> Result { // arguments let mut figment = Figment::new(); - if let Some(path) = config_file_path { + if let Some(path) = &cli.config { figment = figment.merge(Toml::file(path)); } + // @TODO: Fix not overriding values when empty array was set figment .merge(Env::prefixed(CONFIG_ENV_VAR_PREFIX)) .merge(Serialized::defaults(cli)) .extract() } -fn panda_da() -> String { - r#" - ██████ ███████ ████ - ████████ ██████ - ██████ ███ - █████ ██ - █ ████ █████ - █ ██████ █ █████ - ██ 
████ ███ █████ - █████ ██████ █ - ███████ ██ - █████████ █████████████ - ███████████ █████████ - █████████████████ ████ - ██████ ███████████ ██ - ██████████ █████ █ - █████████ ██ ███ ██ - ██████ █ █ ██ - ██ ██ ███████ ██ - ███████████ ██████ - ████████ ████████████ ██████ - ████ ██████ ██████████ █ ████ - █████████ ████████ ███ ███████ - ████████ ██████ ████████ - █████████ ████████████████████████ ███ - █████████ ██"# - .into() -} - #[tokio::main] async fn main() { env_logger::init(); match load_config() { Ok(config) => { - // @TODO: Nicer print - println!("{}\n\n{:#?}", panda_da(), config); + println!("aquadoggo v{}\n\n{:?}", crate_version!(), config); + // @TODO: Create folders when paths for db or key was set let key_pair = match &config.private_key { Some(path) => key_pair::generate_or_load_key_pair(path.clone()) .expect("Could not load private key from file"), From 595bada247d1d6655d8aac19b0745b82c16dfeb9 Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 10:51:34 +0200 Subject: [PATCH 47/66] Rename to direct_node_addresses --- aquadoggo/src/network/config.rs | 14 +++++++------- aquadoggo_cli/src/main.rs | 16 ++++++++++------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/aquadoggo/src/network/config.rs b/aquadoggo/src/network/config.rs index 6be523df0..ef994f0cc 100644 --- a/aquadoggo/src/network/config.rs +++ b/aquadoggo/src/network/config.rs @@ -16,13 +16,13 @@ pub struct NetworkConfiguration { /// Discover peers on the local network via mDNS (over IPv4 only, using port 5353). pub mdns: bool, - /// List of addresses of trusted and known nodes. + /// List of known node addresses (IP + port) we want to connect to directly. /// - /// Nodes mentioned in this list can be connected directly (for example when they are hosted - /// with a static IP Address). If the node needs to connect to other ndoes with changing, - /// dynamic IP addresses or even with nodes behind a firewall or NAT, at least one Relay will - /// be required. 
- pub node_addresses: Vec, + /// Make sure that nodes mentioned in this list are directly reachable (for example they need + /// to be hosted with a static IP Address). If you need to connect to nodes with changing, + /// dynamic IP addresses or even with nodes behind a firewall or NAT, do not use this field but + /// use at least one relay. + pub direct_node_addresses: Vec, /// Set to true if node should also function as a relay. Other nodes can use relays to aid /// discovery and establishing connectivity. @@ -93,7 +93,7 @@ impl Default for NetworkConfiguration { max_connections_pending_out: 8, max_connections_per_peer: 8, mdns: true, - node_addresses: Vec::new(), + direct_node_addresses: Vec::new(), notify_handler_buffer_size: 128, per_connection_event_buffer_size: 8, quic_port: 2022, diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index c6f0b266d..a99a34f07 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -41,12 +41,12 @@ struct Configuration { #[arg(long, default_value_t = 32)] database_max_connections: u32, - /// Number of concurrent workers which defines the maximum of materialization - /// tasks which can be worked on simultaneously. + /// Number of concurrent workers, defines the maximum of materialization tasks which can be + /// worked on simultaneously. #[arg(long, default_value_t = 16)] worker_pool_size: u32, - /// HTTP port, serving the GraphQL API. + /// HTTP port for client-node communication, serving the GraphQL API. #[arg(short = 'p', long, default_value_t = 2020)] http_port: u16, @@ -62,9 +62,9 @@ struct Configuration { #[arg(short = 'm', long, default_value_t = true)] mdns: bool, - /// List of addresses of trusted and known nodes. + /// List of known node addresses (IP + port) we want to connect to directly. #[arg(short = 'n', long)] - node_addresses: Vec, + direct_node_addresses: Vec, /// Address of relay. 
#[arg(short = 'r', long)] @@ -122,7 +122,11 @@ impl From for NodeConfiguration { network: NetworkConfiguration { quic_port: cli.quic_port, mdns: cli.mdns, - node_addresses: cli.relay_address.into_iter().map(to_multiaddress).collect(), + direct_node_addresses: cli + .direct_node_addresses + .into_iter() + .map(to_multiaddress) + .collect(), im_a_relay: cli.im_a_relay, relay_address: cli.relay_address.map(to_multiaddress), ..Default::default() From 56da144ea2b1efaae359b5f6102893943d464219 Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 10:52:14 +0200 Subject: [PATCH 48/66] Update config.toml after renaming field --- aquadoggo_cli/config.toml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 7652f2a2e..d6b3dcf17 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -120,14 +120,14 @@ mdns = true # Nodes # ~~~~~ -# List of addresses of trusted and known nodes. +# List of known node addresses (IP + port) we want to connect to directly. # -# NOTE: Make sure that nodes mentioned in this list can be connected directly -# (for example when they are hosted with a static IP Address). If you need to +# NOTE: Make sure that nodes mentioned in this list are directly reachable (for +# example they need to be hosted with a static IP Address). If you need to # connect to nodes with changing, dynamic IP addresses or even with nodes -# behind a firewall or NAT, you will need at least one Relay. +# behind a firewall or NAT, do not use this field but use at least one relay. 
# -node_addresses = [ +direct_node_addresses = [ # "192.0.2.0:2022", # "192.0.2.2:3000", ] From 92d9c1c1d0b6859fc78928e42252fbed63920c33 Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 13:57:46 +0200 Subject: [PATCH 49/66] Allow wildcard strings in config.toml, fix issue with clap overriding values --- aquadoggo/src/config.rs | 64 +++++++++ aquadoggo_cli/config.toml | 42 +++--- aquadoggo_cli/src/config.rs | 271 ++++++++++++++++++++++++++++++++++++ aquadoggo_cli/src/main.rs | 250 ++++++--------------------------- 4 files changed, 399 insertions(+), 228 deletions(-) create mode 100644 aquadoggo_cli/src/config.rs diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 655c11a72..8d82f4902 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -1,6 +1,10 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +use std::str::FromStr; + +use anyhow::bail; use p2panda_rs::schema::SchemaId; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::network::NetworkConfiguration; @@ -52,6 +56,8 @@ impl Default for Configuration { } } +const WILDCARD: &'static str = "*"; + /// Set a configuration value to either allow a defined set of elements or to a wildcard (*). 
#[derive(Debug, Clone)] pub enum AllowList { @@ -62,8 +68,66 @@ pub enum AllowList { Set(Vec), } +impl FromStr for AllowList { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + if s == WILDCARD { + Ok(Self::Wildcard) + } else { + bail!("only wildcard strings allowed") + } + } +} + impl Default for AllowList { fn default() -> Self { Self::Wildcard } } + +impl Serialize for AllowList +where + T: Serialize, +{ + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self { + AllowList::Wildcard => serializer.serialize_str(WILDCARD), + AllowList::Set(list) => list.serialize(serializer), + } + } +} + +impl<'de, T> Deserialize<'de> for AllowList +where + T: Deserialize<'de>, +{ + fn deserialize(deserializer: D) -> Result, D::Error> + where + D: Deserializer<'de>, + T: Deserialize<'de>, + { + #[derive(Deserialize)] + #[serde(untagged)] + enum Value { + String(String), + Vec(Vec), + } + + let value = Value::deserialize(deserializer)?; + + match value { + Value::String(str_value) => { + if str_value == WILDCARD { + Ok(AllowList::Wildcard) + } else { + Err(serde::de::Error::custom("only wildcard strings allowed")) + } + } + Value::Vec(vec) => Ok(AllowList::Set(vec)), + } + } +} diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index d6b3dcf17..4cb6e1a9e 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -19,26 +19,28 @@ # dependent on this data. # # It is recommended to set this list to all schema ids your own application -# should support. Defaults to an empty list. -# -# WARNING: When set empty, your node will support _all_ schema ids. This is -# useful for experimentation and local development but _not_ recommended for -# production settings. -# -supported_schema_ids = [ - # To discover new schema, set your node to replicate schema definition - # documents by including these two built-in schema ids. 
Your node will now - # search for and replicate schemas which have been published to the - # network. - # "schema_field_definition_v1", - # "schema_definition_v1", - - # Once you discover new schemas and want to start replicating their - # documents, then add their schema ids to this list as well. It's also - # possible to load schema directly onto your node using the tool `fishy`: - # https://github.com/p2panda/fishy - # "example_0020a01fe...", -] +# should support, including all important system schemas. For example: +# +# supported_schema_ids = [ +# # To discover new schema, set your node to replicate schema definition +# # documents by including these two built-in schema ids. Your node will now +# # search for and replicate schemas which have been published to the +# # network. +# "schema_definition_v1", +# "schema_field_definition_v1", +# +# # Once you discover new schemas and want to start replicating their +# # documents, then add their schema ids to this list as well. It's also +# # possible to load or create schemas directly onto your node using the +# # tool `fishy`: https://github.com/p2panda/fishy +# "my_interesting_schema_0020a01fe...", +# ] +# +# WARNING: When set to wildcard "*", your node will support _all_ schema ids it +# will encounter on the network. This is useful for experimentation and local +# development but _not_ recommended for production settings. 
+# +supported_schema_ids = "*" # ~~~~~~~~ # Database diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs new file mode 100644 index 000000000..04e25b315 --- /dev/null +++ b/aquadoggo_cli/src/config.rs @@ -0,0 +1,271 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::convert::TryFrom; +use std::net::{IpAddr, SocketAddr}; +use std::path::PathBuf; +use std::str::FromStr; + +use anyhow::{anyhow, bail, Result}; +use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration}; +use clap::Parser; +use directories::ProjectDirs; +use figment::providers::{Env, Format, Serialized, Toml}; +use figment::Figment; +use libp2p::multiaddr::Protocol; +use libp2p::Multiaddr; +use p2panda_rs::schema::SchemaId; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; + +const CONFIG_FILE_NAME: &str = "config.toml"; + +type ConfigFilePath = Option; + +/// Get configuration from 1. .toml file, 2. environment variables and 3. command line arguments +/// (in that order, meaning that later configuration sources take precedence over the earlier +/// ones). +/// +/// Returns a partly unchecked configuration object which results from all of these sources. It +/// still needs to be converted for aquadoggo as it might still contain invalid values. 
+pub fn load_config() -> Result<(ConfigFilePath, Configuration)> { + // Parse command line arguments first to get optional config file path + let cli = Cli::parse(); + + // Determine if a config file path was provided or if we should look for it in common locations + let config_file_path: ConfigFilePath = match &cli.config { + Some(path) => { + if !path.exists() { + bail!("Config file '{}' does not exist", path.display()); + } + + Some(path.clone()) + } + None => try_determine_config_file_path(), + }; + + let mut figment = Figment::from(Serialized::defaults(Configuration::default())); + if let Some(path) = &config_file_path { + figment = figment.merge(Toml::file(path)); + } + + let config = figment + .merge(Env::raw()) + .merge(Serialized::defaults(cli)) + .extract()?; + + Ok((config_file_path, config)) +} + +/// Command line arguments for user configuration. +/// +/// All arguments are optional and don't get serialized to Figment when they're None. This is to +/// assure that default values do not overwrite all previous settings, even when they haven't been +/// set. +#[derive(Parser, Serialize, Debug)] +#[command( + name = "aquadoggo Node", + about = "Node server for the p2panda network", + version +)] +struct Cli { + /// Path to a config.toml file. + #[arg(short = 'c', long, value_name = "PATH")] + #[serde(skip_serializing_if = "Option::is_none")] + config: Option, + + /// List of schema ids which a node will replicate and expose on the GraphQL API. + #[arg(short = 's', long, value_name = "SCHEMA_ID SCHEMA_ID, ...", num_args = 0..)] + #[serde(skip_serializing_if = "Option::is_none")] + supported_schema_ids: Option>, + + /// URL / connection string to PostgreSQL or SQLite database. + #[arg(short = 'd', long, value_name = "CONNECTION_STRING")] + #[serde(skip_serializing_if = "Option::is_none")] + database_url: Option, + + /// HTTP port for client-node communication, serving the GraphQL API. 
+ #[arg(short = 'p', long, value_name = "PORT")] + #[serde(skip_serializing_if = "Option::is_none")] + http_port: Option, + + /// QUIC port for node-node communication and data replication. + #[arg(short = 'q', long, value_name = "PORT")] + #[serde(skip_serializing_if = "Option::is_none")] + quic_port: Option, + + /// Path to persist your ed25519 private key file. + #[arg(short = 'k', long, value_name = "PATH")] + #[serde(skip_serializing_if = "Option::is_none")] + private_key: Option, + + /// mDNS to discover other peers on the local network. + #[arg(short = 'm', long, value_name = "BOOL")] + #[serde(skip_serializing_if = "Option::is_none")] + mdns: Option, + + /// List of known node addresses we want to connect to directly. + #[arg(short = 'n', long, value_name = "IP:PORT IP:PORT, ...", num_args = 0..)] + #[serde(skip_serializing_if = "Option::is_none")] + direct_node_addresses: Option>, + + /// Address of relay. + #[arg(short = 'r', long, value_name = "IP:PORT")] + #[serde(skip_serializing_if = "Option::is_none")] + relay_address: Option, + + /// Set to true if our node should also function as a relay. + #[arg(short = 'e', long, value_name = "BOOL")] + #[serde(skip_serializing_if = "Option::is_none")] + im_a_relay: Option, +} + +/// Configuration for environment variables and .toml file. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Configuration { + pub supported_schema_ids: UncheckedAllowList, + pub database_url: String, + pub database_max_connections: u32, + pub worker_pool_size: u32, + pub http_port: u16, + pub quic_port: u16, + pub private_key: Option, + pub mdns: bool, + pub direct_node_addresses: Vec, + pub relay_address: Option, + pub im_a_relay: bool, +} + +impl Default for Configuration { + fn default() -> Self { + Self { + supported_schema_ids: UncheckedAllowList::Set(vec![]), + database_url: "sqlite::memory:".into(), + database_max_connections: 32, + worker_pool_size: 16, + http_port: 2020, + quic_port: 2022, + private_key: None, + mdns: true, + direct_node_addresses: vec![], + relay_address: None, + im_a_relay: false, + } + } +} + +impl TryFrom for NodeConfiguration { + type Error = anyhow::Error; + + fn try_from(value: Configuration) -> Result { + // Check if given schema ids are valid + let supported_schema_ids = match value.supported_schema_ids { + UncheckedAllowList::Wildcard => AllowList::::Wildcard, + UncheckedAllowList::Set(str_values) => { + let schema_ids: Result, anyhow::Error> = str_values + .iter() + .map(|str_value| { + SchemaId::from_str(str_value).map_err(|_| { + anyhow!("Invalid schema id '{str_value}' found in 'supported_schema_ids' list") + }) + }) + .collect(); + + AllowList::Set(schema_ids?) 
+ } + }; + + Ok(NodeConfiguration { + database_url: value.database_url, + database_max_connections: value.database_max_connections, + http_port: value.http_port, + worker_pool_size: value.worker_pool_size, + supported_schema_ids, + network: NetworkConfiguration { + quic_port: value.quic_port, + mdns: value.mdns, + direct_node_addresses: value + .direct_node_addresses + .into_iter() + .map(to_multiaddress) + .collect(), + im_a_relay: value.im_a_relay, + relay_address: value.relay_address.map(to_multiaddress), + ..Default::default() + }, + }) + } +} + +fn to_multiaddress(socket_address: SocketAddr) -> Multiaddr { + let mut multiaddr = match socket_address.ip() { + IpAddr::V4(ip) => Multiaddr::from(Protocol::Ip4(ip)), + IpAddr::V6(ip) => Multiaddr::from(Protocol::Ip6(ip)), + }; + multiaddr.push(Protocol::Udp(socket_address.port())); + multiaddr.push(Protocol::QuicV1); + multiaddr +} + +fn try_determine_config_file_path() -> Option { + // Find config file in current folder + let mut current_dir = std::env::current_dir().expect("Could not determine current directory"); + current_dir.push(CONFIG_FILE_NAME); + + // Find config file in XDG config folder + let mut xdg_config_dir: PathBuf = ProjectDirs::from("", "", "aquadoggo") + .expect("Could not determine valid config directory path from operating system") + .config_dir() + .to_path_buf(); + xdg_config_dir.push(CONFIG_FILE_NAME); + + [current_dir, xdg_config_dir] + .iter() + .find(|path| path.exists()) + .cloned() +} + +const WILDCARD: &'static str = "*"; + +#[derive(Debug, Clone)] +pub enum UncheckedAllowList { + Wildcard, + Set(Vec), +} + +impl Serialize for UncheckedAllowList { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + match self { + UncheckedAllowList::Wildcard => serializer.serialize_str(WILDCARD), + UncheckedAllowList::Set(list) => list.serialize(serializer), + } + } +} + +impl<'de> Deserialize<'de> for UncheckedAllowList { + fn deserialize(deserializer: D) -> Result + where + D: 
Deserializer<'de>, + { + #[derive(Deserialize)] + #[serde(untagged)] + enum Value { + String(String), + Vec(Vec), + } + + let value = Value::deserialize(deserializer)?; + + match value { + Value::String(str_value) => { + if str_value == WILDCARD { + Ok(UncheckedAllowList::Wildcard) + } else { + Err(serde::de::Error::custom("only wildcard strings allowed")) + } + } + Value::Vec(vec) => Ok(UncheckedAllowList::Set(vec)), + } + } +} diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index a99a34f07..acccd7df7 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -1,227 +1,61 @@ // SPDX-License-Identifier: AGPL-3.0-or-later +mod config; mod key_pair; -use std::fmt::Display; -use std::net::{IpAddr, SocketAddr}; -use std::path::PathBuf; +use std::convert::TryInto; -use anyhow::Result; -use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration, Node}; -use clap::{crate_version, Parser}; -use directories::ProjectDirs; -use figment::providers::{Env, Format, Serialized, Toml}; -use figment::Figment; -use libp2p::multiaddr::Protocol; -use libp2p::Multiaddr; -use p2panda_rs::schema::SchemaId; -use p2panda_rs::Human; -use serde::{Deserialize, Serialize}; +use anyhow::Context; +use aquadoggo::Node; +use clap::crate_version; -const CONFIG_FILE_NAME: &str = "config.toml"; -const CONFIG_ENV_VAR_PREFIX: &str = "DOGGO_"; +use crate::config::load_config; +use crate::key_pair::{generate_ephemeral_key_pair, generate_or_load_key_pair}; -/// Node server for the p2panda network. -#[derive(Parser, Debug, Serialize, Deserialize)] -#[command(name = "aquadoggo Node", version)] -struct Configuration { - /// Path to config.toml file. - #[arg(short = 'c', long)] - config: Option, - - /// List of schema ids which a node will replicate and expose on the GraphQL API. - #[arg(short = 's', long)] - supported_schema_ids: Vec, - - /// URL / connection string to PostgreSQL or SQLite database. 
- #[arg(short = 'd', long, default_value = "sqlite::memory:")] - database_url: String, - - /// Maximum number of connections that the database pool should maintain. - #[arg(long, default_value_t = 32)] - database_max_connections: u32, - - /// Number of concurrent workers, defines the maximum of materialization tasks which can be - /// worked on simultaneously. - #[arg(long, default_value_t = 16)] - worker_pool_size: u32, - - /// HTTP port for client-node communication, serving the GraphQL API. - #[arg(short = 'p', long, default_value_t = 2020)] - http_port: u16, - - /// QUIC port for node-node communication and data replication. - #[arg(short = 'q', long, default_value_t = 2022)] - quic_port: u16, - - /// Path to persist your ed25519 private key file. - #[arg(short = 'k', long)] - private_key: Option, - - /// mDNS to discover other peers on the local network. - #[arg(short = 'm', long, default_value_t = true)] - mdns: bool, - - /// List of known node addresses (IP + port) we want to connect to directly. - #[arg(short = 'n', long)] - direct_node_addresses: Vec, - - /// Address of relay. - #[arg(short = 'r', long)] - relay_address: Option, - - /// Set to true if our node should also function as a relay. Defaults to false. 
- #[arg(short = 'e', long, default_value_t = false)] - im_a_relay: bool, -} - -impl Display for Configuration { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!( - f, - "{}", - self.config - .as_ref() - .map_or("No config file provided".into(), |ref path| format!( - "Loading config file from {}", - path.display() - )) - )?; +#[tokio::main] +async fn main() -> anyhow::Result<()> { + env_logger::init(); - write!(f, "\n\n")?; + // Load configuration from command line arguments, environment variables and .toml file + let (config_file_path, config) = load_config().context("Could not load configuration")?; - // @TODO: Nicer printing of all values - write!(f, "Schemas\n")?; - write!( - f, - "{:<20} {:<20}\n", - "supported_schema_ids", - self.supported_schema_ids - .iter() - .map(|id| id.display()) - .collect::>() - .join(", ") - ) - } -} + // Convert to `aquadoggo` configuration format and check for invalid inputs + let node_config = config + .clone() + .try_into() + .context("Could not load configuration")?; -impl From for NodeConfiguration { - fn from(cli: Configuration) -> Self { - let supported_schema_ids = if cli.supported_schema_ids.is_empty() { - AllowList::Wildcard - } else { - AllowList::Set(cli.supported_schema_ids) - }; + // @TODO: Create folders when paths for db or key was set + let key_pair = match &config.private_key { + Some(path) => generate_or_load_key_pair(path.clone()) + .context("Could not load private key from file")?, + None => generate_ephemeral_key_pair(), + }; - NodeConfiguration { - database_url: cli.database_url, - database_max_connections: cli.database_max_connections, - http_port: cli.http_port, - worker_pool_size: cli.worker_pool_size, - supported_schema_ids, - network: NetworkConfiguration { - quic_port: cli.quic_port, - mdns: cli.mdns, - direct_node_addresses: cli - .direct_node_addresses - .into_iter() - .map(to_multiaddress) - .collect(), - im_a_relay: cli.im_a_relay, - relay_address: 
cli.relay_address.map(to_multiaddress), - ..Default::default() - }, + // Show configuration info to the user + println!("aquadoggo v{}\n", crate_version!()); + match config_file_path { + Some(path) => { + println!("Loading config file from {}", path.display()); + } + None => { + println!("No config file provided"); } } -} - -fn to_multiaddress(socket_address: SocketAddr) -> Multiaddr { - let mut multiaddr = match socket_address.ip() { - IpAddr::V4(ip) => Multiaddr::from(Protocol::Ip4(ip)), - IpAddr::V6(ip) => Multiaddr::from(Protocol::Ip6(ip)), - }; - multiaddr.push(Protocol::Udp(socket_address.port())); - multiaddr.push(Protocol::QuicV1); - multiaddr -} - -fn try_determine_config_file_path() -> Option { - // Find config file in current folder - let mut current_dir = std::env::current_dir().expect("Could not determine current directory"); - current_dir.push(CONFIG_FILE_NAME); - - // Find config file in XDG config folder - let mut xdg_config_dir: PathBuf = ProjectDirs::from("", "", "aquadoggo") - .expect("Could not determine valid config directory path from operating system") - .config_dir() - .to_path_buf(); - xdg_config_dir.push(CONFIG_FILE_NAME); - - [current_dir, xdg_config_dir] - .iter() - .find(|path| path.exists()) - .cloned() -} + // @TODO: Improve print + println!("{:?}", config); -fn load_config() -> Result { - // Parse command line arguments first - let mut cli = Configuration::parse(); + // Start p2panda node in async runtime + let node = Node::start(key_pair, node_config).await; - // Determine if a config file path was provided or if we should look for it in common locations - cli.config = if cli.config.is_some() { - cli.config.clone() - } else { - try_determine_config_file_path() - }; - - // Get configuration from .toml file (optional), environment variable and command line - // arguments - let mut figment = Figment::new(); - - if let Some(path) = &cli.config { - figment = figment.merge(Toml::file(path)); + // Run this until [CTRL] + [C] got pressed or 
something went wrong + tokio::select! { + _ = tokio::signal::ctrl_c() => (), + _ = node.on_exit() => (), } - // @TODO: Fix not overriding values when empty array was set - figment - .merge(Env::prefixed(CONFIG_ENV_VAR_PREFIX)) - .merge(Serialized::defaults(cli)) - .extract() -} + // Wait until all tasks are gracefully shut down and exit + node.shutdown().await; -#[tokio::main] -async fn main() { - env_logger::init(); - - match load_config() { - Ok(config) => { - println!("aquadoggo v{}\n\n{:?}", crate_version!(), config); - - // @TODO: Create folders when paths for db or key was set - let key_pair = match &config.private_key { - Some(path) => key_pair::generate_or_load_key_pair(path.clone()) - .expect("Could not load private key from file"), - None => key_pair::generate_ephemeral_key_pair(), - }; - - // Start p2panda node in async runtime - let node = Node::start(key_pair, config.into()).await; - - // Run this until [CTRL] + [C] got pressed or something went wrong - tokio::select! { - _ = tokio::signal::ctrl_c() => (), - _ = node.on_exit() => (), - } - - // Wait until all tasks are gracefully shut down and exit - node.shutdown().await; - } - Err(error) => { - println!("Failed loading configuration:"); - - for error in error { - println!("- {}", error); - } - } - } + Ok(()) } From 8b86a78e22595283e57ff03e8c5b8d819965320a Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 13:59:54 +0200 Subject: [PATCH 50/66] Add a doc string --- aquadoggo_cli/src/config.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 04e25b315..586fc1993 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -16,6 +16,8 @@ use libp2p::Multiaddr; use p2panda_rs::schema::SchemaId; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +const WILDCARD: &'static str = "*"; + const CONFIG_FILE_NAME: &str = "config.toml"; type ConfigFilePath = Option; @@ -223,8 +225,9 @@ fn 
try_determine_config_file_path() -> Option { .cloned() } -const WILDCARD: &'static str = "*"; - +/// Helper struct to deserialize from either a wildcard string "*" or a list of string values. +/// +/// These string values are not checked yet and need to be validated in a succeeding step. #[derive(Debug, Clone)] pub enum UncheckedAllowList { Wildcard, From ac00d92f2ed7db3c293bdb2309d03b5c2489db43 Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 14:01:51 +0200 Subject: [PATCH 51/66] Remove unnecessary serde code for AllowList --- aquadoggo/src/config.rs | 64 ------------------------------------- aquadoggo_cli/src/config.rs | 2 +- 2 files changed, 1 insertion(+), 65 deletions(-) diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 8d82f4902..655c11a72 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -1,10 +1,6 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::str::FromStr; - -use anyhow::bail; use p2panda_rs::schema::SchemaId; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::network::NetworkConfiguration; @@ -56,8 +52,6 @@ impl Default for Configuration { } } -const WILDCARD: &'static str = "*"; - /// Set a configuration value to either allow a defined set of elements or to a wildcard (*). 
#[derive(Debug, Clone)] pub enum AllowList { @@ -68,66 +62,8 @@ pub enum AllowList { Set(Vec), } -impl FromStr for AllowList { - type Err = anyhow::Error; - - fn from_str(s: &str) -> Result { - if s == WILDCARD { - Ok(Self::Wildcard) - } else { - bail!("only wildcard strings allowed") - } - } -} - impl Default for AllowList { fn default() -> Self { Self::Wildcard } } - -impl Serialize for AllowList -where - T: Serialize, -{ - fn serialize(&self, serializer: S) -> Result - where - S: Serializer, - { - match self { - AllowList::Wildcard => serializer.serialize_str(WILDCARD), - AllowList::Set(list) => list.serialize(serializer), - } - } -} - -impl<'de, T> Deserialize<'de> for AllowList -where - T: Deserialize<'de>, -{ - fn deserialize(deserializer: D) -> Result, D::Error> - where - D: Deserializer<'de>, - T: Deserialize<'de>, - { - #[derive(Deserialize)] - #[serde(untagged)] - enum Value { - String(String), - Vec(Vec), - } - - let value = Value::deserialize(deserializer)?; - - match value { - Value::String(str_value) => { - if str_value == WILDCARD { - Ok(AllowList::Wildcard) - } else { - Err(serde::de::Error::custom("only wildcard strings allowed")) - } - } - Value::Vec(vec) => Ok(AllowList::Set(vec)), - } - } -} diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 586fc1993..2c3fc5af3 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -16,7 +16,7 @@ use libp2p::Multiaddr; use p2panda_rs::schema::SchemaId; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -const WILDCARD: &'static str = "*"; +const WILDCARD: &str = "*"; const CONFIG_FILE_NAME: &str = "config.toml"; From f48d5be2309d5d4b0b9a38775d220408792a4444 Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 14:10:50 +0200 Subject: [PATCH 52/66] We already generate a path for keys --- aquadoggo_cli/src/main.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 
acccd7df7..9d3a31223 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -25,7 +25,8 @@ async fn main() -> anyhow::Result<()> { .try_into() .context("Could not load configuration")?; - // @TODO: Create folders when paths for db or key was set + // Generate a new key pair, either just for this session or persisted. Folders are + // automatically created when we picked a path let key_pair = match &config.private_key { Some(path) => generate_or_load_key_pair(path.clone()) .context("Could not load private key from file")?, From 5296668e338bf8a4551fb0e4c095954d3c5f51db Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 14:27:53 +0200 Subject: [PATCH 53/66] Always show absolute path of config file, whatever comes --- Cargo.lock | 7 +++++++ aquadoggo_cli/Cargo.toml | 1 + aquadoggo_cli/src/main.rs | 4 +++- aquadoggo_cli/src/utils.rs | 20 ++++++++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 aquadoggo_cli/src/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 6f32aabe2..9d19d03f2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -218,6 +218,7 @@ dependencies = [ "hex", "libp2p", "p2panda-rs", + "path-clean", "serde", "tempfile", "tokio", @@ -3190,6 +3191,12 @@ version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" +[[package]] +name = "path-clean" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17359afc20d7ab31fdb42bb844c8b3bb1dabd7dcf7e68428492da7f16966fcef" + [[package]] name = "pear" version = "0.2.7" diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index b616a8116..26b620061 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -28,6 +28,7 @@ figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" libp2p = "0.52.0" p2panda-rs = "0.7.1" +path-clean = "1.0.1" serde = { version = "1.0.185", features = 
["serde_derive"] } tokio = { version = "1.28.2", features = ["full"] } toml = "0.7.6" diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 9d3a31223..70abeaf4a 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -2,6 +2,7 @@ mod config; mod key_pair; +mod utils; use std::convert::TryInto; @@ -9,6 +10,7 @@ use anyhow::Context; use aquadoggo::Node; use clap::crate_version; +use crate::utils::absolute_path; use crate::config::load_config; use crate::key_pair::{generate_ephemeral_key_pair, generate_or_load_key_pair}; @@ -37,7 +39,7 @@ async fn main() -> anyhow::Result<()> { println!("aquadoggo v{}\n", crate_version!()); match config_file_path { Some(path) => { - println!("Loading config file from {}", path.display()); + println!("Loading config file from {}", absolute_path(path).display()); } None => { println!("No config file provided"); diff --git a/aquadoggo_cli/src/utils.rs b/aquadoggo_cli/src/utils.rs new file mode 100644 index 000000000..f49355a28 --- /dev/null +++ b/aquadoggo_cli/src/utils.rs @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::env; +use std::path::{Path, PathBuf}; + +use path_clean::PathClean; + +/// Returns the absolute path of a file or directory. 
+pub fn absolute_path(path: impl AsRef) -> PathBuf { + let path = path.as_ref(); + + let absolute_path = if path.is_absolute() { + path.to_path_buf() + } else { + env::current_dir().expect("Could not determine current directory").join(path) + } + .clean(); + + absolute_path +} From eeb91efc1d602483e952868c74bfce6cf69a2597 Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 17:36:40 +0200 Subject: [PATCH 54/66] Show basic config and all addresses on startup --- Cargo.lock | 13 +++ aquadoggo/Cargo.toml | 1 + aquadoggo/src/http/service.rs | 10 ++- aquadoggo/src/network/behaviour.rs | 8 +- aquadoggo/src/network/config.rs | 4 +- aquadoggo/src/network/mod.rs | 1 + aquadoggo/src/network/service.rs | 23 ++++- aquadoggo/src/network/utils.rs | 24 +++++ aquadoggo/src/schema/schema_provider.rs | 4 +- aquadoggo_cli/Cargo.toml | 1 + aquadoggo_cli/config.toml | 2 +- aquadoggo_cli/src/config.rs | 114 ++++++++++++++++++++++-- aquadoggo_cli/src/main.rs | 16 +--- aquadoggo_cli/src/utils.rs | 10 +-- 14 files changed, 190 insertions(+), 41 deletions(-) create mode 100644 aquadoggo/src/network/utils.rs diff --git a/Cargo.lock b/Cargo.lock index 9d19d03f2..f6b134cfb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,6 +188,7 @@ dependencies = [ "proptest", "proptest-derive", "rand 0.8.5", + "regex", "reqwest", "rstest 0.15.0", "rstest_reuse 0.3.0", @@ -212,6 +213,7 @@ dependencies = [ "anyhow", "aquadoggo", "clap", + "colored", "directories", "env_logger", "figment", @@ -955,6 +957,17 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "colored" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2674ec482fbc38012cf31e6c42ba0177b431a0cb6f15fe40efa5aab1bda516f6" +dependencies = [ + "is-terminal", + "lazy_static", + "windows-sys", +] + [[package]] name = "concurrent-queue" version = "2.2.0" diff --git 
a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index d7a3a2a6b..9e2953536 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -56,6 +56,7 @@ once_cell = "1.18.0" openssl-probe = "0.1.5" p2panda-rs = { version = "0.7.1", features = ["storage-provider"] } rand = "0.8.5" +regex = "1.9.3" serde = { version = "1.0.152", features = ["derive"] } sqlx = { version = "0.6.1", features = [ "any", diff --git a/aquadoggo/src/http/service.rs b/aquadoggo/src/http/service.rs index 31da4afc8..7a2f40e02 100644 --- a/aquadoggo/src/http/service.rs +++ b/aquadoggo/src/http/service.rs @@ -62,11 +62,19 @@ pub async fn http_service( let builder = if let Ok(builder) = axum::Server::try_bind(&http_address) { builder } else { + println!("HTTP port {http_port} was already taken, try random port instead .."); axum::Server::try_bind(&SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), 0))? }; + let builder = builder.serve(build_server(http_context).into_make_service()); + + let local_address = builder.local_addr(); + println!( + "Go to http://{}/graphql to use GraphQL playground", + local_address + ); + builder - .serve(build_server(http_context).into_make_service()) .with_graceful_shutdown(async { debug!("HTTP service is ready"); if tx_ready.send(()).is_err() { diff --git a/aquadoggo/src/network/behaviour.rs b/aquadoggo/src/network/behaviour.rs index b7ec2a24d..ef2cf55cd 100644 --- a/aquadoggo/src/network/behaviour.rs +++ b/aquadoggo/src/network/behaviour.rs @@ -87,7 +87,7 @@ impl P2pandaBehaviour { // Create an identify server behaviour with default configuration if a rendezvous server // address has been provided or the rendezvous server flag is set - let identify = if network_config.relay_address.is_some() || network_config.im_a_relay { + let identify = if network_config.relay_address.is_some() || network_config.relay_mode { debug!("Identify network behaviour enabled"); Some(identify::Behaviour::new(identify::Config::new( format!("{NODE_NAMESPACE}/1.0.0"), @@ -125,7 +125,7 @@ impl 
P2pandaBehaviour { // Create a rendezvous server behaviour with default configuration if the rendezvous server // flag is set - let rendezvous_server = if network_config.im_a_relay { + let rendezvous_server = if network_config.relay_mode { debug!("Rendezvous server network behaviour enabled"); Some(rendezvous::server::Behaviour::new( rendezvous::server::Config::default(), @@ -140,7 +140,7 @@ impl P2pandaBehaviour { // Create a relay server behaviour with default configuration if the relay server flag is // set - let relay_server = if network_config.im_a_relay { + let relay_server = if network_config.relay_mode { debug!("Relay server network behaviour enabled"); Some(relay::Behaviour::new( peer_id, @@ -155,7 +155,7 @@ impl P2pandaBehaviour { }; // Create UDP holepunching behaviour (DCUtR) if the flag is set - let dcutr = if network_config.im_a_relay || relay_client.is_some() { + let dcutr = if network_config.relay_mode || relay_client.is_some() { Some(dcutr::Behaviour::new(peer_id)) } else { None diff --git a/aquadoggo/src/network/config.rs b/aquadoggo/src/network/config.rs index ef994f0cc..56b05e371 100644 --- a/aquadoggo/src/network/config.rs +++ b/aquadoggo/src/network/config.rs @@ -29,7 +29,7 @@ pub struct NetworkConfiguration { /// /// Relays _need_ to be hosted in a way where they can be reached directly, for example with a /// static IP address through an VPS. - pub im_a_relay: bool, + pub relay_mode: bool, /// Address of a peer which can act as a relay/rendezvous server. 
/// @@ -97,7 +97,7 @@ impl Default for NetworkConfiguration { notify_handler_buffer_size: 128, per_connection_event_buffer_size: 8, quic_port: 2022, - im_a_relay: false, + relay_mode: false, relay_address: None, } } diff --git a/aquadoggo/src/network/mod.rs b/aquadoggo/src/network/mod.rs index 514c5e9c7..a9b531dd8 100644 --- a/aquadoggo/src/network/mod.rs +++ b/aquadoggo/src/network/mod.rs @@ -8,6 +8,7 @@ mod service; mod shutdown; mod swarm; mod transport; +pub mod utils; pub use config::NetworkConfiguration; pub use peers::{Peer, PeerMessage}; diff --git a/aquadoggo/src/network/service.rs b/aquadoggo/src/network/service.rs index 7c803fc25..6b7658482 100644 --- a/aquadoggo/src/network/service.rs +++ b/aquadoggo/src/network/service.rs @@ -19,7 +19,7 @@ use crate::context::Context; use crate::manager::{ServiceReadySender, Shutdown}; use crate::network::behaviour::{Event, P2pandaBehaviour}; use crate::network::config::NODE_NAMESPACE; -use crate::network::{identity, peers, swarm, NetworkConfiguration, ShutdownHandler}; +use crate::network::{identity, peers, swarm, utils, NetworkConfiguration, ShutdownHandler}; /// Network service which handles all networking logic for a p2panda node. /// @@ -41,10 +41,10 @@ pub async fn network_service( let key_pair = identity::to_libp2p_key_pair(&context.key_pair); let local_peer_id = key_pair.public().to_peer_id(); - info!("Local peer id: {local_peer_id}"); + println!("Peer id: {local_peer_id}"); // The swarm can be initiated with or without "relay" capabilities. - let mut swarm = if network_config.im_a_relay { + let mut swarm = if network_config.relay_mode { info!("Networking service initializing with relay capabilities..."); swarm::build_relay_swarm(&network_config, key_pair).await? 
} else { @@ -74,6 +74,10 @@ pub async fn network_service( .with(Protocol::from(Ipv4Addr::UNSPECIFIED)) .with(Protocol::Udp(0)) .with(Protocol::QuicV1); + println!( + "QUIC port {} was already taken, try random port instead ..", + network_config.quic_port + ); swarm.listen_on(random_port_addr)?; } @@ -260,6 +264,7 @@ struct EventLoop { rx: BroadcastStream, network_config: NetworkConfiguration, shutdown_handler: ShutdownHandler, + learned_port: bool, } impl EventLoop { @@ -277,6 +282,7 @@ impl EventLoop { tx, network_config, shutdown_handler, + learned_port: false, } } @@ -306,6 +312,17 @@ impl EventLoop { event = self.swarm.next() => { let event = event.expect("Swarm stream to be infinite"); match event { + SwarmEvent::NewListenAddr { address, .. } => { + if self.learned_port { + continue; + } + + // Only only one QUIC address once + if let Some(address) = utils::to_quic_address(&address) { + println!("Node is listening on 0.0.0.0:{}", address.port()); + self.learned_port = true; + } + } SwarmEvent::Behaviour(Event::Identify(event)) => self.handle_identify_events(&event).await, SwarmEvent::Behaviour(Event::Mdns(event)) => self.handle_mdns_events(&event).await, SwarmEvent::Behaviour(Event::RendezvousClient(event)) => self.handle_rendezvous_client_events(&event).await, diff --git a/aquadoggo/src/network/utils.rs b/aquadoggo/src/network/utils.rs new file mode 100644 index 000000000..5f320d4d0 --- /dev/null +++ b/aquadoggo/src/network/utils.rs @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::net::SocketAddr; + +use libp2p::Multiaddr; +use regex::Regex; + +pub fn to_quic_address(address: &Multiaddr) -> Option { + let hay = address.to_string(); + let regex = Regex::new(r"/ip4/(\d+.\d+.\d+.\d+)/udp/(\d+)/quic-v1").unwrap(); + let caps = regex.captures(&hay); + + match caps { + None => None, + Some(caps) => { + let ip_address = caps.get(1).unwrap().as_str(); + let port = caps.get(2).unwrap().as_str(); + let socket = format!("{ip_address}:{port}") 
+ .parse::() + .expect("Tried to convert invalid address"); + Some(socket) + } + } +} diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index 754165298..ec4681320 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; use std::sync::Arc; use anyhow::{bail, Result}; -use log::{debug, info}; +use log::{debug, info, trace}; use p2panda_rs::schema::{Schema, SchemaId, SYSTEM_SCHEMAS}; use p2panda_rs::Human; use tokio::sync::broadcast::{channel, Receiver, Sender}; @@ -51,7 +51,7 @@ impl SchemaProvider { let (tx, _) = channel(64); - debug!( + trace!( "Initialised schema provider:\n- {}", index .values() diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 26b620061..970be8619 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -22,6 +22,7 @@ doc = false [dependencies] anyhow = "1.0.62" clap = { version = "4.1.8", features = ["derive", "cargo"] } +colored = "2.0.4" directories = "5.0.1" env_logger = "0.9.0" figment = { version = "0.10.10", features = ["toml", "env"] } diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 4cb6e1a9e..48ee8ebf4 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -166,4 +166,4 @@ direct_node_addresses = [ # NOTE: Relays _need_ to be hosted in a way where they can be reached directly, # for example with a static IP address through an VPS. 
# -im_a_relay = false +relay_mode = false diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 2c3fc5af3..594c3ee35 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -7,7 +7,8 @@ use std::str::FromStr; use anyhow::{anyhow, bail, Result}; use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration}; -use clap::Parser; +use clap::{crate_version, Parser}; +use colored::Colorize; use directories::ProjectDirs; use figment::providers::{Env, Format, Serialized, Toml}; use figment::Figment; @@ -16,6 +17,8 @@ use libp2p::Multiaddr; use p2panda_rs::schema::SchemaId; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use crate::utils::absolute_path; + const WILDCARD: &str = "*"; const CONFIG_FILE_NAME: &str = "config.toml"; @@ -57,11 +60,11 @@ pub fn load_config() -> Result<(ConfigFilePath, Configuration)> { Ok((config_file_path, config)) } -/// Command line arguments for user configuration. +/// Configuration derived from command line arguments. /// /// All arguments are optional and don't get serialized to Figment when they're None. This is to -/// assure that default values do not overwrite all previous settings, even when they haven't been -/// set. +/// assure that default values do not overwrite all previous settings, especially when they haven't +/// been set. #[derive(Parser, Serialize, Debug)] #[command( name = "aquadoggo Node", @@ -117,10 +120,10 @@ struct Cli { /// Set to true if our node should also function as a relay. #[arg(short = 'e', long, value_name = "BOOL")] #[serde(skip_serializing_if = "Option::is_none")] - im_a_relay: Option, + relay_mode: Option, } -/// Configuration for environment variables and .toml file. +/// Configuration derived from environment variables and .toml file. 
#[derive(Clone, Debug, Serialize, Deserialize)] pub struct Configuration { pub supported_schema_ids: UncheckedAllowList, @@ -133,7 +136,7 @@ pub struct Configuration { pub mdns: bool, pub direct_node_addresses: Vec, pub relay_address: Option, - pub im_a_relay: bool, + pub relay_mode: bool, } impl Default for Configuration { @@ -149,7 +152,7 @@ impl Default for Configuration { mdns: true, direct_node_addresses: vec![], relay_address: None, - im_a_relay: false, + relay_mode: false, } } } @@ -189,7 +192,7 @@ impl TryFrom for NodeConfiguration { .into_iter() .map(to_multiaddress) .collect(), - im_a_relay: value.im_a_relay, + relay_mode: value.relay_mode, relay_address: value.relay_address.map(to_multiaddress), ..Default::default() }, @@ -225,6 +228,99 @@ fn try_determine_config_file_path() -> Option { .cloned() } +pub fn print_config(path: ConfigFilePath, config: &NodeConfiguration) -> String { + println!( + r" ██████ ███████ ████ + ████████ ██████ + ██████ ███ + █████ ██ + █ ████ █████ + █ ██████ █ █████ + ██ ████ ███ █████ + █████ ██████ █ + ███████ ██ + █████████ █████████████ + ███████████ █████████ + █████████████████ ████ + ██████ ███████████ ██ + ██████████ █████ █ + █████████ ██ ███ ██ + ██████ █ █ ██ + ██ ██ ███████ ██ + ███████████ ██████ +████████ ████████████ ██████ +████ ██████ ██████████ █ ████ + █████████ ████████ ███ ███████ + ████████ ██████ ████████ +█████████ ████████████████████████ ███ +█████████ ██ + " + ); + + println!("{} v{}\n", "aquadoggo".underline(), crate_version!()); + + match path { + Some(path) => { + println!( + "Loading config file from {}", + absolute_path(path).display().to_string().blue() + ); + } + None => { + println!("No config file provided"); + } + } + + println!(); + println!("{}\n", "Configuration".underline()); + + let supported_schema_ids: String = match &config.supported_schema_ids { + AllowList::Set(schema_ids) => { + String::from("\n") + + &schema_ids + .iter() + .map(|id| format!("• {id}")) + .collect::>() + .join("\n") 
+ } + AllowList::Wildcard => "support all incoming schemas (*)".into(), + }; + + let database_url = if config.database_url == "sqlite::memory:" { + "memory (data is not persisted)".into() + } else if config.database_url.contains("sqlite:") { + format!("SQLite: {}", config.database_url) + } else { + "PostgreSQL".into() + }; + + let mdns = if config.network.mdns { + "enabled" + } else { + "disabled" + }; + + let relay_mode = if config.network.relay_mode { + "enabled" + } else { + "disabled" + }; + + format!( + r"Supported Schema IDs: {} +Database URL: {} +mDNS: {} +Relay Mode: {} + +Node is ready! +", + supported_schema_ids.blue(), + database_url.blue(), + mdns.blue(), + relay_mode.blue(), + ) +} + /// Helper struct to deserialize from either a wildcard string "*" or a list of string values. /// /// These string values are not checked yet and need to be validated in a succeeding step. diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index 70abeaf4a..d63e749b4 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -8,10 +8,8 @@ use std::convert::TryInto; use anyhow::Context; use aquadoggo::Node; -use clap::crate_version; -use crate::utils::absolute_path; -use crate::config::load_config; +use crate::config::{load_config, print_config}; use crate::key_pair::{generate_ephemeral_key_pair, generate_or_load_key_pair}; #[tokio::main] @@ -36,17 +34,7 @@ async fn main() -> anyhow::Result<()> { }; // Show configuration info to the user - println!("aquadoggo v{}\n", crate_version!()); - match config_file_path { - Some(path) => { - println!("Loading config file from {}", absolute_path(path).display()); - } - None => { - println!("No config file provided"); - } - } - // @TODO: Improve print - println!("{:?}", config); + println!("{}", print_config(config_file_path, &node_config)); // Start p2panda node in async runtime let node = Node::start(key_pair, node_config).await; diff --git a/aquadoggo_cli/src/utils.rs b/aquadoggo_cli/src/utils.rs index 
f49355a28..477c8047e 100644 --- a/aquadoggo_cli/src/utils.rs +++ b/aquadoggo_cli/src/utils.rs @@ -9,12 +9,12 @@ use path_clean::PathClean; pub fn absolute_path(path: impl AsRef) -> PathBuf { let path = path.as_ref(); - let absolute_path = if path.is_absolute() { + if path.is_absolute() { path.to_path_buf() } else { - env::current_dir().expect("Could not determine current directory").join(path) + env::current_dir() + .expect("Could not determine current directory") + .join(path) } - .clean(); - - absolute_path + .clean() } From f92595ede40277ec624b33bdc97d588ea6ad5cb0 Mon Sep 17 00:00:00 2001 From: adz Date: Thu, 24 Aug 2023 17:40:49 +0200 Subject: [PATCH 55/66] Correct print config when empty array --- aquadoggo_cli/src/config.rs | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 594c3ee35..cfaab95bf 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -276,12 +276,16 @@ pub fn print_config(path: ConfigFilePath, config: &NodeConfiguration) -> String let supported_schema_ids: String = match &config.supported_schema_ids { AllowList::Set(schema_ids) => { - String::from("\n") - + &schema_ids - .iter() - .map(|id| format!("• {id}")) - .collect::>() - .join("\n") + if schema_ids.is_empty() { + "none (disable replication)".into() + } else { + String::from("\n") + + &schema_ids + .iter() + .map(|id| format!("• {id}")) + .collect::>() + .join("\n") + } } AllowList::Wildcard => "support all incoming schemas (*)".into(), }; From c3e18e63022daef0ace9b047589d7c0a4c04db8f Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 10:17:31 +0200 Subject: [PATCH 56/66] Allow --mdns and --relay-flag args to be used without bool value --- aquadoggo_cli/src/config.rs | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index cfaab95bf..a5fe2809e 100644 --- 
a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -78,7 +78,7 @@ struct Cli { config: Option, /// List of schema ids which a node will replicate and expose on the GraphQL API. - #[arg(short = 's', long, value_name = "SCHEMA_ID SCHEMA_ID, ...", num_args = 0..)] + #[arg(short = 's', long, value_name = "SCHEMA_ID SCHEMA_ID ...", num_args = 0..)] #[serde(skip_serializing_if = "Option::is_none")] supported_schema_ids: Option>, @@ -103,12 +103,18 @@ struct Cli { private_key: Option, /// mDNS to discover other peers on the local network. - #[arg(short = 'm', long, value_name = "BOOL")] + #[arg( + short = 'm', + long, + value_name = "BOOL", + default_missing_value = "true", + num_args = 0..=1, + )] #[serde(skip_serializing_if = "Option::is_none")] mdns: Option, /// List of known node addresses we want to connect to directly. - #[arg(short = 'n', long, value_name = "IP:PORT IP:PORT, ...", num_args = 0..)] + #[arg(short = 'n', long, value_name = "IP:PORT IP:PORT ...", num_args = 0..)] #[serde(skip_serializing_if = "Option::is_none")] direct_node_addresses: Option>, @@ -118,7 +124,13 @@ struct Cli { relay_address: Option, /// Set to true if our node should also function as a relay. 
- #[arg(short = 'e', long, value_name = "BOOL")] + #[arg( + short = 'e', + long, + value_name = "BOOL", + default_missing_value = "true", + num_args = 0..=1, + )] #[serde(skip_serializing_if = "Option::is_none")] relay_mode: Option, } From acda72531f243a3839684fdf2f8ca10627b8a1e2 Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 10:55:41 +0200 Subject: [PATCH 57/66] Allow use of wildcard strings in --supported-schema-ids argument --- aquadoggo_cli/src/config.rs | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index a5fe2809e..5e61a6af7 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -79,8 +79,11 @@ struct Cli { /// List of schema ids which a node will replicate and expose on the GraphQL API. #[arg(short = 's', long, value_name = "SCHEMA_ID SCHEMA_ID ...", num_args = 0..)] - #[serde(skip_serializing_if = "Option::is_none")] - supported_schema_ids: Option>, + #[serde( + skip_serializing_if = "Option::is_none", + serialize_with = "serialize_with_wildcard" + )] + supported_schema_ids: Option>, /// URL / connection string to PostgreSQL or SQLite database. #[arg(short = 'd', long, value_name = "CONNECTION_STRING")] @@ -135,6 +138,28 @@ struct Cli { relay_mode: Option, } +/// Clap converts wildcard symbols from command line arguments (for example --supported-schema-ids +/// "*") into an array, (["*"]), but we need it to be just a string ("*"). 
+fn serialize_with_wildcard( + list: &Option>, + serializer: S, +) -> std::result::Result +where + S: Serializer, +{ + match list { + Some(list) => { + // Wildcard symbol comes in form of an array ["*"], convert it to just a string "*" + if list.len() == 1 && list[0] == WILDCARD { + serializer.serialize_str(WILDCARD) + } else { + list.serialize(serializer) + } + } + None => unreachable!("Serialization is skipped if value is None"), + } +} + /// Configuration derived from environment variables and .toml file. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Configuration { From 41822ddb67cdb498724c881a8838867c36c22652 Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 11:07:43 +0200 Subject: [PATCH 58/66] Rename to allow-schema-ids config --- aquadoggo/src/config.rs | 4 ++-- aquadoggo/src/node.rs | 6 +++--- aquadoggo/src/replication/service.rs | 4 ++-- aquadoggo/src/schema/schema_provider.rs | 19 ++++++++----------- aquadoggo_cli/config.toml | 2 +- aquadoggo_cli/src/config.rs | 22 ++++++++++++---------- 6 files changed, 28 insertions(+), 29 deletions(-) diff --git a/aquadoggo/src/config.rs b/aquadoggo/src/config.rs index 655c11a72..de7b653f4 100644 --- a/aquadoggo/src/config.rs +++ b/aquadoggo/src/config.rs @@ -33,7 +33,7 @@ pub struct Configuration { /// /// When allowing a schema you automatically opt into announcing, replicating and materializing /// documents connected to it, supporting applications which are dependent on this data. - pub supported_schema_ids: AllowList, + pub allow_schema_ids: AllowList, /// Network configuration. 
pub network: NetworkConfiguration, @@ -46,7 +46,7 @@ impl Default for Configuration { database_max_connections: 32, http_port: 2020, worker_pool_size: 16, - supported_schema_ids: AllowList::Wildcard, + allow_schema_ids: AllowList::Wildcard, network: NetworkConfiguration::default(), } } diff --git a/aquadoggo/src/node.rs b/aquadoggo/src/node.rs index 77d85bedb..3bd70c151 100644 --- a/aquadoggo/src/node.rs +++ b/aquadoggo/src/node.rs @@ -56,11 +56,11 @@ impl Node { // Initiate the SchemaProvider with all currently known schema from the store. // - // If supported_schema_ids are provided then only schema identified in this list will be - // added to the provider and supported by the node. + // If a list of allowed schema ids is provided then only schema identified in this list + // will be added to the provider and supported by the node. let application_schema = store.get_all_schema().await.unwrap(); let schema_provider = - SchemaProvider::new(application_schema, config.supported_schema_ids.clone()); + SchemaProvider::new(application_schema, config.allow_schema_ids.clone()); // Create service manager with shared data between services let context = Context::new(store, key_pair, config, schema_provider); diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index d3e05c2c9..4aa827288 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -158,8 +158,8 @@ impl ConnectionManager { /// Returns set of schema ids we are interested in and support on this node. async fn target_set(&self) -> TargetSet { - let supported_schema_ids = self.schema_provider.supported_schema_ids().await; - TargetSet::new(&supported_schema_ids) + let allow_schema_ids = self.schema_provider.supported_schema_ids().await; + TargetSet::new(&allow_schema_ids) } /// Register a new peer connection on the manager. 
diff --git a/aquadoggo/src/schema/schema_provider.rs b/aquadoggo/src/schema/schema_provider.rs index ec4681320..4eeeeb8f5 100644 --- a/aquadoggo/src/schema/schema_provider.rs +++ b/aquadoggo/src/schema/schema_provider.rs @@ -22,7 +22,7 @@ pub struct SchemaProvider { /// Optional list of allowed schema ids. When not empty, only these schema ids will be accepted /// on this node, if not set _all_ schema ids are accepted (wildcard). - supported_schema_ids: AllowList, + allow_schema_ids: AllowList, /// Sender for broadcast channel informing subscribers about updated schemas. tx: Sender, @@ -30,10 +30,7 @@ pub struct SchemaProvider { impl SchemaProvider { /// Returns a `SchemaProvider` containing the given application schemas and all system schemas. - pub fn new( - application_schemas: Vec, - supported_schema_ids: AllowList, - ) -> Self { + pub fn new(application_schemas: Vec, allow_schema_ids: AllowList) -> Self { // Collect all system and application schemas. let mut schemas = SYSTEM_SCHEMAS.clone(); schemas.extend(&application_schemas); @@ -45,7 +42,7 @@ impl SchemaProvider { } // Filter out all unsupported schema ids when list was set - if let AllowList::Set(schema_ids) = &supported_schema_ids { + if let AllowList::Set(schema_ids) = &allow_schema_ids { index.retain(|id, _| schema_ids.contains(id)); }; @@ -62,7 +59,7 @@ impl SchemaProvider { Self { schemas: Arc::new(Mutex::new(index)), - supported_schema_ids, + allow_schema_ids, tx, } } @@ -87,8 +84,8 @@ impl SchemaProvider { /// Returns `true` if a schema was updated or it already existed in it's current state, and /// `false` if it was inserted. 
pub async fn update(&self, schema: Schema) -> Result { - if let AllowList::Set(supported_schema_ids) = &self.supported_schema_ids { - if !supported_schema_ids.contains(schema.id()) { + if let AllowList::Set(allow_schema_ids) = &self.allow_schema_ids { + if !allow_schema_ids.contains(schema.id()) { bail!("Attempted to add unsupported schema to schema provider"); } }; @@ -120,7 +117,7 @@ impl SchemaProvider { /// If no allow-list was set it returns the list of all currently known schema ids. If an /// allo-wlist was set it directly returns the list itself. pub async fn supported_schema_ids(&self) -> Vec { - match &self.supported_schema_ids { + match &self.allow_schema_ids { AllowList::Set(schema_ids) => schema_ids.clone(), AllowList::Wildcard => self .all() @@ -134,7 +131,7 @@ impl SchemaProvider { /// Returns true if an allow-list of supported schema ids was provided through user /// configuration. pub fn is_allow_list_active(&self) -> bool { - matches!(self.supported_schema_ids, AllowList::Set(_)) + matches!(self.allow_schema_ids, AllowList::Set(_)) } } diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 48ee8ebf4..0874ef46f 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -40,7 +40,7 @@ # will encounter on the network. This is useful for experimentation and local # development but _not_ recommended for production settings. # -supported_schema_ids = "*" +allow_schema_ids = "*" # ~~~~~~~~ # Database diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 5e61a6af7..5263201d2 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -83,7 +83,7 @@ struct Cli { skip_serializing_if = "Option::is_none", serialize_with = "serialize_with_wildcard" )] - supported_schema_ids: Option>, + allow_schema_ids: Option>, /// URL / connection string to PostgreSQL or SQLite database. 
#[arg(short = 'd', long, value_name = "CONNECTION_STRING")] @@ -163,7 +163,7 @@ where /// Configuration derived from environment variables and .toml file. #[derive(Clone, Debug, Serialize, Deserialize)] pub struct Configuration { - pub supported_schema_ids: UncheckedAllowList, + pub allow_schema_ids: UncheckedAllowList, pub database_url: String, pub database_max_connections: u32, pub worker_pool_size: u32, @@ -179,7 +179,7 @@ pub struct Configuration { impl Default for Configuration { fn default() -> Self { Self { - supported_schema_ids: UncheckedAllowList::Set(vec![]), + allow_schema_ids: UncheckedAllowList::Wildcard, database_url: "sqlite::memory:".into(), database_max_connections: 32, worker_pool_size: 16, @@ -199,14 +199,16 @@ impl TryFrom for NodeConfiguration { fn try_from(value: Configuration) -> Result { // Check if given schema ids are valid - let supported_schema_ids = match value.supported_schema_ids { + let allow_schema_ids = match value.allow_schema_ids { UncheckedAllowList::Wildcard => AllowList::::Wildcard, UncheckedAllowList::Set(str_values) => { let schema_ids: Result, anyhow::Error> = str_values .iter() .map(|str_value| { SchemaId::from_str(str_value).map_err(|_| { - anyhow!("Invalid schema id '{str_value}' found in 'supported_schema_ids' list") + anyhow!( + "Invalid schema id '{str_value}' found in 'allow_schema_ids' list" + ) }) }) .collect(); @@ -220,7 +222,7 @@ impl TryFrom for NodeConfiguration { database_max_connections: value.database_max_connections, http_port: value.http_port, worker_pool_size: value.worker_pool_size, - supported_schema_ids, + allow_schema_ids, network: NetworkConfiguration { quic_port: value.quic_port, mdns: value.mdns, @@ -311,7 +313,7 @@ pub fn print_config(path: ConfigFilePath, config: &NodeConfiguration) -> String println!(); println!("{}\n", "Configuration".underline()); - let supported_schema_ids: String = match &config.supported_schema_ids { + let allow_schema_ids: String = match &config.allow_schema_ids { 
AllowList::Set(schema_ids) => { if schema_ids.is_empty() { "none (disable replication)".into() @@ -324,7 +326,7 @@ pub fn print_config(path: ConfigFilePath, config: &NodeConfiguration) -> String .join("\n") } } - AllowList::Wildcard => "support all incoming schemas (*)".into(), + AllowList::Wildcard => "* (any schema id)".into(), }; let database_url = if config.database_url == "sqlite::memory:" { @@ -348,14 +350,14 @@ pub fn print_config(path: ConfigFilePath, config: &NodeConfiguration) -> String }; format!( - r"Supported Schema IDs: {} + r"Allow Schema IDs: {} Database URL: {} mDNS: {} Relay Mode: {} Node is ready! ", - supported_schema_ids.blue(), + allow_schema_ids.blue(), database_url.blue(), mdns.blue(), relay_mode.blue(), From 341b0daf5ceb9bc9c1d20bbf2485e530c556ba6a Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 11:08:52 +0200 Subject: [PATCH 59/66] Make sure to not print doc string in about section --- aquadoggo_cli/src/config.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 5263201d2..5f1d0952a 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -69,6 +69,7 @@ pub fn load_config() -> Result<(ConfigFilePath, Configuration)> { #[command( name = "aquadoggo Node", about = "Node server for the p2panda network", + long_about = None, version )] struct Cli { From 5604862369e919be0c9ccb8b1c6f0e4b5353fe48 Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 11:43:19 +0200 Subject: [PATCH 60/66] Improve texts --- aquadoggo_cli/config.toml | 86 ++++++++++++++++++++----------------- aquadoggo_cli/src/config.rs | 70 +++++++++++++++++++++++++----- 2 files changed, 105 insertions(+), 51 deletions(-) diff --git a/aquadoggo_cli/config.toml b/aquadoggo_cli/config.toml index 0874ef46f..4c9651a58 100644 --- a/aquadoggo_cli/config.toml +++ b/aquadoggo_cli/config.toml @@ -1,28 +1,34 @@ # aquadoggo configuration file # -# Copy this file to the location where aquadoggo 
will be run or refer to it -# using the -c command line argument when running aquadoggo. Replace the -# example values with your own desired configuration. +# 1. Copy this file to the location where a) aquadoggo will run b) in an XDG +# compliant config directory (for example "$HOME/.config/aquadoggo/config.toml" +# on Linux) or c) refer to it using the -c command line argument when running +# aquadoggo. +# 2. Replace the example values with your own desired configuration. All values +# in this template resemble the defaults +# 3. Check out our website https://p2panda.org for tutorials or official chat +# for further help finding the right configuration for your p2panda network # # NOTE: Paths in this file follow the XDG Base Directory Specification for # Linux. You might want to adjust these values for your regarding operating # system. -# ~~~~~~~ -# Schemas -# ~~~~~~~ +# ゚・。+☆+。 +# SCHEMAS +# ゚・。+☆+。 -# List of schema ids which a node will replicate and expose on the GraphQL API. +# List of schema ids which a node will replicate, persist and expose on the +# GraphQL API. # # When allowing a schema you automatically opt into announcing, replicating and -# materializing documents connected to it, supporting applications which are -# dependent on this data. +# materializing documents connected to it, supporting applications and networks +# which are dependent on this data. # # It is recommended to set this list to all schema ids your own application # should support, including all important system schemas. For example: # -# supported_schema_ids = [ -# # To discover new schema, set your node to replicate schema definition +# allow_schema_ids = [ +# # To discover new schemas, set your node to replicate schema definition # # documents by including these two built-in schema ids. Your node will now # # search for and replicate schemas which have been published to the # # network. 
@@ -31,20 +37,20 @@ # # # Once you discover new schemas and want to start replicating their # # documents, then add their schema ids to this list as well. It's also -# # possible to load or create schemas directly onto your node using the +# # possible to create and load schemas directly onto your node using the # # tool `fishy`: https://github.com/p2panda/fishy # "my_interesting_schema_0020a01fe...", # ] # -# WARNING: When set to wildcard "*", your node will support _all_ schema ids it +# WARNING: When set to wildcard "*", your node will support _any_ schemas it # will encounter on the network. This is useful for experimentation and local # development but _not_ recommended for production settings. # allow_schema_ids = "*" -# ~~~~~~~~ -# Database -# ~~~~~~~~ +# ゚・。+☆+。・ +# DATABASE +# ゚・。+☆+。・ # URL / connection string to PostgreSQL or SQLite database. # @@ -64,9 +70,9 @@ allow_schema_ids = "*" # database_max_connections = 32 -# ~~~~~~~ -# Workers -# ~~~~~~~ +# ゚・。+☆+。・ +# WORKERS +# ゚・。+☆+。・ # Number of concurrent workers which defines the maximum of materialization # tasks which can be worked on simultaneously. @@ -76,9 +82,9 @@ database_max_connections = 32 # worker_pool_size = 16 -# ~~~~~ -# Ports -# ~~~~~ +# ゚・。+☆ +# PORTS +# ゚・。+☆ # HTTP port, serving the GraphQL API (for example hosted under # http://localhost:2020/graphql). This API is used for client-node @@ -94,33 +100,33 @@ http_port = 2020 # quic_port = 2022 -# ~~~~~~~~ -# Identity -# ~~~~~~~~ +# ゚・。+☆+。・ +# IDENTITY +# ゚・。+☆+。・ # Path to persist your ed25519 private key file. The key is used to identify # you towards other nodes during network discovery and replication. This key is # _not_ used to create and sign data. # -# Will be generated newly and stored under this path when node starts for the -# first time. +# If a path is set, a key will be generated newly and stored under this path +# when node starts for the first time. 
# -# When commented out, your node will generate an ephemeral private key on every -# start up and _not_ persist it. +# When commented out or no path is set, your node will generate an ephemeral +# private key on every start up and _not_ persist it. # # private_key = "$HOME/.local/share/aquadoggo/private-key.txt" -# ~~~~~~~~~~~~~~ -# Local networks -# ~~~~~~~~~~~~~~ +# ゚・。+☆+。・゚・。+☆+ +# LOCAL NETWORKS +# ゚・。+☆+。・゚・。+☆+ # mDNS to discover other peers on the local network. Enabled by default. # mdns = true -# ~~~~~ -# Nodes -# ~~~~~ +# ゚・。+☆ +# NODES +# ゚・。+☆ # List of known node addresses (IP + port) we want to connect to directly. # @@ -134,11 +140,11 @@ direct_node_addresses = [ # "192.0.2.2:3000", ] -# ~~~~~ -# Relay -# ~~~~~ +# ゚・。+☆ +# RELAY +# ゚・。+☆ -# Address of relay. +# Address of a relay. # # A relay helps discover other nodes on the internet (also known as # "rendesvouz" or "bootstrap" server) and helps establishing direct p2p @@ -159,7 +165,7 @@ direct_node_addresses = [ # # relay_address = "192.0.2.16:2022" -# Set to true if our node should also function as a relay. Defaults to false. +# Set to true if node should also function as a relay. Defaults to false. # # Other nodes can use relays to aid discovery and establishing connectivity. # diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 5f1d0952a..8c066f40d 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -73,40 +73,70 @@ pub fn load_config() -> Result<(ConfigFilePath, Configuration)> { version )] struct Cli { - /// Path to a config.toml file. + /// Path to an optional "config.toml" file for further configuration. + /// + /// When not set the program will try to find a `config.toml` file in the same folder the + /// program is executed in and otherwise in the respective operating system's XDG config + /// directory ("$HOME/.config/aquadoggo/config.toml" on Linux).
#[arg(short = 'c', long, value_name = "PATH")] #[serde(skip_serializing_if = "Option::is_none")] config: Option, - /// List of schema ids which a node will replicate and expose on the GraphQL API. - #[arg(short = 's', long, value_name = "SCHEMA_ID SCHEMA_ID ...", num_args = 0..)] + /// List of schema ids which a node will replicate, persist and expose on the GraphQL API. + /// Separate multiple values with a whitespace. Defaults to allow _any_ schemas ("*"). + /// + /// When allowing a schema you automatically opt into announcing, replicating and materializing + /// documents connected to it, supporting applications and networks which are dependent on this + /// data. + /// + /// It is recommended to set this list to all schema ids your own application should support, + /// including all important system schemas. + /// + /// WARNING: When set to wildcard "*", your node will support _any_ schemas it will encounter + /// on the network. This is useful for experimentation and local development but _not_ + /// recommended for production settings. + #[arg(short = 's', long, value_name = "SCHEMA_ID", num_args = 0..)] #[serde( skip_serializing_if = "Option::is_none", serialize_with = "serialize_with_wildcard" )] allow_schema_ids: Option>, - /// URL / connection string to PostgreSQL or SQLite database. + /// URL / connection string to PostgreSQL or SQLite database. Defaults to an in-memory SQLite + /// database. + /// + /// WARNING: By default your node will not persist anything after shutdown. Set a database + /// connection url for production settings to not lose data. #[arg(short = 'd', long, value_name = "CONNECTION_STRING")] #[serde(skip_serializing_if = "Option::is_none")] database_url: Option, - /// HTTP port for client-node communication, serving the GraphQL API. + /// HTTP port for client-node communication, serving the GraphQL API. Defaults to 2020.
#[arg(short = 'p', long, value_name = "PORT")] #[serde(skip_serializing_if = "Option::is_none")] http_port: Option, - /// QUIC port for node-node communication and data replication. + /// QUIC port for node-node communication and data replication. Defaults to 2022. #[arg(short = 'q', long, value_name = "PORT")] #[serde(skip_serializing_if = "Option::is_none")] quic_port: Option, - /// Path to persist your ed25519 private key file. + /// Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this + /// current session. + /// + /// The key is used to identify you towards other nodes during network discovery and + /// replication. This key is _not_ used to create and sign data. + /// + /// If a path is set, a key will be generated newly and stored under this path when node starts + /// for the first time. + /// + /// When no path is set, your node will generate an ephemeral private key on every start up and + /// _not_ persist it. #[arg(short = 'k', long, value_name = "PATH")] #[serde(skip_serializing_if = "Option::is_none")] private_key: Option, - /// mDNS to discover other peers on the local network. + /// mDNS to discover other peers on the local network. Enabled by default. #[arg( short = 'm', long, @@ -118,16 +148,34 @@ struct Cli { mdns: Option, /// List of known node addresses we want to connect to directly. - #[arg(short = 'n', long, value_name = "IP:PORT IP:PORT ...", num_args = 0..)] + /// + /// Make sure that nodes mentioned in this list are directly reachable (for example they need + /// to be hosted with a static IP Address). If you need to connect to nodes with changing, + /// dynamic IP addresses or even with nodes behind a firewall or NAT, do not use this field but + /// use at least one relay. + #[arg(short = 'n', long, value_name = "IP:PORT", num_args = 0..)] #[serde(skip_serializing_if = "Option::is_none")] direct_node_addresses: Option>, - /// Address of relay. + /// Address of a relay. 
+ /// + /// A relay helps discover other nodes on the internet (also known as "rendezvous" or + /// "bootstrap" server) and helps establish direct p2p connections when node is behind a + /// firewall or NAT (also known as "holepunching"). + /// + /// WARNING: This will potentially expose your IP address on the network. Only connect to + /// trusted relays or make sure your IP address is hidden via a VPN or proxy if you're + /// concerned about leaking your IP. #[arg(short = 'r', long, value_name = "IP:PORT")] #[serde(skip_serializing_if = "Option::is_none")] relay_address: Option, - /// Set to true if our node should also function as a relay. + /// Enable if node should also function as a relay. Disabled by default. + /// + /// Other nodes can use relays to aid discovery and establishing connectivity. + /// + /// Relays _need_ to be hosted in a way where they can be reached directly, for example with a + /// static IP address through a VPS. #[arg( short = 'e', long, From 0fe626b16408ef6921b2a07c3180772823c9ba92 Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 11:49:55 +0200 Subject: [PATCH 61/66] Empty string should lead to empty array --- aquadoggo_cli/src/config.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 8c066f40d..f9ee55f6f 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -198,9 +198,12 @@ where { match list { Some(list) => { - // Wildcard symbol comes in form of an array ["*"], convert it to just a string "*" if list.len() == 1 && list[0] == WILDCARD { + // Wildcard symbol comes in form of an array ["*"], convert it to just a string "*" serializer.serialize_str(WILDCARD) + } else if list.len() == 1 && list[0].is_empty() { + // Empty string should not lead to [""] but to an empty array [] + Vec::>::new().serialize(serializer) } else { list.serialize(serializer) } From d7c81498f49e621da6d7cc1757230611abf359d4 Mon Sep 17
00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 11:50:57 +0200 Subject: [PATCH 62/66] Show warnings to user for some configs --- Cargo.lock | 5 +++-- aquadoggo_cli/Cargo.toml | 1 + aquadoggo_cli/src/main.rs | 15 ++++++++++++++- 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f6b134cfb..aea63f455 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -219,6 +219,7 @@ dependencies = [ "figment", "hex", "libp2p", + "log", "p2panda-rs", "path-clean", "serde", @@ -2745,9 +2746,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.19" +version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" [[package]] name = "lru" diff --git a/aquadoggo_cli/Cargo.toml b/aquadoggo_cli/Cargo.toml index 970be8619..a3b36ab74 100644 --- a/aquadoggo_cli/Cargo.toml +++ b/aquadoggo_cli/Cargo.toml @@ -28,6 +28,7 @@ env_logger = "0.9.0" figment = { version = "0.10.10", features = ["toml", "env"] } hex = "0.4.3" libp2p = "0.52.0" +log = "0.4.20" p2panda-rs = "0.7.1" path-clean = "1.0.1" serde = { version = "1.0.185", features = ["serde_derive"] } diff --git a/aquadoggo_cli/src/main.rs b/aquadoggo_cli/src/main.rs index d63e749b4..87039d9de 100644 --- a/aquadoggo_cli/src/main.rs +++ b/aquadoggo_cli/src/main.rs @@ -7,7 +7,8 @@ mod utils; use std::convert::TryInto; use anyhow::Context; -use aquadoggo::Node; +use aquadoggo::{AllowList, Node}; +use log::warn; use crate::config::{load_config, print_config}; use crate::key_pair::{generate_ephemeral_key_pair, generate_or_load_key_pair}; @@ -36,6 +37,18 @@ async fn main() -> anyhow::Result<()> { // Show configuration info to the user println!("{}", print_config(config_file_path, &node_config)); + // Show some hopefully helpful warnings + match &node_config.allow_schema_ids { + AllowList::Set(values) => { + if values.is_empty() && 
!node_config.network.relay_mode { + warn!("Your node was set to not allow any schema ids which is only useful in combination with enabling relay mode. With this setting you will not be able to interact with any client or node."); + } + } + AllowList::Wildcard => { + warn!("Allowed schema ids is set to wildcard. Your node will support _any_ schemas it will encounter on the network. This is useful for experimentation and local development but _not_ recommended for production settings."); + } + } + // Start p2panda node in async runtime let node = Node::start(key_pair, node_config).await; From b537329b905f7d930ac9a4290a1b22f333b4485c Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 11:52:22 +0200 Subject: [PATCH 63/66] Minor nice change --- aquadoggo_cli/src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index f9ee55f6f..795b2b0ce 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -378,7 +378,7 @@ pub fn print_config(path: ConfigFilePath, config: &NodeConfiguration) -> String .join("\n") } } - AllowList::Wildcard => "* (any schema id)".into(), + AllowList::Wildcard => format!("{WILDCARD} (any schema id)"), }; let database_url = if config.database_url == "sqlite::memory:" { From b1ca15e6c33940dc1ecaa0cb5d11ea1ceb8f942f Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 11:53:56 +0200 Subject: [PATCH 64/66] Remove usage section for now in README.md --- aquadoggo_cli/README.md | 65 ----------------------------------------- 1 file changed, 65 deletions(-) diff --git a/aquadoggo_cli/README.md b/aquadoggo_cli/README.md index 9d65535db..6b29ab298 100644 --- a/aquadoggo_cli/README.md +++ b/aquadoggo_cli/README.md @@ -2,71 +2,6 @@ Node server with GraphQL API for the p2panda network. 
-## Usage - -``` - -d, --data-dir - Path to data folder, $HOME/.local/share/aquadoggo by default on Linux - - -P, --http-port - Port for the http server, 2020 by default - - -q, --quic-port - Port for the QUIC transport, 2022 by default for a relay/rendezvous node - - -r, --remote-node-addresses - URLs of remote nodes to replicate with - - -m, --mdns - Enable mDNS for peer discovery over LAN (using port 5353), false by default - - [possible values: true, false] - - --enable-relay-server - Enable relay server to facilitate peer connectivity, false by default - - --relay-addr - IP address for the relay peer. - - eg. --relay-addr "127.0.0.1" - - --relay-port - Port for the relay peer, defaults to expected relay port 2022. - - eg. --relay-port "1234" - - -h, --help - Print help (see a summary with '-h') - - -V, --version - Print version -``` - -## Environment variables - -* `RUST_LOG` Can be set to `warn`, `error`, `info`, `debug`, `trace` for logging. -* `DATABASE_URL` Database url (SQLite, PostgreSQL) (default `sqlite:/aquadoggo-node.sqlite3`). -* `DATABASE_MAX_CONNECTIONS` Maximum number of database connections in pool (default `32`). -* `HTTP_PORT` HTTP server port for GraphQL API (default `2020`). -* `WORKER_POOL_SIZE` Materializer worker pool size (default `16`). - -**Example:** - -```bash -# For all debug logs from `aquadoggo` and external crates -RUST_LOG=debug DATABASE_URL=postgres://postgres:postgres@localhost:5432/db cargo run - -# For compact info logs, only directly coming from `aquadoggo` -RUST_LOG=aquadoggo=info DATABASE_URL=postgres://postgres:postgres@localhost:5432/db cargo run -``` - -## Configuring supported schema - -If a `config.toml` file is present then `aquadoggo` will read `supported_schema_ids` at start-up -and be configured to only replicate and offer a query API for schema identified by the listed ids. - -See `example_config.toml` for further instructions. 
- ## Development ```bash From 626277c90cd702873b0feff2e1cbae149e073437 Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 11:54:52 +0200 Subject: [PATCH 65/66] Add entry to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a248669a8..e37f45b66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Parse supported schema ids from `config.toml` [#473](https://github.com/p2panda/aquadoggo/pull/473) - Fix relayed connections, add DCUtR Holepunching and reduce CLI args [#502](https://github.com/p2panda/aquadoggo/pull/502) - Announce supported schema ids in network before replication [#515](https://github.com/p2panda/aquadoggo/pull/515) +- Improved configuration API with "config.toml" file, environment vars and command line arguments [#519](https://github.com/p2panda/aquadoggo/pull/519) ### Changed From c2804b9f971fe5dc9a5bda2c9212ec46a4a58b0d Mon Sep 17 00:00:00 2001 From: adz Date: Fri, 25 Aug 2023 12:39:39 +0200 Subject: [PATCH 66/66] Fix merge --- aquadoggo/src/replication/service.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/aquadoggo/src/replication/service.rs b/aquadoggo/src/replication/service.rs index f9849b782..73ba808c2 100644 --- a/aquadoggo/src/replication/service.rs +++ b/aquadoggo/src/replication/service.rs @@ -243,17 +243,19 @@ impl ConnectionManager { // If this is a SyncRequest message first we check if the contained target set matches our // own locally configured one. 
- if let Message::SyncRequest(_, remote_supported_schema_ids) = message.message() { + if let Message::SyncRequest(_, target_set) = message.message() { let local_supported_schema_ids = &self .announcement .as_ref() .expect("Announcement state needs to be set with 'update_announcement'") .supported_schema_ids; - // If this node has been configured with a whitelist of schema ids then we check the + // If this node has been configured with an allow list of schema ids then we check the // target set of the requests matches our own, otherwise we skip this step and accept // any target set. - if self.schema_provider.is_allow_list_active() { + if self.schema_provider.is_allow_list_active() + && !local_supported_schema_ids.is_valid_set(target_set) + { // If it doesn't match we signal that an error occurred and return at this point. self.on_replication_error(peer, session_id, ReplicationError::UnsupportedTargetSet) .await;