From 82e85f0e3f1c7d3a6201b49cf3a3f89e6b2fcae3 Mon Sep 17 00:00:00 2001 From: Sam Andreae Date: Tue, 16 Jan 2024 13:48:01 +0000 Subject: [PATCH] Export `ConfigFile` struct which can be de/serialized from and to a configuration file (#607) * clippy * Export ConfigFile from api module * Improve doc string * Add serde defaults to ConfigFile fields * fmt & clippy * Update CHANGELOG --- CHANGELOG.md | 4 + aquadoggo/Cargo.toml | 1 + aquadoggo/src/api/config_file.rs | 359 +++++++++++++++++++++++++++++++ aquadoggo/src/api/mod.rs | 2 + aquadoggo/src/lib.rs | 2 +- aquadoggo_cli/src/config.rs | 199 +---------------- 6 files changed, 373 insertions(+), 194 deletions(-) create mode 100644 aquadoggo/src/api/config_file.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 080a42582..f35a8094e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Export ConfigFile which can be de/serialized to and from a config file [#607](https://github.com/p2panda/aquadoggo/pull/607) + ### Fixed - Fix bug where known schemas are not replicated between nodes [#603](https://github.com/p2panda/aquadoggo/pull/603). 
diff --git a/aquadoggo/Cargo.toml b/aquadoggo/Cargo.toml index 2a07a2a08..7df75b965 100644 --- a/aquadoggo/Cargo.toml +++ b/aquadoggo/Cargo.toml @@ -66,6 +66,7 @@ sqlx = { version = "0.6.1", features = [ "sqlite", "runtime-tokio-rustls", ] } +tempfile = "3.7.0" thiserror = "1.0.39" tokio = { version = "1.28.2", features = [ "macros", diff --git a/aquadoggo/src/api/config_file.rs b/aquadoggo/src/api/config_file.rs new file mode 100644 index 000000000..b88dba398 --- /dev/null +++ b/aquadoggo/src/api/config_file.rs @@ -0,0 +1,359 @@ +// SPDX-License-Identifier: AGPL-3.0-or-later + +use std::convert::TryFrom; +use std::net::SocketAddr; +use std::path::PathBuf; +use std::str::FromStr; +use std::sync::OnceLock; + +use anyhow::{anyhow, Result}; +use libp2p::PeerId; +use p2panda_rs::schema::SchemaId; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use tempfile::TempDir; + +use crate::{AllowList, Configuration, NetworkConfiguration}; + +const WILDCARD: &str = "*"; + +const DEFAULT_LOG_LEVEL: &str = "off"; + +const DEFAULT_MAX_DATABASE_CONNECTIONS: u32 = 32; + +const DEFAULT_HTTP_PORT: u16 = 2020; + +const DEFAULT_QUIC_PORT: u16 = 2022; + +const DEFAULT_WORKER_POOL_SIZE: u32 = 16; + +const DEFAULT_MDNS: bool = true; + +static TMP_DIR: OnceLock<TempDir> = OnceLock::new(); + +fn default_log_level() -> String { + DEFAULT_LOG_LEVEL.to_string() +} + +fn default_max_database_connections() -> u32 { + DEFAULT_MAX_DATABASE_CONNECTIONS +} + +fn default_http_port() -> u16 { + DEFAULT_HTTP_PORT +} + +fn default_quic_port() -> u16 { + DEFAULT_QUIC_PORT +} + +fn default_database_url() -> String { + // Give each in-memory SQLite database a unique name as we're observing funny issues with + // SQLite sharing data between processes (!) and breaking each other's databases + // potentially. 
+ // + // See related issue: https://github.com/p2panda/aquadoggo/issues/568 + let db_name = format!("dbmem{}", rand::random::<u32>()); + + // Set "mode=memory" to enable SQLite running in-memory and set "cache=shared", as + // setting it to "private" would break sqlx / SQLite. + // + // See related issue: https://github.com/launchbadge/sqlx/issues/2510 + format!("sqlite://file:{db_name}?mode=memory&cache=shared") +} + +fn default_worker_pool_size() -> u32 { + DEFAULT_WORKER_POOL_SIZE +} + +fn default_mdns() -> bool { + DEFAULT_MDNS +} + +/// Node configuration which can be de/serialized from a config file. +/// +/// See https://github.com/p2panda/aquadoggo/blob/main/aquadoggo_cli/config.toml for example +/// config file and detailed documentation of possible configuration values. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ConfigFile { + /// Set log verbosity. Use this for learning more about how your node behaves or for debugging. + /// + /// Possible log levels are: ERROR, WARN, INFO, DEBUG, TRACE. They are scoped to "aquadoggo" by + /// default. + /// + /// If you want to adjust the scope for deeper inspection use a filter value, for example + /// "=TRACE" for logging _everything_ or "aquadoggo=INFO,libp2p=DEBUG" etc. + #[serde(default = "default_log_level")] + pub log_level: String, + + /// List of schema ids which a node will replicate, persist and expose on the GraphQL API. + /// Separate multiple values with a whitespace. Defaults to allow _any_ schemas ("*"). + /// + /// When allowing a schema you automatically opt into announcing, replicating and materializing + /// documents connected to it, supporting applications and networks which are dependent on this + /// data. + /// + /// It is recommended to set this list to all schema ids your own application should support, + /// including all important system schemas. + /// + /// WARNING: When set to wildcard "*", your node will support _any_ schemas it will encounter + /// on the network. 
This is useful for experimentation and local development but _not_ + /// recommended for production settings. + #[serde(default)] + pub allow_schema_ids: UncheckedAllowList, + + /// URL / connection string to PostgreSQL or SQLite database. Defaults to an in-memory SQLite + /// database. + /// + /// WARNING: By default your node will not persist anything after shutdown. Set a database + /// connection url for production settings to not lose data. + #[serde(default = "default_database_url")] + pub database_url: String, + + /// Max database connections, defaults to 32. + #[serde(default = "default_max_database_connections")] + pub database_max_connections: u32, + + /// HTTP port for client-node communication, serving the GraphQL API. Defaults to 2020. + #[serde(default = "default_http_port")] + pub http_port: u16, + + /// QUIC port for node-node communication and data replication. Defaults to 2022. + #[serde(default = "default_quic_port")] + pub quic_port: u16, + + /// Path to folder where blobs (large binary files) are persisted. Defaults to a temporary + /// directory. + /// + /// WARNING: By default your node will not persist any blobs after shutdown. Set a path for + /// production settings to not lose data. + #[serde(default)] + pub blobs_base_path: Option<PathBuf>, + + /// Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this + /// current session. + /// + /// The key is used to identify you towards other nodes during network discovery and + /// replication. This key is _not_ used to create and sign data. + /// + /// If a path is set, a key will be generated newly and stored under this path when node starts + /// for the first time. + /// + /// When no path is set, your node will generate an ephemeral private key on every start up and + /// _not_ persist it. + #[serde(default)] + pub private_key: Option<PathBuf>, + + /// mDNS to discover other peers on the local network. Enabled by default. 
+ #[serde(default = "default_mdns")] + pub mdns: bool, + + /// List of known node addresses we want to connect to directly. + /// + /// Make sure that nodes mentioned in this list are directly reachable (they need to be hosted + /// with a static IP Address). If you need to connect to nodes with changing, dynamic IP + /// addresses or even with nodes behind a firewall or NAT, do not use this field but use at + /// least one relay. + #[serde(default)] + pub direct_node_addresses: Vec<SocketAddr>, + + /// List of peers which are allowed to connect to your node. + /// + /// If set then only nodes (identified by their peer id) contained in this list will be able to + /// connect to your node (via a relay or directly). When not set any other node can connect to + /// yours. + /// + /// Peer IDs identify nodes by using their hashed public keys. They do _not_ represent authored + /// data from clients and are only used to authenticate nodes towards each other during + /// networking. + /// + /// Use this list for example for setups where the identifier of the nodes you want to form a + /// network with is known but you still need to use relays as their IP addresses change + /// dynamically. + #[serde(default)] + pub allow_peer_ids: UncheckedAllowList, + + /// List of peers which will be blocked from connecting to your node. + /// + /// If set then any peers (identified by their peer id) contained in this list will be blocked + /// from connecting to your node (via a relay or directly). When an empty list is provided then + /// there are no restrictions on which nodes can connect to yours. + /// + /// Block lists and allow lists are exclusive, which means that you should _either_ use a block + /// list _or_ an allow list depending on your setup. + /// + /// Use this list for example if you want to allow _any_ node to connect to yours _except_ for a + /// known number of excluded nodes. + #[serde(default)] + pub block_peer_ids: Vec<PeerId>, + + /// List of relay addresses. 
+ /// + /// A relay helps discover other nodes on the internet (also known as "rendezvous" or + /// "bootstrap" server) and helps establish direct p2p connections when node is behind a + /// firewall or NAT (also known as "holepunching"). + /// + /// WARNING: This will potentially expose your IP address on the network. Only connect to + /// trusted relays or make sure your IP address is hidden via a VPN or proxy if you're + /// concerned about leaking your IP. + #[serde(default)] + pub relay_addresses: Vec<SocketAddr>, + + /// Enable if node should also function as a relay. Disabled by default. + /// + /// Other nodes can use relays to aid discovery and establishing connectivity. + /// + /// Relays _need_ to be hosted in a way where they can be reached directly, for example with a + /// static IP address through a VPS. + #[serde(default)] + pub relay_mode: bool, + + /// Worker pool size, defaults to 16. + #[serde(default = "default_worker_pool_size")] + pub worker_pool_size: u32, +} + +impl Default for ConfigFile { + fn default() -> Self { + Self { + log_level: default_log_level(), + allow_schema_ids: UncheckedAllowList::default(), + database_url: default_database_url(), + database_max_connections: default_max_database_connections(), + http_port: default_http_port(), + quic_port: default_quic_port(), + blobs_base_path: None, + mdns: default_mdns(), + private_key: None, + direct_node_addresses: vec![], + allow_peer_ids: UncheckedAllowList::default(), + block_peer_ids: vec![], + relay_addresses: vec![], + relay_mode: false, + worker_pool_size: default_worker_pool_size(), + } + } +} + +impl TryFrom<ConfigFile> for Configuration { + type Error = anyhow::Error; + + fn try_from(value: ConfigFile) -> Result<Self, Self::Error> { + // Check if given schema ids are valid + let allow_schema_ids = match value.allow_schema_ids { + UncheckedAllowList::Wildcard => AllowList::<SchemaId>::Wildcard, + UncheckedAllowList::Set(str_values) => { + let schema_ids: Result<Vec<SchemaId>, anyhow::Error> = str_values + .iter() + .map(|str_value| { + 
SchemaId::from_str(str_value).map_err(|_| { + anyhow!( + "Invalid schema id '{str_value}' found in 'allow_schema_ids' list" + ) + }) + }) + .collect(); + + AllowList::Set(schema_ids?) + } + }; + + // Check if given peer ids are valid + let allow_peer_ids = match value.allow_peer_ids { + UncheckedAllowList::Wildcard => AllowList::<PeerId>::Wildcard, + UncheckedAllowList::Set(str_values) => { + let peer_ids: Result<Vec<PeerId>, anyhow::Error> = str_values + .iter() + .map(|str_value| { + PeerId::from_str(str_value).map_err(|_| { + anyhow!("Invalid peer id '{str_value}' found in 'allow_peer_ids' list") + }) + }) + .collect(); + + AllowList::Set(peer_ids?) + } + }; + + // Create a temporary blobs directory when none was given + let blobs_base_path = match value.blobs_base_path { + Some(path) => path, + None => TMP_DIR + .get_or_init(|| { + // Initialise a `TempDir` instance globally to make sure it does not run out of + // scope and gets deleted before the end of the application runtime + tempfile::TempDir::new() + .expect("Could not create temporary directory to store blobs") + }) + .path() + .to_path_buf(), + }; + + Ok(Configuration { + allow_schema_ids, + database_url: value.database_url, + database_max_connections: value.database_max_connections, + http_port: value.http_port, + blobs_base_path, + worker_pool_size: value.worker_pool_size, + network: NetworkConfiguration { + quic_port: value.quic_port, + mdns: value.mdns, + direct_node_addresses: value.direct_node_addresses, + allow_peer_ids, + block_peer_ids: value.block_peer_ids, + relay_addresses: value.relay_addresses, + relay_mode: value.relay_mode, + ..Default::default() + }, + }) + } +} + +/// Helper enum to deserialize from either a wildcard string "*" or a list of string values. +/// +/// These string values are not checked yet and need to be validated in a succeeding step. 
+#[derive(Debug, Clone, Default)] +pub enum UncheckedAllowList { + #[default] + Wildcard, + Set(Vec<String>), +} + +impl Serialize for UncheckedAllowList { + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + match self { + UncheckedAllowList::Wildcard => serializer.serialize_str(WILDCARD), + UncheckedAllowList::Set(list) => list.serialize(serializer), + } + } +} + +impl<'de> Deserialize<'de> for UncheckedAllowList { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + #[serde(untagged)] + enum Value { + String(String), + Vec(Vec<String>), + } + + let value = Value::deserialize(deserializer)?; + + match value { + Value::String(str_value) => { + if str_value == WILDCARD { + Ok(UncheckedAllowList::Wildcard) + } else { + Err(serde::de::Error::custom("only wildcard strings allowed")) + } + } + Value::Vec(vec) => Ok(UncheckedAllowList::Set(vec)), + } + } +} diff --git a/aquadoggo/src/api/mod.rs b/aquadoggo/src/api/mod.rs index 06fd66119..89c255bd3 100644 --- a/aquadoggo/src/api/mod.rs +++ b/aquadoggo/src/api/mod.rs @@ -2,9 +2,11 @@ #[allow(clippy::module_inception)] mod api; +mod config_file; mod lock_file; mod migration; pub use api::NodeInterface; +pub use config_file::ConfigFile; pub use lock_file::LockFile; pub use migration::migrate; diff --git a/aquadoggo/src/lib.rs b/aquadoggo/src/lib.rs index 7af6cf617..db4e38eba 100644 --- a/aquadoggo/src/lib.rs +++ b/aquadoggo/src/lib.rs @@ -32,7 +32,7 @@ mod test_utils; #[cfg(test)] mod tests; -pub use crate::api::LockFile; +pub use crate::api::{ConfigFile, LockFile}; pub use crate::config::{AllowList, Configuration}; pub use crate::network::NetworkConfiguration; pub use node::Node; diff --git a/aquadoggo_cli/src/config.rs b/aquadoggo_cli/src/config.rs index 71d8b80cd..90433c058 100644 --- a/aquadoggo_cli/src/config.rs +++ b/aquadoggo_cli/src/config.rs @@ -1,22 +1,17 @@ // SPDX-License-Identifier: AGPL-3.0-or-later -use std::convert::TryFrom; use 
std::net::SocketAddr; use std::path::PathBuf; -use std::str::FromStr; -use std::sync::OnceLock; -use anyhow::{anyhow, bail, Result}; -use aquadoggo::{AllowList, Configuration as NodeConfiguration, NetworkConfiguration}; +use anyhow::{bail, Result}; +use aquadoggo::{AllowList, ConfigFile, Configuration}; use clap::{crate_version, Parser}; use colored::Colorize; use directories::ProjectDirs; use figment::providers::{Env, Format, Serialized, Toml}; use figment::Figment; use libp2p::PeerId; -use p2panda_rs::schema::SchemaId; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use tempfile::TempDir; +use serde::{Serialize, Serializer}; use crate::utils::absolute_path; @@ -24,8 +19,6 @@ const WILDCARD: &str = "*"; const CONFIG_FILE_NAME: &str = "config.toml"; -static TMP_DIR: OnceLock<TempDir> = OnceLock::new(); - type ConfigFilePath = Option<PathBuf>; /// Get configuration from 1. .toml file, 2. environment variables and 3. command line arguments @@ -34,7 +27,7 @@ type ConfigFilePath = Option<PathBuf>; /// /// Returns a partly unchecked configuration object which results from all of these sources. It /// still needs to be converted for aquadoggo as it might still contain invalid values. -pub fn load_config() -> Result<(ConfigFilePath, Configuration)> { +pub fn load_config() -> Result<(ConfigFilePath, ConfigFile)> { // Parse command line arguments first to get optional config file path let cli = Cli::parse(); @@ -50,7 +43,7 @@ pub fn load_config() -> Result<(ConfigFilePath, Configuration)> { None => try_determine_config_file_path(), }; - let mut figment = Figment::from(Serialized::defaults(Configuration::default())); + let mut figment = Figment::from(Serialized::defaults(ConfigFile::default())); if let Some(path) = &config_file_path { figment = figment.merge(Toml::file(path)); } @@ -270,138 +263,6 @@ where } } -/// Configuration derived from environment variables and .toml file. 
-#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct Configuration { - pub log_level: String, - pub allow_schema_ids: UncheckedAllowList, - pub database_url: String, - pub database_max_connections: u32, - pub http_port: u16, - pub quic_port: u16, - pub blobs_base_path: Option<PathBuf>, - pub private_key: Option<PathBuf>, - pub mdns: bool, - pub direct_node_addresses: Vec<SocketAddr>, - pub allow_peer_ids: UncheckedAllowList, - pub block_peer_ids: Vec<PeerId>, - pub relay_addresses: Vec<SocketAddr>, - pub relay_mode: bool, - pub worker_pool_size: u32, -} - -impl Default for Configuration { - fn default() -> Self { - let database_url = { - // Give each in-memory SQLite database an unique name as we're observing funny issues with - // SQLite sharing data between processes (!) and breaking each others databases - // potentially. - // - // See related issue: https://github.com/p2panda/aquadoggo/issues/568 - let db_name = format!("dbmem{}", rand::random::<u32>()); - - // Set "mode=memory" to enable SQLite running in-memory and set "cache=shared", as - // setting it to "private" would break sqlx / SQLite. 
- // - // See related issue: https://github.com/launchbadge/sqlx/issues/2510 - format!("sqlite://file:{db_name}?mode=memory&cache=shared") - }; - - Self { - log_level: "off".into(), - allow_schema_ids: UncheckedAllowList::Wildcard, - database_url, - database_max_connections: 32, - http_port: 2020, - quic_port: 2022, - blobs_base_path: None, - mdns: true, - private_key: None, - direct_node_addresses: vec![], - allow_peer_ids: UncheckedAllowList::Wildcard, - block_peer_ids: Vec::new(), - relay_addresses: vec![], - relay_mode: false, - worker_pool_size: 16, - } - } -} - -impl TryFrom<Configuration> for NodeConfiguration { - type Error = anyhow::Error; - - fn try_from(value: Configuration) -> Result<Self, Self::Error> { - // Check if given schema ids are valid - let allow_schema_ids = match value.allow_schema_ids { - UncheckedAllowList::Wildcard => AllowList::<SchemaId>::Wildcard, - UncheckedAllowList::Set(str_values) => { - let schema_ids: Result<Vec<SchemaId>, anyhow::Error> = str_values - .iter() - .map(|str_value| { - SchemaId::from_str(str_value).map_err(|_| { - anyhow!( - "Invalid schema id '{str_value}' found in 'allow_schema_ids' list" - ) - }) - }) - .collect(); - - AllowList::Set(schema_ids?) - } - }; - - // Check if given peer ids are valid - let allow_peer_ids = match value.allow_peer_ids { - UncheckedAllowList::Wildcard => AllowList::<PeerId>::Wildcard, - UncheckedAllowList::Set(str_values) => { - let peer_ids: Result<Vec<PeerId>, anyhow::Error> = str_values - .iter() - .map(|str_value| { - PeerId::from_str(str_value).map_err(|_| { - anyhow!("Invalid peer id '{str_value}' found in 'allow_peer_ids' list") - }) - }) - .collect(); - - AllowList::Set(peer_ids?) 
- } - }; - - // Create a temporary blobs directory when none was given - let blobs_base_path = match value.blobs_base_path { - Some(path) => path, - None => TMP_DIR - .get_or_init(|| { - // Initialise a `TempDir` instance globally to make sure it does not run out of - // scope and gets deleted before the end of the application runtime - tempfile::TempDir::new() - .expect("Could not create temporary directory to store blobs") - }) - .path() - .to_path_buf(), - }; - - Ok(NodeConfiguration { - allow_schema_ids, - database_url: value.database_url, - database_max_connections: value.database_max_connections, - http_port: value.http_port, - blobs_base_path, - worker_pool_size: value.worker_pool_size, - network: NetworkConfiguration { - quic_port: value.quic_port, - mdns: value.mdns, - direct_node_addresses: value.direct_node_addresses, - allow_peer_ids, - block_peer_ids: value.block_peer_ids, - relay_addresses: value.relay_addresses, - relay_mode: value.relay_mode, - ..Default::default() - }, - }) - } -} - fn try_determine_config_file_path() -> Option<PathBuf> { // Find config file in current folder let mut current_dir = std::env::current_dir().expect("Could not determine current directory"); @@ -423,7 +284,7 @@ fn try_determine_config_file_path() -> Option<PathBuf> { pub fn print_config( private_key_path: Option<&PathBuf>, config_file_path: ConfigFilePath, - config: &NodeConfiguration, + config: &Configuration, ) -> String { println!( r" ██████ ███████ ████ @@ -531,51 +392,3 @@ Node is ready! relay_mode.blue(), ) } - -/// Helper struct to deserialize from either a wildcard string "*" or a list of string values. -/// -/// These string values are not checked yet and need to be validated in a succeeding step. 
-#[derive(Debug, Clone)] -pub enum UncheckedAllowList { - Wildcard, - Set(Vec<String>), -} - -impl Serialize for UncheckedAllowList { - fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - match self { - UncheckedAllowList::Wildcard => serializer.serialize_str(WILDCARD), - UncheckedAllowList::Set(list) => list.serialize(serializer), - } - } -} - -impl<'de> Deserialize<'de> for UncheckedAllowList { - fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> - where - D: Deserializer<'de>, - { - #[derive(Deserialize)] - #[serde(untagged)] - enum Value { - String(String), - Vec(Vec<String>), - } - - let value = Value::deserialize(deserializer)?; - - match value { - Value::String(str_value) => { - if str_value == WILDCARD { - Ok(UncheckedAllowList::Wildcard) - } else { - Err(serde::de::Error::custom("only wildcard strings allowed")) - } - } - Value::Vec(vec) => Ok(UncheckedAllowList::Set(vec)), - } - } -}