Skip to content

Commit

Permalink
Export ConfigFile struct which can be de/serialized from and to a c…
Browse files Browse the repository at this point in the history
…onfiguration file (#607)

* clippy

* Export ConfigFile from api module

* Improve doc string

* Add serde defaults to ConfigFile fields

* fmt & clippy

* Update CHANGELOG
  • Loading branch information
sandreae authored Jan 16, 2024
1 parent 0205e2c commit 82e85f0
Show file tree
Hide file tree
Showing 6 changed files with 373 additions and 194 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Added

- Export ConfigFile which can be de/serialized to and from a config file [#607](https://github.com/p2panda/aquadoggo/pull/607)

### Fixed

- Fix bug where known schemas are not replicated between nodes [#603](https://github.com/p2panda/aquadoggo/pull/603).
Expand Down
1 change: 1 addition & 0 deletions aquadoggo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ sqlx = { version = "0.6.1", features = [
"sqlite",
"runtime-tokio-rustls",
] }
tempfile = "3.7.0"
thiserror = "1.0.39"
tokio = { version = "1.28.2", features = [
"macros",
Expand Down
359 changes: 359 additions & 0 deletions aquadoggo/src/api/config_file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,359 @@
// SPDX-License-Identifier: AGPL-3.0-or-later

use std::convert::TryFrom;
use std::net::SocketAddr;
use std::path::PathBuf;
use std::str::FromStr;
use std::sync::OnceLock;

use anyhow::{anyhow, Result};
use libp2p::PeerId;
use p2panda_rs::schema::SchemaId;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use tempfile::TempDir;

use crate::{AllowList, Configuration, NetworkConfiguration};

const WILDCARD: &str = "*";

const DEFAULT_LOG_LEVEL: &str = "off";

const DEFAULT_MAX_DATABASE_CONNECTIONS: u32 = 32;

const DEFAULT_HTTP_PORT: u16 = 2020;

const DEFAULT_QUIC_PORT: u16 = 2022;

const DEFAULT_WORKER_POOL_SIZE: u32 = 16;

const DEFAULT_MDNS: bool = true;

static TMP_DIR: OnceLock<TempDir> = OnceLock::new();

fn default_log_level() -> String {
DEFAULT_LOG_LEVEL.to_string()
}

fn default_max_database_connections() -> u32 {
DEFAULT_MAX_DATABASE_CONNECTIONS
}

fn default_http_port() -> u16 {
DEFAULT_HTTP_PORT
}

fn default_quic_port() -> u16 {
DEFAULT_QUIC_PORT
}

fn default_database_url() -> String {
// Give each in-memory SQLite database an unique name as we're observing funny issues with
// SQLite sharing data between processes (!) and breaking each others databases
// potentially.
//
// See related issue: https://github.com/p2panda/aquadoggo/issues/568
let db_name = format!("dbmem{}", rand::random::<u32>());

// Set "mode=memory" to enable SQLite running in-memory and set "cache=shared", as
// setting it to "private" would break sqlx / SQLite.
//
// See related issue: https://github.com/launchbadge/sqlx/issues/2510
format!("sqlite://file:{db_name}?mode=memory&cache=shared")
}

fn default_worker_pool_size() -> u32 {
DEFAULT_WORKER_POOL_SIZE
}

fn default_mdns() -> bool {
DEFAULT_MDNS
}

/// Node configuration which can be de/serialized from a config file.
///
/// See https://github.com/p2panda/aquadoggo/blob/main/aquadoggo_cli/config.toml for example
/// config file and detailed documentation of possible configuration values.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ConfigFile {
/// Set log verbosity. Use this for learning more about how your node behaves or for debugging.
///
/// Possible log levels are: ERROR, WARN, INFO, DEBUG, TRACE. They are scoped to "aquadoggo" by
/// default.
///
/// If you want to adjust the scope for deeper inspection use a filter value, for example
/// "=TRACE" for logging _everything_ or "aquadoggo=INFO,libp2p=DEBUG" etc.
#[serde(default = "default_log_level")]
pub log_level: String,

/// List of schema ids which a node will replicate, persist and expose on the GraphQL API.
/// Separate multiple values with a whitespace. Defaults to allow _any_ schemas ("*").
///
/// When allowing a schema you automatically opt into announcing, replicating and materializing
/// documents connected to it, supporting applications and networks which are dependent on this
/// data.
///
/// It is recommended to set this list to all schema ids your own application should support,
/// including all important system schemas.
///
/// WARNING: When set to wildcard "*", your node will support _any_ schemas it will encounter
/// on the network. This is useful for experimentation and local development but _not_
/// recommended for production settings.
#[serde(default)]
pub allow_schema_ids: UncheckedAllowList,

/// URL / connection string to PostgreSQL or SQLite database. Defaults to an in-memory SQLite
/// database.
///
/// WARNING: By default your node will not persist anything after shutdown. Set a database
/// connection url for production settings to not loose data.
#[serde(default = "default_database_url")]
pub database_url: String,

/// Max database connections, defaults to 32.
#[serde(default = "default_max_database_connections")]
pub database_max_connections: u32,

/// HTTP port for client-node communication, serving the GraphQL API. Defaults to 2020.
#[serde(default = "default_http_port")]
pub http_port: u16,

/// QUIC port for node-node communication and data replication. Defaults to 2022.
#[serde(default = "default_quic_port")]
pub quic_port: u16,

/// Path to folder where blobs (large binary files) are persisted. Defaults to a temporary
/// directory.
///
/// WARNING: By default your node will not persist any blobs after shutdown. Set a path for
/// production settings to not loose data.
#[serde(default)]
pub blobs_base_path: Option<PathBuf>,

/// Path to persist your ed25519 private key file. Defaults to an ephemeral key only for this
/// current session.
///
/// The key is used to identify you towards other nodes during network discovery and
/// replication. This key is _not_ used to create and sign data.
///
/// If a path is set, a key will be generated newly and stored under this path when node starts
/// for the first time.
///
/// When no path is set, your node will generate an ephemeral private key on every start up and
/// _not_ persist it.
#[serde(default)]
pub private_key: Option<PathBuf>,

/// mDNS to discover other peers on the local network. Enabled by default.
#[serde(default = "default_mdns")]
pub mdns: bool,

/// List of known node addresses we want to connect to directly.
///
/// Make sure that nodes mentioned in this list are directly reachable (they need to be hosted
/// with a static IP Address). If you need to connect to nodes with changing, dynamic IP
/// addresses or even with nodes behind a firewall or NAT, do not use this field but use at
/// least one relay.
#[serde(default)]
pub direct_node_addresses: Vec<SocketAddr>,

/// List of peers which are allowed to connect to your node.
///
/// If set then only nodes (identified by their peer id) contained in this list will be able to
/// connect to your node (via a relay or directly). When not set any other node can connect to
/// yours.
///
/// Peer IDs identify nodes by using their hashed public keys. They do _not_ represent authored
/// data from clients and are only used to authenticate nodes towards each other during
/// networking.
///
/// Use this list for example for setups where the identifier of the nodes you want to form a
/// network with is known but you still need to use relays as their IP addresses change
/// dynamically.
#[serde(default)]
pub allow_peer_ids: UncheckedAllowList,

/// List of peers which will be blocked from connecting to your node.
///
/// If set then any peers (identified by their peer id) contained in this list will be blocked
/// from connecting to your node (via a relay or directly). When an empty list is provided then
/// there are no restrictions on which nodes can connect to yours.
///
/// Block lists and allow lists are exclusive, which means that you should _either_ use a block
/// list _or_ an allow list depending on your setup.
///
/// Use this list for example if you want to allow _any_ node to connect to yours _except_ of a
/// known number of excluded nodes.
#[serde(default)]
pub block_peer_ids: Vec<PeerId>,

/// List of relay addresses.
///
/// A relay helps discover other nodes on the internet (also known as "rendesvouz" or
/// "bootstrap" server) and helps establishing direct p2p connections when node is behind a
/// firewall or NAT (also known as "holepunching").
///
/// WARNING: This will potentially expose your IP address on the network. Do only connect to
/// trusted relays or make sure your IP address is hidden via a VPN or proxy if you're
/// concerned about leaking your IP.
#[serde(default)]
pub relay_addresses: Vec<SocketAddr>,

/// Enable if node should also function as a relay. Disabled by default.
///
/// Other nodes can use relays to aid discovery and establishing connectivity.
///
/// Relays _need_ to be hosted in a way where they can be reached directly, for example with a
/// static IP address through an VPS.
#[serde(default)]
pub relay_mode: bool,

/// Worker pool size, defaults to 16.
#[serde(default = "default_worker_pool_size")]
pub worker_pool_size: u32,
}

impl Default for ConfigFile {
fn default() -> Self {
Self {
log_level: default_log_level(),
allow_schema_ids: UncheckedAllowList::default(),
database_url: default_database_url(),
database_max_connections: default_max_database_connections(),
http_port: default_http_port(),
quic_port: default_quic_port(),
blobs_base_path: None,
mdns: default_mdns(),
private_key: None,
direct_node_addresses: vec![],
allow_peer_ids: UncheckedAllowList::default(),
block_peer_ids: vec![],
relay_addresses: vec![],
relay_mode: false,
worker_pool_size: default_worker_pool_size(),
}
}
}

impl TryFrom<ConfigFile> for Configuration {
type Error = anyhow::Error;

fn try_from(value: ConfigFile) -> Result<Self, Self::Error> {
// Check if given schema ids are valid
let allow_schema_ids = match value.allow_schema_ids {
UncheckedAllowList::Wildcard => AllowList::<SchemaId>::Wildcard,
UncheckedAllowList::Set(str_values) => {
let schema_ids: Result<Vec<SchemaId>, anyhow::Error> = str_values
.iter()
.map(|str_value| {
SchemaId::from_str(str_value).map_err(|_| {
anyhow!(
"Invalid schema id '{str_value}' found in 'allow_schema_ids' list"
)
})
})
.collect();

AllowList::Set(schema_ids?)
}
};

// Check if given peer ids are valid
let allow_peer_ids = match value.allow_peer_ids {
UncheckedAllowList::Wildcard => AllowList::<PeerId>::Wildcard,
UncheckedAllowList::Set(str_values) => {
let peer_ids: Result<Vec<PeerId>, anyhow::Error> = str_values
.iter()
.map(|str_value| {
PeerId::from_str(str_value).map_err(|_| {
anyhow!("Invalid peer id '{str_value}' found in 'allow_peer_ids' list")
})
})
.collect();

AllowList::Set(peer_ids?)
}
};

// Create a temporary blobs directory when none was given
let blobs_base_path = match value.blobs_base_path {
Some(path) => path,
None => TMP_DIR
.get_or_init(|| {
// Initialise a `TempDir` instance globally to make sure it does not run out of
// scope and gets deleted before the end of the application runtime
tempfile::TempDir::new()
.expect("Could not create temporary directory to store blobs")
})
.path()
.to_path_buf(),
};

Ok(Configuration {
allow_schema_ids,
database_url: value.database_url,
database_max_connections: value.database_max_connections,
http_port: value.http_port,
blobs_base_path,
worker_pool_size: value.worker_pool_size,
network: NetworkConfiguration {
quic_port: value.quic_port,
mdns: value.mdns,
direct_node_addresses: value.direct_node_addresses,
allow_peer_ids,
block_peer_ids: value.block_peer_ids,
relay_addresses: value.relay_addresses,
relay_mode: value.relay_mode,
..Default::default()
},
})
}
}

/// Helper struct to deserialize from either a wildcard string "*" or a list of string values.
///
/// These string values are not checked yet and need to be validated in a succeeding step.
#[derive(Debug, Clone, Default)]
pub enum UncheckedAllowList {
#[default]
Wildcard,
Set(Vec<String>),
}

impl Serialize for UncheckedAllowList {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
match self {
UncheckedAllowList::Wildcard => serializer.serialize_str(WILDCARD),
UncheckedAllowList::Set(list) => list.serialize(serializer),
}
}
}

impl<'de> Deserialize<'de> for UncheckedAllowList {
fn deserialize<D>(deserializer: D) -> Result<UncheckedAllowList, D::Error>
where
D: Deserializer<'de>,
{
#[derive(Deserialize)]
#[serde(untagged)]
enum Value<T> {
String(String),
Vec(Vec<T>),
}

let value = Value::deserialize(deserializer)?;

match value {
Value::String(str_value) => {
if str_value == WILDCARD {
Ok(UncheckedAllowList::Wildcard)
} else {
Err(serde::de::Error::custom("only wildcard strings allowed"))
}
}
Value::Vec(vec) => Ok(UncheckedAllowList::Set(vec)),
}
}
}
2 changes: 2 additions & 0 deletions aquadoggo/src/api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

#[allow(clippy::module_inception)]
mod api;
mod config_file;
mod lock_file;
mod migration;

pub use api::NodeInterface;
pub use config_file::ConfigFile;
pub use lock_file::LockFile;
pub use migration::migrate;
2 changes: 1 addition & 1 deletion aquadoggo/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ mod test_utils;
#[cfg(test)]
mod tests;

pub use crate::api::LockFile;
pub use crate::api::{ConfigFile, LockFile};
pub use crate::config::{AllowList, Configuration};
pub use crate::network::NetworkConfiguration;
pub use node::Node;
Expand Down
Loading

0 comments on commit 82e85f0

Please sign in to comment.