Skip to content

Commit

Permalink
Regions can now be read-only (#6150)
Browse files Browse the repository at this point in the history
Up until this point, regions were only ever read-write, and region
snapshots were read-only. In order to support snapshot replacement,
Crucible recently gained support for a read-only downstairs that performs
a "clone" operation to copy blocks from another read-only downstairs.

This commit adds a "read-only" flag to Region, and adds support for
Nexus initializing a downstairs with this new clone option.
  • Loading branch information
jmpesp authored Jul 29, 2024
1 parent 013df0a commit 1bb75f2
Show file tree
Hide file tree
Showing 19 changed files with 214 additions and 133 deletions.
8 changes: 8 additions & 0 deletions nexus/db-model/src/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ pub struct Region {
// The port that was returned when the region was created. This field didn't
// originally exist, so records may not have it filled in.
port: Option<SqlU16>,

// A region may be read-only
read_only: bool,
}

impl Region {
Expand All @@ -53,6 +56,7 @@ impl Region {
blocks_per_extent: u64,
extent_count: u64,
port: u16,
read_only: bool,
) -> Self {
Self {
identity: RegionIdentity::new(Uuid::new_v4()),
Expand All @@ -62,6 +66,7 @@ impl Region {
blocks_per_extent: blocks_per_extent as i64,
extent_count: extent_count as i64,
port: Some(port.into()),
read_only,
}
}

Expand Down Expand Up @@ -91,4 +96,7 @@ impl Region {
/// Returns the port stored on this region record as a plain `u16`.
///
/// Yields `None` when the record has no port filled in.
pub fn port(&self) -> Option<u16> {
    let stored = self.port;
    stored.map(Into::into)
}
/// Returns the value of this region's `read_only` flag.
pub fn read_only(&self) -> bool {
self.read_only
}
}
2 changes: 2 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1046,6 +1046,8 @@ table! {
extent_count -> Int8,

port -> Nullable<Int4>,

read_only -> Bool,
}
}

Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(83, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(84, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(84, "region-read-only"),
KnownVersion::new(83, "dataset-address-optional"),
KnownVersion::new(82, "region-port"),
KnownVersion::new(81, "add-nullable-filesystem-pool"),
Expand Down
10 changes: 7 additions & 3 deletions nexus/db-queries/src/db/datastore/region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use crate::db::model::Region;
use crate::db::model::SqlU16;
use crate::db::pagination::paginated;
use crate::db::pagination::Paginator;
use crate::db::queries::region_allocation::RegionParameters;
use crate::db::update_and_check::UpdateAndCheck;
use crate::db::update_and_check::UpdateStatus;
use crate::transaction_retry::OptionalError;
Expand Down Expand Up @@ -259,9 +260,12 @@ impl DataStore {
let query = crate::db::queries::region_allocation::allocation_query(
volume_id,
maybe_snapshot_id,
block_size,
blocks_per_extent,
extent_count,
RegionParameters {
block_size,
blocks_per_extent,
extent_count,
read_only: false,
},
allocation_strategy,
num_regions_required,
);
Expand Down
1 change: 1 addition & 0 deletions nexus/db-queries/src/db/datastore/volume.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2349,6 +2349,7 @@ mod tests {
10,
10,
10001,
false,
);

region_and_volume_ids[i].0 = region.id();
Expand Down
74 changes: 41 additions & 33 deletions nexus/db-queries/src/db/queries/region_allocation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,18 @@ type SelectableSql<T> = <
<T as diesel::Selectable<Pg>>::SelectExpression as diesel::Expression
>::SqlType;

/// Parameters for the region(s) being allocated.
///
/// `allocation_query` multiplies `block_size`, `blocks_per_extent` and
/// `extent_count` together to obtain the size delta for the allocation, and
/// binds each of the three to the query as a 64-bit signed integer.
#[derive(Debug, Clone, Copy)]
pub struct RegionParameters {
/// Size of a single block (presumably in bytes -- confirm against callers).
pub block_size: u64,
/// Number of blocks contained in each extent.
pub blocks_per_extent: u64,
/// Number of extents in the region.
pub extent_count: u64,

/// True if the region will be filled with a Clone operation and is meant to
/// be read-only.
pub read_only: bool,
}

/// For a given volume, idempotently allocate enough regions (according to some
/// allocation strategy) to meet some redundancy level. This should only be used
/// for the region set that is in the top level of the Volume (not the deeper
Expand All @@ -75,9 +87,7 @@ type SelectableSql<T> = <
pub fn allocation_query(
volume_id: uuid::Uuid,
snapshot_id: Option<uuid::Uuid>,
block_size: u64,
blocks_per_extent: u64,
extent_count: u64,
params: RegionParameters,
allocation_strategy: &RegionAllocationStrategy,
redundancy: usize,
) -> TypedSqlQuery<(SelectableSql<Dataset>, SelectableSql<Region>)> {
Expand All @@ -104,7 +114,8 @@ pub fn allocation_query(

let seed = seed.to_le_bytes().to_vec();

let size_delta = block_size * blocks_per_extent * extent_count;
let size_delta =
params.block_size * params.blocks_per_extent * params.extent_count;
let redundancy: i64 = i64::try_from(redundancy).unwrap();

let builder = QueryBuilder::new().sql(
Expand Down Expand Up @@ -243,7 +254,8 @@ pub fn allocation_query(
").param().sql(" AS block_size,
").param().sql(" AS blocks_per_extent,
").param().sql(" AS extent_count,
NULL AS port
NULL AS port,
").param().sql(" AS read_only
FROM shuffled_candidate_datasets")
// Only select the *additional* number of candidate regions for the required
// redundancy level
Expand All @@ -253,9 +265,10 @@ pub fn allocation_query(
))
),")
.bind::<sql_types::Uuid, _>(volume_id)
.bind::<sql_types::BigInt, _>(block_size as i64)
.bind::<sql_types::BigInt, _>(blocks_per_extent as i64)
.bind::<sql_types::BigInt, _>(extent_count as i64)
.bind::<sql_types::BigInt, _>(params.block_size as i64)
.bind::<sql_types::BigInt, _>(params.blocks_per_extent as i64)
.bind::<sql_types::BigInt, _>(params.extent_count as i64)
.bind::<sql_types::Bool, _>(params.read_only)
.bind::<sql_types::BigInt, _>(redundancy)

// A subquery which summarizes the changes we intend to make, showing:
Expand Down Expand Up @@ -355,7 +368,7 @@ pub fn allocation_query(
.sql("
inserted_regions AS (
INSERT INTO region
(id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count, port)
(id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count, port, read_only)
SELECT ").sql(AllColumnsOfRegion::with_prefix("candidate_regions")).sql("
FROM candidate_regions
WHERE
Expand Down Expand Up @@ -405,9 +418,12 @@ mod test {
#[tokio::test]
async fn expectorate_query() {
let volume_id = Uuid::nil();
let block_size = 512;
let blocks_per_extent = 4;
let extent_count = 8;
let params = RegionParameters {
block_size: 512,
blocks_per_extent: 4,
extent_count: 8,
read_only: false,
};

// Start with snapshot_id = None

Expand All @@ -418,14 +434,13 @@ mod test {
let region_allocate = allocation_query(
volume_id,
snapshot_id,
block_size,
blocks_per_extent,
extent_count,
params,
&RegionAllocationStrategy::RandomWithDistinctSleds {
seed: Some(1),
},
REGION_REDUNDANCY_THRESHOLD,
);

expectorate_query_contents(
&region_allocate,
"tests/output/region_allocate_distinct_sleds.sql",
Expand All @@ -437,9 +452,7 @@ mod test {
let region_allocate = allocation_query(
volume_id,
snapshot_id,
block_size,
blocks_per_extent,
extent_count,
params,
&RegionAllocationStrategy::Random { seed: Some(1) },
REGION_REDUNDANCY_THRESHOLD,
);
Expand All @@ -458,9 +471,7 @@ mod test {
let region_allocate = allocation_query(
volume_id,
snapshot_id,
block_size,
blocks_per_extent,
extent_count,
params,
&RegionAllocationStrategy::RandomWithDistinctSleds {
seed: Some(1),
},
Expand All @@ -477,9 +488,7 @@ mod test {
let region_allocate = allocation_query(
volume_id,
snapshot_id,
block_size,
blocks_per_extent,
extent_count,
params,
&RegionAllocationStrategy::Random { seed: Some(1) },
REGION_REDUNDANCY_THRESHOLD,
);
Expand All @@ -502,18 +511,19 @@ mod test {
let conn = pool.pool().get().await.unwrap();

let volume_id = Uuid::new_v4();
let block_size = 512;
let blocks_per_extent = 4;
let extent_count = 8;
let params = RegionParameters {
block_size: 512,
blocks_per_extent: 4,
extent_count: 8,
read_only: false,
};

// First structure: Explain the query with "RandomWithDistinctSleds"

let region_allocate = allocation_query(
volume_id,
None,
block_size,
blocks_per_extent,
extent_count,
params,
&RegionAllocationStrategy::RandomWithDistinctSleds { seed: None },
REGION_REDUNDANCY_THRESHOLD,
);
Expand All @@ -527,9 +537,7 @@ mod test {
let region_allocate = allocation_query(
volume_id,
None,
block_size,
blocks_per_extent,
extent_count,
params,
&RegionAllocationStrategy::Random { seed: None },
REGION_REDUNDANCY_THRESHOLD,
);
Expand Down
31 changes: 19 additions & 12 deletions nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@ WITH
region.block_size,
region.blocks_per_extent,
region.extent_count,
region.port
region.port,
region.read_only
FROM
region
WHERE
Expand Down Expand Up @@ -99,11 +100,12 @@ WITH
$8 AS block_size,
$9 AS blocks_per_extent,
$10 AS extent_count,
NULL AS port
NULL AS port,
$11 AS read_only
FROM
shuffled_candidate_datasets
LIMIT
$11 - (SELECT count(*) FROM old_regions)
$12 - (SELECT count(*) FROM old_regions)
),
proposed_dataset_changes
AS (
Expand All @@ -122,7 +124,7 @@ WITH
SELECT
(
(
(SELECT count(*) FROM old_regions LIMIT 1) < $12
(SELECT count(*) FROM old_regions LIMIT 1) < $13
AND CAST(
IF(
(
Expand All @@ -132,7 +134,7 @@ WITH
+ (SELECT count(*) FROM existing_zpools LIMIT 1)
)
)
>= $13
>= $14
),
'TRUE',
'Not enough space'
Expand All @@ -149,7 +151,7 @@ WITH
+ (SELECT count(*) FROM old_regions LIMIT 1)
)
)
>= $14
>= $15
),
'TRUE',
'Not enough datasets'
Expand Down Expand Up @@ -185,7 +187,7 @@ WITH
1
)
)
>= $15
>= $16
),
'TRUE',
'Not enough unique zpools selected'
Expand All @@ -208,7 +210,8 @@ WITH
block_size,
blocks_per_extent,
extent_count,
port
port,
read_only
)
SELECT
candidate_regions.id,
Expand All @@ -219,7 +222,8 @@ WITH
candidate_regions.block_size,
candidate_regions.blocks_per_extent,
candidate_regions.extent_count,
candidate_regions.port
candidate_regions.port,
candidate_regions.read_only
FROM
candidate_regions
WHERE
Expand All @@ -233,7 +237,8 @@ WITH
region.block_size,
region.blocks_per_extent,
region.extent_count,
region.port
region.port,
region.read_only
),
updated_datasets
AS (
Expand Down Expand Up @@ -287,7 +292,8 @@ WITH
old_regions.block_size,
old_regions.blocks_per_extent,
old_regions.extent_count,
old_regions.port
old_regions.port,
old_regions.read_only
FROM
old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id
)
Expand All @@ -312,7 +318,8 @@ UNION
inserted_regions.block_size,
inserted_regions.blocks_per_extent,
inserted_regions.extent_count,
inserted_regions.port
inserted_regions.port,
inserted_regions.read_only
FROM
inserted_regions
INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id
Expand Down
Loading

0 comments on commit 1bb75f2

Please sign in to comment.