From 860aba81a06e8d97010bdc67666e1c1b6184a825 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 12 Oct 2023 03:31:11 +0500 Subject: [PATCH 1/6] zvol: Cleanup set property zvol_set_volmode() and zvol_set_snapdev() share a common code path. Merge this shared code path into zvol_set_common(). Signed-off-by: Ameer Hamza --- include/sys/zvol.h | 3 +- module/zfs/zfs_ioctl.c | 4 +- module/zfs/zvol.c | 129 ++++++++++------------------------------- 3 files changed, 33 insertions(+), 103 deletions(-) diff --git a/include/sys/zvol.h b/include/sys/zvol.h index 15a3034aefb5..5e92a3367b9f 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -50,8 +50,7 @@ extern int zvol_get_stats(objset_t *, nvlist_t *); extern boolean_t zvol_is_zvol(const char *); extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *); extern int zvol_set_volsize(const char *, uint64_t); -extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t); -extern int zvol_set_volmode(const char *, zprop_source_t, uint64_t); +extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t); extern zvol_state_handle_t *zvol_suspend(const char *); extern int zvol_resume(zvol_state_handle_t *); extern void *zvol_tag(zvol_state_handle_t *); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 2738385e260b..d780b9f3da17 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2524,10 +2524,8 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, err = zvol_set_volsize(dsname, intval); break; case ZFS_PROP_SNAPDEV: - err = zvol_set_snapdev(dsname, source, intval); - break; case ZFS_PROP_VOLMODE: - err = zvol_set_volmode(dsname, source, intval); + err = zvol_set_common(dsname, prop, source, intval); break; case ZFS_PROP_VERSION: { diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 53dcb4dee448..023a6386d665 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -1541,6 +1541,7 @@ typedef struct zvol_set_prop_int_arg { const char *zsda_name; uint64_t zsda_value; zprop_source_t zsda_source; + zfs_prop_t zsda_prop; dmu_tx_t *zsda_tx; } zvol_set_prop_int_arg_t; @@ -1549,7 +1550,7 @@ typedef struct zvol_set_prop_int_arg { * conditions are imposed. */ static int -zvol_set_snapdev_check(void *arg, dmu_tx_t *tx) +zvol_set_common_check(void *arg, dmu_tx_t *tx) { zvol_set_prop_int_arg_t *zsda = arg; dsl_pool_t *dp = dmu_tx_pool(tx); @@ -1566,103 +1567,33 @@ zvol_set_snapdev_check(void *arg, dmu_tx_t *tx) } static int -zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) +zvol_set_common_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) { - (void) arg; + zvol_set_prop_int_arg_t *zsda = arg; char dsname[MAXNAMELEN]; zvol_task_t *task; - uint64_t snapdev; + uint64_t prop; + const char *prop_name = zfs_prop_to_name(zsda->zsda_prop); dsl_dataset_name(ds, dsname); - if (dsl_prop_get_int_ds(ds, "snapdev", &snapdev) != 0) - return (0); - task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, NULL, snapdev); - if (task == NULL) - return (0); - - (void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb, - task, TQ_SLEEP); - return (0); -} -/* - * Traverse all child datasets and apply snapdev appropriately. - * We call dsl_prop_set_sync_impl() here to set the value only on the toplevel - * dataset and read the effective "snapdev" on every child in the callback - * function: this is because the value is not guaranteed to be the same in the - * whole dataset hierarchy. - */ -static void -zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx) -{ - zvol_set_prop_int_arg_t *zsda = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dir_t *dd; - dsl_dataset_t *ds; - int error; - - VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL)); - zsda->zsda_tx = tx; + if (dsl_prop_get_int_ds(ds, prop_name, &prop) != 0) + return (0); - error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds); - if (error == 0) { - dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV), - zsda->zsda_source, sizeof (zsda->zsda_value), 1, - &zsda->zsda_value, zsda->zsda_tx); - dsl_dataset_rele(ds, FTAG); + switch (zsda->zsda_prop) { + case ZFS_PROP_VOLMODE: + task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, + NULL, prop); + break; + case ZFS_PROP_SNAPDEV: + task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, + NULL, prop); + break; + default: + task = NULL; + break; } - dmu_objset_find_dp(dp, dd->dd_object, zvol_set_snapdev_sync_cb, - zsda, DS_FIND_CHILDREN); - - dsl_dir_rele(dd, FTAG); -} - -int -zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev) -{ - zvol_set_prop_int_arg_t zsda; - - zsda.zsda_name = ddname; - zsda.zsda_source = source; - zsda.zsda_value = snapdev; - - return (dsl_sync_task(ddname, zvol_set_snapdev_check, - zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE)); -} -/* - * Sanity check the dataset for safe use by the sync task. No additional - * conditions are imposed. - */ -static int -zvol_set_volmode_check(void *arg, dmu_tx_t *tx) -{ - zvol_set_prop_int_arg_t *zsda = arg; - dsl_pool_t *dp = dmu_tx_pool(tx); - dsl_dir_t *dd; - int error; - - error = dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL); - if (error != 0) - return (error); - - dsl_dir_rele(dd, FTAG); - - return (error); -} - -static int -zvol_set_volmode_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) -{ - (void) arg; - char dsname[MAXNAMELEN]; - zvol_task_t *task; - uint64_t volmode; - - dsl_dataset_name(ds, dsname); - if (dsl_prop_get_int_ds(ds, "volmode", &volmode) != 0) - return (0); - task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, NULL, volmode); if (task == NULL) return (0); @@ -1672,14 +1603,14 @@ zvol_set_volmode_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) } /* - * Traverse all child datasets and apply volmode appropriately. + * Traverse all child datasets and apply the property appropriately. * We call dsl_prop_set_sync_impl() here to set the value only on the toplevel - * dataset and read the effective "volmode" on every child in the callback + * dataset and read the effective "property" on every child in the callback * function: this is because the value is not guaranteed to be the same in the * whole dataset hierarchy. */ static void -zvol_set_volmode_sync(void *arg, dmu_tx_t *tx) +zvol_set_common_sync(void *arg, dmu_tx_t *tx) { zvol_set_prop_int_arg_t *zsda = arg; dsl_pool_t *dp = dmu_tx_pool(tx); @@ -1692,29 +1623,31 @@ zvol_set_volmode_sync(void *arg, dmu_tx_t *tx) error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds); if (error == 0) { - dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_VOLMODE), + dsl_prop_set_sync_impl(ds, zfs_prop_to_name(zsda->zsda_prop), zsda->zsda_source, sizeof (zsda->zsda_value), 1, &zsda->zsda_value, zsda->zsda_tx); dsl_dataset_rele(ds, FTAG); } - dmu_objset_find_dp(dp, dd->dd_object, zvol_set_volmode_sync_cb, + dmu_objset_find_dp(dp, dd->dd_object, zvol_set_common_sync_cb, zsda, DS_FIND_CHILDREN); dsl_dir_rele(dd, FTAG); } int -zvol_set_volmode(const char *ddname, zprop_source_t source, uint64_t volmode) +zvol_set_common(const char *ddname, zfs_prop_t prop, zprop_source_t source, + uint64_t val) { zvol_set_prop_int_arg_t zsda; zsda.zsda_name = ddname; zsda.zsda_source = source; - zsda.zsda_value = volmode; + zsda.zsda_value = val; + zsda.zsda_prop = prop; - return (dsl_sync_task(ddname, zvol_set_volmode_check, - zvol_set_volmode_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE)); + return (dsl_sync_task(ddname, zvol_set_common_check, + zvol_set_common_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE)); } void From eeeff7321db2ddbe85dc97b6b63748c378f6afe7 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Wed, 25 Oct 2023 02:53:27 +0500 Subject: [PATCH 2/6] zvol: Implement zvol threading as a Property Currently, zvol threading can be switched through the zvol_request_sync module parameter system-wide. By making it a zvol property, zvol threading can be switched per zvol. Signed-off-by: Ameer Hamza --- include/sys/fs/zfs.h | 1 + include/sys/zvol.h | 1 + include/sys/zvol_impl.h | 1 + lib/libzfs/libzfs.abi | 3 ++- man/man7/zfsprops.7 | 12 ++++++++++++ module/os/linux/zfs/zvol_os.c | 9 ++++++++- module/zcommon/zfs_prop.c | 3 +++ module/zfs/zfs_ioctl.c | 9 +++++++++ module/zfs/zvol.c | 14 ++++++++++++++ 9 files changed, 51 insertions(+), 2 deletions(-) diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h index bc940e8a7929..d8b6ff3fbd4e 100644 --- a/include/sys/fs/zfs.h +++ b/include/sys/fs/zfs.h @@ -191,6 +191,7 @@ typedef enum { ZFS_PROP_REDACTED, ZFS_PROP_REDACT_SNAPS, ZFS_PROP_SNAPSHOTS_CHANGED, + ZFS_PROP_VOLTHREADING, ZFS_NUM_PROPS } zfs_prop_t; diff --git a/include/sys/zvol.h b/include/sys/zvol.h index 5e92a3367b9f..c9eefbeb48d9 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -50,6 +50,7 @@ extern int zvol_get_stats(objset_t *, nvlist_t *); extern boolean_t zvol_is_zvol(const char *); extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *); extern int zvol_set_volsize(const char *, uint64_t); +extern int zvol_set_volthreading(const char *, boolean_t); extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t); extern zvol_state_handle_t *zvol_suspend(const char *); extern int zvol_resume(zvol_state_handle_t *); diff --git a/include/sys/zvol_impl.h b/include/sys/zvol_impl.h index 3243917bcd3f..de4f6dec8648 100644 --- a/include/sys/zvol_impl.h +++ b/include/sys/zvol_impl.h @@ -58,6 +58,7 @@ typedef struct zvol_state { atomic_t zv_suspend_ref; /* refcount for suspend */ krwlock_t zv_suspend_lock; /* suspend lock */ struct zvol_state_os *zv_zso; /* private platform state */ + boolean_t zv_threading; /* volthreading property */ } zvol_state_t; diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi index 2d612a16b227..fdc36487dc50 100644 --- a/lib/libzfs/libzfs.abi +++ b/lib/libzfs/libzfs.abi @@ -1867,7 +1867,8 @@ - + + diff --git a/man/man7/zfsprops.7 b/man/man7/zfsprops.7 index f8554c81c874..069be5e3837a 100644 --- a/man/man7/zfsprops.7 +++ b/man/man7/zfsprops.7 @@ -1192,6 +1192,18 @@ are equivalent to the and .Sy noexec mount options. +.It Sy volthreading Ns = Ns Sy on Ns | Ns Sy off +Controls internal zvol threading. +The value +.Sy off +disables zvol threading, and zvol relies on application threads. +The default value is +.Sy on , +which enables threading within a zvol. +Please note that this property will be overridden by +.Sy zvol_request_sync +module parameter. +This property is only applicable to Linux. .It Sy filesystem_limit Ns = Ns Ar count Ns | Ns Sy none Limits the number of filesystems and volumes that can exist under this point in the dataset tree. diff --git a/module/os/linux/zfs/zvol_os.c b/module/os/linux/zfs/zvol_os.c index f94ce69fb9e2..a7bcda252f3f 100644 --- a/module/os/linux/zfs/zvol_os.c +++ b/module/os/linux/zfs/zvol_os.c @@ -512,7 +512,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq, uint64_t size = io_size(bio, rq); int rw = io_data_dir(bio, rq); - if (zvol_request_sync) + if (zvol_request_sync || zv->zv_threading == B_FALSE) force_sync = 1; zv_request_t zvr = { @@ -1304,6 +1304,7 @@ zvol_os_create_minor(const char *name) int error = 0; int idx; uint64_t hash = zvol_name_hash(name); + uint64_t volthreading; bool replayed_zil = B_FALSE; if (zvol_inhibit_dev) @@ -1350,6 +1351,12 @@ zvol_os_create_minor(const char *name) zv->zv_volsize = volsize; zv->zv_objset = os; + /* Default */ + zv->zv_threading = B_TRUE; + if (dsl_prop_get_integer(name, "volthreading", &volthreading, NULL) + == 0) + zv->zv_threading = volthreading; + set_capacity(zv->zv_zso->zvo_disk, zv->zv_volsize >> 9); blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue, diff --git a/module/zcommon/zfs_prop.c b/module/zcommon/zfs_prop.c index 00b5393a8906..f63c2ad2f61f 100644 --- a/module/zcommon/zfs_prop.c +++ b/module/zcommon/zfs_prop.c @@ -611,6 +611,9 @@ zfs_prop_init(void) ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME, ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK", B_FALSE, sfeatures); + zprop_register_index(ZFS_PROP_VOLTHREADING, "volthreading", + 1, PROP_DEFAULT, ZFS_TYPE_VOLUME, "on | off", "zvol threading", + boolean_table, sfeatures); zprop_register_number(ZFS_PROP_USEDSNAP, "usedbysnapshots", 0, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "", "USEDSNAP", B_FALSE, sfeatures); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index d780b9f3da17..72d5fc819203 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2523,6 +2523,15 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, case ZFS_PROP_VOLSIZE: err = zvol_set_volsize(dsname, intval); break; + case ZFS_PROP_VOLTHREADING: + err = zvol_set_volthreading(dsname, intval); + /* + * Set err to -1 to force the zfs_set_prop_nvlist code down the + * default path to set the value in the nvlist. + */ + if (err == 0) + err = -1; + break; case ZFS_PROP_SNAPDEV: case ZFS_PROP_VOLMODE: err = zvol_set_common(dsname, prop, source, intval); diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index 023a6386d665..c576e34696c8 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -369,6 +369,20 @@ zvol_set_volsize(const char *name, uint64_t volsize) return (SET_ERROR(error)); } +/* + * Update volthreading. + */ +int +zvol_set_volthreading(const char *name, boolean_t value) +{ + zvol_state_t *zv = zvol_find_by_name(name, RW_NONE); + if (zv == NULL) + return (ENOENT); + zv->zv_threading = value; + mutex_exit(&zv->zv_state_lock); + return (0); +} + /* * Sanity check volume block size. */ From 4505cdd56b7cf30031cbd3a30dca7ccf24b8ee9d Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Wed, 18 Oct 2023 00:19:58 +0500 Subject: [PATCH 3/6] zvol: fix delayed update to block device ro entry The change in the zvol readonly property does not update the block device readonly entry until the first IO to the ZVOL. This patch addresses the issue by updating the block device readonly property from the set property IOCTL call. Signed-off-by: Ameer Hamza --- include/sys/zvol.h | 1 + module/zfs/zfs_ioctl.c | 9 +++++++++ module/zfs/zvol.c | 20 ++++++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/include/sys/zvol.h b/include/sys/zvol.h index c9eefbeb48d9..c79fe1d9ad22 100644 --- a/include/sys/zvol.h +++ b/include/sys/zvol.h @@ -52,6 +52,7 @@ extern void zvol_create_cb(objset_t *, void *, cred_t *, dmu_tx_t *); extern int zvol_set_volsize(const char *, uint64_t); extern int zvol_set_volthreading(const char *, boolean_t); extern int zvol_set_common(const char *, zfs_prop_t, zprop_source_t, uint64_t); +extern int zvol_set_ro(const char *, boolean_t); extern zvol_state_handle_t *zvol_suspend(const char *); extern int zvol_resume(zvol_state_handle_t *); extern void *zvol_tag(zvol_state_handle_t *); diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index 72d5fc819203..01e16e102fc4 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -2536,6 +2536,15 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source, case ZFS_PROP_VOLMODE: err = zvol_set_common(dsname, prop, source, intval); break; + case ZFS_PROP_READONLY: + err = zvol_set_ro(dsname, intval); + /* + * Set err to -1 to force the zfs_set_prop_nvlist code down the + * default path to set the value in the nvlist. + */ + if (err == 0) + err = -1; + break; case ZFS_PROP_VERSION: { zfsvfs_t *zfsvfs; diff --git a/module/zfs/zvol.c b/module/zfs/zvol.c index c576e34696c8..e661c55d1802 100644 --- a/module/zfs/zvol.c +++ b/module/zfs/zvol.c @@ -383,6 +383,26 @@ zvol_set_volthreading(const char *name, boolean_t value) return (0); } +/* + * Update zvol ro property. + */ +int +zvol_set_ro(const char *name, boolean_t value) +{ + zvol_state_t *zv = zvol_find_by_name(name, RW_NONE); + if (zv == NULL) + return (-1); + if (value) { + zvol_os_set_disk_ro(zv, 1); + zv->zv_flags |= ZVOL_RDONLY; + } else { + zvol_os_set_disk_ro(zv, 0); + zv->zv_flags &= ~ZVOL_RDONLY; + } + mutex_exit(&zv->zv_state_lock); + return (0); +} + /* * Sanity check volume block size. */ From 57f7291f10e2bac1804f5c7ecd85b753bda49e94 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Dec 2023 15:53:59 -0500 Subject: [PATCH 4/6] dbuf: Handle arcbuf assignment after block cloning In some cases dbuf_assign_arcbuf() may be called on a block that was recently cloned. If it happened in current TXG we must undo the block cloning first, since the only one dirty record per TXG can't and shouldn't mean both cloning and overwrite same time. Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15653 --- module/zfs/dbuf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 5a7fe42b602a..7691cd85f6c2 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2930,7 +2930,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) while (db->db_state == DB_READ || db->db_state == DB_FILL) cv_wait(&db->db_changed, &db->db_mtx); - ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); + ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED || + db->db_state == DB_NOFILL); if (db->db_state == DB_CACHED && zfs_refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { @@ -2967,6 +2968,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) arc_buf_destroy(db->db_buf, db); } db->db_buf = NULL; + } else if (db->db_state == DB_NOFILL) { + /* + * We will be completely replacing the cloned block. In case + * it was cloned in this transaction group, let's undirty the + * pending clone and mark the block as uncached. This will be + * as if the clone was never done. + */ + VERIFY(!dbuf_undirty(db, tx)); + db->db_state = DB_UNCACHED; } ASSERT(db->db_buf == NULL); dbuf_set_data(db, buf); From 03255b069102deab6480f785fd1178b9dd5f3222 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dag-Erling=20Sm=C3=B8rgrav?= Date: Wed, 30 Aug 2023 17:13:09 +0200 Subject: [PATCH 5/6] Add VERIFY0P() and ASSERT0P() macros. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These macros are similar to VERIFY0() and ASSERT0() but are intended for pointers, and therefore use uintptr_t instead of int64_t. Reviewed-by: Brian Behlendorf Reviewed-by: Kay Pedersen Reviewed-by: Alexander Motin Signed-off-by: Dag-Erling Smørgrav Closes #15225 --- include/os/freebsd/spl/sys/debug.h | 13 +++++++++++++ include/os/linux/spl/sys/debug.h | 13 +++++++++++++ lib/libspl/include/assert.h | 11 +++++++++++ 3 files changed, 37 insertions(+) diff --git a/include/os/freebsd/spl/sys/debug.h b/include/os/freebsd/spl/sys/debug.h index 3e67cf0e9a7d..c788b54d5ac4 100644 --- a/include/os/freebsd/spl/sys/debug.h +++ b/include/os/freebsd/spl/sys/debug.h @@ -39,12 +39,14 @@ * ASSERT3U() - Assert unsigned X OP Y is true, if not panic. * ASSERT3P() - Assert pointer X OP Y is true, if not panic. * ASSERT0() - Assert value is zero, if not panic. + * ASSERT0P() - Assert pointer is null, if not panic. * VERIFY() - Verify X is true, if not panic. * VERIFY3B() - Verify boolean X OP Y is true, if not panic. * VERIFY3S() - Verify signed X OP Y is true, if not panic. * VERIFY3U() - Verify unsigned X OP Y is true, if not panic. * VERIFY3P() - Verify pointer X OP Y is true, if not panic. * VERIFY0() - Verify value is zero, if not panic. + * VERIFY0P() - Verify pointer is null, if not panic. */ #ifndef _SPL_DEBUG_H @@ -136,6 +138,15 @@ spl_assert(const char *buf, const char *file, const char *func, int line) (long long) (_verify3_right)); \ } while (0) +#define VERIFY0P(RIGHT) do { \ + const uintptr_t _verify0_right = (uintptr_t)(RIGHT); \ + if (unlikely(!(0 == _verify0_right))) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY0P(" #RIGHT ") " \ + "failed (NULL == %p)\n", \ + (void *)_verify0_right); \ + } while (0) + /* * Debugging disabled (--disable-debug) */ @@ -151,6 +162,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line) #define ASSERT3P(x, y, z) \ ((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z))) #define ASSERT0(x) ((void) sizeof ((uintptr_t)(x))) +#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x))) #define IMPLY(A, B) \ ((void) sizeof ((uintptr_t)(A)), (void) sizeof ((uintptr_t)(B))) #define EQUIV(A, B) \ @@ -166,6 +178,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line) #define ASSERT3U VERIFY3U #define ASSERT3P VERIFY3P #define ASSERT0 VERIFY0 +#define ASSERT0P VERIFY0P #define ASSERT VERIFY #define IMPLY(A, B) \ ((void)(likely((!(A)) || (B)) || \ diff --git a/include/os/linux/spl/sys/debug.h b/include/os/linux/spl/sys/debug.h index 007238574fe1..82338d212edd 100644 --- a/include/os/linux/spl/sys/debug.h +++ b/include/os/linux/spl/sys/debug.h @@ -34,12 +34,14 @@ * ASSERT3U() - Assert unsigned X OP Y is true, if not panic. * ASSERT3P() - Assert pointer X OP Y is true, if not panic. * ASSERT0() - Assert value is zero, if not panic. + * ASSERT0P() - Assert pointer is null, if not panic. * VERIFY() - Verify X is true, if not panic. * VERIFY3B() - Verify boolean X OP Y is true, if not panic. * VERIFY3S() - Verify signed X OP Y is true, if not panic. * VERIFY3U() - Verify unsigned X OP Y is true, if not panic. * VERIFY3P() - Verify pointer X OP Y is true, if not panic. * VERIFY0() - Verify value is zero, if not panic. + * VERIFY0P() - Verify pointer is null, if not panic. */ #ifndef _SPL_DEBUG_H @@ -140,6 +142,15 @@ spl_assert(const char *buf, const char *file, const char *func, int line) (long long) (_verify3_right)); \ } while (0) +#define VERIFY0P(RIGHT) do { \ + const uintptr_t _verify0_right = (uintptr_t)(RIGHT); \ + if (unlikely(!(0 == _verify0_right))) \ + spl_panic(__FILE__, __FUNCTION__, __LINE__, \ + "VERIFY0P(" #RIGHT ") " \ + "failed (NULL == %px)\n", \ + (void *)_verify0_right); \ + } while (0) + #define VERIFY_IMPLY(A, B) \ ((void)(likely((!(A)) || (B)) || \ spl_assert("(" #A ") implies (" #B ")", \ @@ -165,6 +176,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line) #define ASSERT3P(x, y, z) \ ((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z))) #define ASSERT0(x) ((void) sizeof ((uintptr_t)(x))) +#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x))) #define IMPLY(A, B) \ ((void) sizeof ((uintptr_t)(A)), (void) sizeof ((uintptr_t)(B))) #define EQUIV(A, B) \ @@ -180,6 +192,7 @@ spl_assert(const char *buf, const char *file, const char *func, int line) #define ASSERT3U VERIFY3U #define ASSERT3P VERIFY3P #define ASSERT0 VERIFY0 +#define ASSERT0P VERIFY0P #define ASSERT VERIFY #define IMPLY VERIFY_IMPLY #define EQUIV VERIFY_EQUIV diff --git a/lib/libspl/include/assert.h b/lib/libspl/include/assert.h index c5bf0f0cc8f1..79e1016eabc4 100644 --- a/lib/libspl/include/assert.h +++ b/lib/libspl/include/assert.h @@ -120,6 +120,15 @@ do { \ (u_longlong_t)__left); \ } while (0) +#define VERIFY0P(LEFT) \ +do { \ + const uintptr_t __left = (uintptr_t)(LEFT); \ + if (!(__left == 0)) \ + libspl_assertf(__FILE__, __FUNCTION__, __LINE__, \ + "%s == 0 (%p == 0)", #LEFT, \ + (void *)__left); \ +} while (0) + #ifdef assert #undef assert #endif @@ -134,6 +143,7 @@ do { \ #define ASSERT3P(x, y, z) \ ((void) sizeof ((uintptr_t)(x)), (void) sizeof ((uintptr_t)(z))) #define ASSERT0(x) ((void) sizeof ((uintptr_t)(x))) +#define ASSERT0P(x) ((void) sizeof ((uintptr_t)(x))) #define ASSERT(x) ((void) sizeof ((uintptr_t)(x))) #define assert(x) ((void) sizeof ((uintptr_t)(x))) #define IMPLY(A, B) \ @@ -146,6 +156,7 @@ do { \ #define ASSERT3U VERIFY3U #define ASSERT3P VERIFY3P #define ASSERT0 VERIFY0 +#define ASSERT0P VERIFY0P #define ASSERT VERIFY #define assert VERIFY #define IMPLY(A, B) \ From ba69ae241c8ae8dae922ce625b47a5e0fd429ffd Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Dec 2023 15:59:24 -0500 Subject: [PATCH 6/6] dbuf: Set dr_data when unoverriding after clone Block cloning normally creates dirty record without dr_data. But if the block is read after cloning, it is moved into DB_CACHED state and receives the data buffer. If after that we call dbuf_unoverride() to convert the dirty record into normal write, we should give it the data buffer from dbuf and release one. Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15654 Closes #15656 --- module/zfs/dbuf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index 7691cd85f6c2..e77461fb9c98 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1904,7 +1904,6 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) dmu_buf_impl_t *db = dr->dr_dbuf; blkptr_t *bp = &dr->dt.dl.dr_overridden_by; uint64_t txg = dr->dr_txg; - boolean_t release; ASSERT(MUTEX_HELD(&db->db_mtx)); /* @@ -1925,7 +1924,10 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) zio_free(db->db_objset->os_spa, txg, bp); - release = !dr->dt.dl.dr_brtwrite; + if (dr->dt.dl.dr_brtwrite) { + ASSERT0P(dr->dt.dl.dr_data); + dr->dt.dl.dr_data = db->db_buf; + } dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; dr->dt.dl.dr_brtwrite = B_FALSE; @@ -1939,7 +1941,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * the buf thawed to save the effort of freezing & * immediately re-thawing it. */ - if (release) + if (dr->dt.dl.dr_data) arc_release(dr->dt.dl.dr_data, db); }