zvol: multi taskq support
Signed-off-by: Ameer Hamza <[email protected]>
ixhamza committed Oct 30, 2023
1 parent fec7af5 commit f586620
Showing 1 changed file with 87 additions and 11 deletions.
98 changes: 87 additions & 11 deletions module/os/linux/zfs/zvol_os.c
@@ -44,6 +44,7 @@
#ifdef HAVE_BLK_MQ
#include <linux/blk-mq.h>
#endif
#include <cityhash.h>

static void zvol_request_impl(zvol_state_t *zv, struct bio *bio,
struct request *rq, boolean_t force_sync);
@@ -52,6 +53,9 @@ static unsigned int zvol_major = ZVOL_MAJOR;
static unsigned int zvol_request_sync = 0;
static unsigned int zvol_prefetch_bytes = (128 * 1024);
static unsigned long zvol_max_discard_blocks = 16384;
/* 1 << 26 = 64M offset */
static unsigned int zvol_taskq_offset_shift = 26;
static unsigned int zvol_use_single_taskq = 0;

#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
static unsigned int zvol_open_timeout_ms = 1000;
@@ -114,7 +118,11 @@ struct zvol_state_os {
boolean_t use_blk_mq;
};

static taskq_t *zvol_taskq;
typedef struct zvol_taskqs {
uint_t zvol_tqs_count;
taskq_t **zvol_tqs_taskq;
} zvol_taskqs_t;
static zvol_taskqs_t zv_tq;
static struct ida zvol_ida;

typedef struct zv_request_stack {
@@ -493,7 +501,6 @@ zvol_read_task(void *arg)
zv_request_task_free(task);
}


/*
* Process a BIO or request
*
@@ -532,6 +539,18 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
}

zv_request_task_t *task;
zvol_taskqs_t *ztqs = &zv_tq;
int blk_mq_hw_queue = 0;
#ifdef HAVE_BLK_MQ
if (rq && rq->q->queuedata == zv)
blk_mq_hw_queue = rq->mq_hctx->queue_num;
#endif
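/*
 * Pick the taskq for this request: hash the zvol, the offset region
 * (offset >> zvol_taskq_offset_shift) and the blk-mq hardware queue,
 * then fold the hash into the taskq count. With zvol_use_single_taskq
 * set, everything is dispatched to taskq 0.
 */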
uint_t tq_idx = 0;
if (zvol_use_single_taskq == 0) {
uint64_t taskq_hash = cityhash4((uintptr_t)zv, offset >>
zvol_taskq_offset_shift, blk_mq_hw_queue, 0);
tq_idx = taskq_hash % ztqs->zvol_tqs_count;
}

if (rw == WRITE) {
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
@@ -601,15 +620,15 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
zvol_discard(&zvr);
} else {
task = zv_request_task_create(zvr);
taskq_dispatch_ent(zvol_taskq,
taskq_dispatch_ent(ztqs->zvol_tqs_taskq[tq_idx],
zvol_discard_task, task, 0, &task->ent);
}
} else {
if (force_sync) {
zvol_write(&zvr);
} else {
task = zv_request_task_create(zvr);
taskq_dispatch_ent(zvol_taskq,
taskq_dispatch_ent(ztqs->zvol_tqs_taskq[tq_idx],
zvol_write_task, task, 0, &task->ent);
}
}
@@ -631,7 +650,7 @@ zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
zvol_read(&zvr);
} else {
task = zv_request_task_create(zvr);
taskq_dispatch_ent(zvol_taskq,
taskq_dispatch_ent(ztqs->zvol_tqs_taskq[tq_idx],
zvol_read_task, task, 0, &task->ent);
}
}
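
For readers who want to see the dispatch spreading in isolation, here is a minimal user-space sketch that mirrors the taskq selection added above. It is an approximation, not part of the patch: mix_hash() is only a stand-in for the in-kernel cityhash4(), and pick_taskq() plus the constants in main() are illustrative names chosen for this example.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for cityhash4(); any decent 64-bit mixer shows the same idea. */
static uint64_t
mix_hash(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
{
	uint64_t h = 0x9e3779b97f4a7c15ULL;
	uint64_t in[4] = { a, b, c, d };

	for (int i = 0; i < 4; i++) {
		h ^= in[i] + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
		h *= 0xff51afd7ed558ccdULL;
	}
	return (h);
}

/*
 * Mirror of the selection in zvol_request_impl(): the same zvol, the same
 * offset region (64M with the default shift of 26) and the same hardware
 * queue always map to the same taskq index.
 */
static unsigned int
pick_taskq(uintptr_t zv, uint64_t offset, int hw_queue,
    unsigned int offset_shift, unsigned int tq_count)
{
	uint64_t h = mix_hash((uint64_t)zv, offset >> offset_shift,
	    (uint64_t)hw_queue, 0);

	return ((unsigned int)(h % tq_count));
}

int
main(void)
{
	uintptr_t zv = (uintptr_t)0x12345000;	/* pretend zvol_state_t * */

	/*
	 * Walk 32M steps through the volume; neighbours in the same 64M
	 * region share a taskq, while distinct regions are scattered
	 * across the 8 taskqs by the hash.
	 */
	for (uint64_t off = 0; off < (8ULL << 26); off += (1ULL << 25))
		printf("offset %12llu -> taskq %u\n",
		    (unsigned long long)off, pick_taskq(zv, off, 0, 26, 8));
	return (0);
}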
@@ -1555,6 +1574,38 @@ zvol_init(void)
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
}

/*
 * Use at least 32 zvol_threads, but on many-core systems create
 * roughly one taskq per 6 CPUs, with no more taskqs than threads
 * in them on large systems.
 *
 *                 taskq   total
 * cpus    taskqs  threads threads
 * ------- ------- ------- -------
 * 1       1       32       32
 * 2       1       32       32
 * 4       1       32       32
 * 8       2       16       32
 * 16      3       11       33
 * 32      5       7        35
 * 64      8       8        64
 * 128     11      12       132
 * 256     16      16       256
 */
zvol_taskqs_t *ztqs = &zv_tq;
uint_t num_tqs = 1;
if (zvol_use_single_taskq == 0) {
num_tqs = 1 + num_online_cpus() / 6;
while (num_tqs * num_tqs > zvol_actual_threads)
num_tqs--;
}
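/*
 * Split the thread budget across the taskqs, rounding up so the
 * taskqs together provide at least zvol_actual_threads threads.
 */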
uint_t per_tq_thread = zvol_actual_threads / num_tqs;
if (per_tq_thread * num_tqs < zvol_actual_threads)
per_tq_thread++;
ztqs->zvol_tqs_count = num_tqs;
ztqs->zvol_tqs_taskq = kmem_alloc(num_tqs * sizeof (taskq_t *),
KM_SLEEP);

error = register_blkdev(zvol_major, ZVOL_DRIVER);
if (error) {
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
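
As a sanity check on the sizing comment and table above, the following small user-space sketch reproduces the same arithmetic. It assumes the default zvol_threads=0 path, where the thread budget works out to max(ncpus, 32); size_taskqs() is an illustrative name for this example, not a function in the patch.

#include <stdio.h>

/*
 * Reproduce the taskq sizing heuristic: roughly one taskq per 6 CPUs,
 * capped so that num_tqs^2 never exceeds the thread budget, with the
 * budget then split across taskqs and rounded up.
 */
static void
size_taskqs(unsigned int cpus, unsigned int budget)
{
	unsigned int num_tqs = 1 + cpus / 6;

	while (num_tqs * num_tqs > budget)
		num_tqs--;

	unsigned int per_tq_thread = budget / num_tqs;
	if (per_tq_thread * num_tqs < budget)
		per_tq_thread++;

	printf("%7u %7u %7u %7u\n", cpus, num_tqs, per_tq_thread,
	    num_tqs * per_tq_thread);
}

int
main(void)
{
	unsigned int cpus[] = { 1, 2, 4, 8, 16, 32, 64, 128, 256 };

	printf("%7s %7s %7s %7s\n", "cpus", "taskqs", "threads", "total");
	for (unsigned int i = 0; i < sizeof (cpus) / sizeof (cpus[0]); i++)
		size_taskqs(cpus[i], cpus[i] > 32 ? cpus[i] : 32);
	return (0);
}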
@@ -1576,11 +1627,17 @@ zvol_init(void)
1024);
}
#endif
zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri,
zvol_actual_threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
if (zvol_taskq == NULL) {
unregister_blkdev(zvol_major, ZVOL_DRIVER);
return (-ENOMEM);
for (uint_t i = 0; i < num_tqs; i++) {
char name[32];
(void) snprintf(name, sizeof (name), "%s_tq-%u",
ZVOL_DRIVER, i);
ztqs->zvol_tqs_taskq[i] = taskq_create(name, per_tq_thread,
maxclsyspri, per_tq_thread, INT_MAX,
TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
if (ztqs->zvol_tqs_taskq[i] == NULL) {
for (uint_t j = 0; j < i; j++)
taskq_destroy(ztqs->zvol_tqs_taskq[j]);
kmem_free(ztqs->zvol_tqs_taskq, ztqs->zvol_tqs_count *
sizeof (taskq_t *));
ztqs->zvol_tqs_taskq = NULL;
ztqs->zvol_tqs_count = 0;
unregister_blkdev(zvol_major, ZVOL_DRIVER);
return (-ENOMEM);
}
}

zvol_init_impl();
@@ -1591,9 +1648,22 @@ zvol_init(void)
void
zvol_fini(void)
{
zvol_taskqs_t *ztqs = &zv_tq;
zvol_fini_impl();
unregister_blkdev(zvol_major, ZVOL_DRIVER);
taskq_destroy(zvol_taskq);

if (ztqs->zvol_tqs_taskq == NULL) {
ASSERT3U(ztqs->zvol_tqs_count, ==, 0);
} else {
for (uint_t i = 0; i < ztqs->zvol_tqs_count; i++) {
ASSERT3P(ztqs->zvol_tqs_taskq[i], !=, NULL);
taskq_destroy(ztqs->zvol_tqs_taskq[i]);
}
kmem_free(ztqs->zvol_tqs_taskq, ztqs->zvol_tqs_count *
sizeof (taskq_t *));
ztqs->zvol_tqs_taskq = NULL;
}

ida_destroy(&zvol_ida);
}

@@ -1608,9 +1678,15 @@ module_param(zvol_threads, uint, 0444);
MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. Set"
" to 0 to use all active CPUs");

module_param(zvol_use_single_taskq, uint, 0444);
MODULE_PARM_DESC(zvol_use_single_taskq, "Use a single taskq for all zvol I/O (debugging)");

module_param(zvol_request_sync, uint, 0644);
MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");

module_param(zvol_taskq_offset_shift, uint, 0644);
MODULE_PARM_DESC(zvol_taskq_offset_shift, "Offset shift (log2 bytes) used to spread I/O across zvol taskqs");

module_param(zvol_max_discard_blocks, ulong, 0444);
MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
