From 36b1fd7ca5da455331487a956730b190e59dad01 Mon Sep 17 00:00:00 2001 From: Atsushi Abe Date: Tue, 18 Aug 2020 21:27:51 +0900 Subject: [PATCH 1/6] Introduce refcount to DPR --- messages/iosched_unified/root.txt | 6 + src/iosched/unified.c | 343 +++++++++++++++++++----------- src/libltfs/ltfs_fsops.c | 28 ++- src/ltfs_fuse.c | 2 +- 4 files changed, 245 insertions(+), 134 deletions(-) diff --git a/messages/iosched_unified/root.txt b/messages/iosched_unified/root.txt index 3d8f4e0c..9ab8277b 100644 --- a/messages/iosched_unified/root.txt +++ b/messages/iosched_unified/root.txt @@ -64,5 +64,11 @@ root:table { 13024I:string { "Clean up extents and append index at index partition (%d)." } 13025I:string { "Get error position (%d, %d)." } 13026E:string { "Write perm handling error : %s (%d)." } + 13027D:string { "Created DPR : %s." } + 13028D:string { "Removed DPR : %s." } + 13029D:string { "DPR (%s): %s (%u)." } + 13030D:string { "No DPR on %s (req = 0x%llx)." } + 13031D:string { "Created a request (0x%llx)." } + 13032D:string { "Removed a request (0x%llx)." } } } diff --git a/src/iosched/unified.c b/src/iosched/unified.c index 56a39a04..97faedc6 100644 --- a/src/iosched/unified.c +++ b/src/iosched/unified.c @@ -78,36 +78,17 @@ enum request_state { REQUEST_IP /**< A request already written to the DP, waiting to be written to the IP */ }; -/** - * A read request. These are used internally by unified_read to queue up read requests and issue - * them after releasing the dentry's iosched_lock. - */ -struct read_request { - TAILQ_ENTRY(read_request) list; /**< Pointers for linked list of requests */ - uint64_t offset; /**< File offset for the read request */ - char *buf; /**< Buffer which will receive data */ - size_t count; /**< Number of bytes to read */ -}; - -/** - * A write request. 
- */ -struct write_request { - TAILQ_ENTRY(write_request) list; /**< Pointers for linked list of requests */ - uint64_t offset; /**< Starting file offset for this request */ - size_t count; /**< Current request length, always <= cache block size */ - void *write_cache; /**< Cache block containing this request's data */ - enum request_state state; /**< Current state of the request */ -}; - /** * Per-dentry private data structure. It records a list of outstanding write requests * and associated data. */ struct dentry_priv { - struct dentry *dentry; /**< Dentry associated with this request list */ - ltfs_mutex_t io_lock; /**< Lock controlling file I/O to this dentry */ - uint64_t file_size; /**< Real file size, including outstanding write requests */ + struct dentry *dentry; /**< Dentry associated with this request list */ + uint64_t file_size; /**< Real file size, including outstanding write requests */ + ltfs_mutex_t io_lock; /**< Lock controlling file I/O to this dentry */ + + uint32_t numhandles; /**< Reference count */ + ltfs_mutex_t ref_lock; /**< Lock numhandles */ /** * Index partition write flag. This is set if the file's name and size match the volume's @@ -156,6 +137,29 @@ struct dentry_priv { TAILQ_HEAD(ext_struct, extent_info) alt_extentlist; }; +/** + * A read request. These are used internally by unified_read to queue up read requests and issue + * them after releasing the dentry's iosched_lock. + */ +struct read_request { + TAILQ_ENTRY(read_request) list; /**< Pointers for linked list of requests */ + uint64_t offset; /**< File offset for the read request */ + char *buf; /**< Buffer which will receive data */ + size_t count; /**< Number of bytes to read */ +}; + +/** + * A write request. 
+ */ +struct write_request { + TAILQ_ENTRY(write_request) list; /**< Pointers for linked list of requests */ + uint64_t offset; /**< Starting file offset for this request */ + size_t count; /**< Current request length, always <= cache block size */ + void *write_cache; /**< Cache block containing this request's data */ + struct dentry_priv *dpr; /**< Corresponding dentry_priv */ + enum request_state state; /**< Current state of the request */ +}; + /** * Main scheduler data structure. Each scheduler instance has exactly one of these. */ @@ -222,8 +226,10 @@ struct unified_data { /* Prototypes */ -int _unified_get_dentry_priv(struct dentry *d, struct dentry_priv **dentry_priv, - struct unified_data *priv); +int _unified_get_dentry_priv(struct dentry *d, + struct dentry_priv **dentry_priv, + struct unified_data *priv); +void _unified_put_dentry_priv(struct dentry_priv *dentry_priv, struct unified_data *priv); ltfs_thread_return _unified_writer_thread(void *iosched_handle); void _unified_process_queue(enum request_state queue, struct unified_data *priv); void _unified_process_index_queue(struct unified_data *priv); @@ -244,9 +250,6 @@ int _unified_merge_requests(struct write_request *dest, struct write_request *sr void **spare_cache, struct dentry_priv *dpr, struct unified_data *priv); int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv); int _unified_flush_all(struct unified_data *priv); -void _unified_free_dentry_priv_conditional(struct dentry *d, uint32_t target_handles, - struct unified_data *priv); -void _unified_free_dentry_priv(struct dentry *d, struct unified_data *priv); void _unified_set_write_ip(struct dentry_priv *dpr, struct unified_data *priv); void _unified_unset_write_ip(struct dentry_priv *dpr, struct unified_data *priv); void _unified_handle_write_error(ssize_t write_ret, struct write_request *req, @@ -393,7 +396,7 @@ int unified_destroy(void *iosched_handle) /* Push IP extents to libltfs and free remaining dentry_priv structures 
*/ if (! TAILQ_EMPTY(&priv->ext_queue)) { TAILQ_FOREACH_SAFE(dpr, &priv->ext_queue, ext_queue, aux) - _unified_free_dentry_priv(dpr->dentry, priv); + _unified_put_dentry_priv(dpr, priv); } /* Free data structures */ @@ -434,6 +437,9 @@ int unified_open(const char *path, bool open_write, struct dentry **dentry, void ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_ENTER(REQ_IOS_OPEN)); ret = ltfs_fsraw_open(path, open_write, dentry, ((struct unified_data *)iosched_handle)->vol); + if (!ret) { + ret = _unified_get_dentry_priv(*dentry, NULL, priv); + } ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_OPEN)); return ret; } @@ -450,6 +456,7 @@ int unified_close(struct dentry *d, bool flush, void *iosched_handle) { int write_error, ret = 0; struct unified_data *priv = iosched_handle; + struct dentry_priv *dpr; CHECK_ARG_NULL(d, -LTFS_NULL_ARG); CHECK_ARG_NULL(iosched_handle, -LTFS_NULL_ARG); @@ -457,15 +464,18 @@ int unified_close(struct dentry *d, bool flush, void *iosched_handle) acquireread_mrsw(&priv->lock); ltfs_mutex_lock(&d->iosched_lock); + ret = _unified_get_dentry_priv(d, &dpr, priv); if (flush) ret = _unified_flush_unlocked(d, priv); write_error = _unified_get_write_error(d->iosched_priv); - _unified_free_dentry_priv_conditional(d, 3, priv); + _unified_put_dentry_priv(dpr, priv); /* Release DPR captured in this function */ + ltfs_mutex_unlock(&d->iosched_lock); releaseread_mrsw(&priv->lock); /* No need to hold any scheduler locks when closing the file. All writes which were * outstanding when the close request started have been issued. */ + _unified_put_dentry_priv(dpr, priv); /* Release DPR captured in unified_open() */ ltfs_fsraw_close(d); ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_CLOSE)); return ret ? ret : write_error ? 
write_error : 0; @@ -514,8 +524,7 @@ ssize_t unified_read(struct dentry *d, char *buf, size_t size, off_t offset, voi goto out; releaseread_mrsw(&priv->vol->lock); - ltfs_mutex_lock(&d->iosched_lock); - dpr = d->iosched_priv; + _unified_get_dentry_priv(d, &dpr, priv); if (! dpr) { ltfs_mutex_unlock(&d->iosched_lock); ret = ltfs_fsraw_read(d, buf, size, offset, priv->vol); @@ -636,6 +645,9 @@ ssize_t unified_read(struct dentry *d, char *buf, size_t size, off_t offset, voi ltfs_mutex_unlock(&d->iosched_lock); out: + if (dpr) + _unified_put_dentry_priv(dpr, priv); + releaseread_mrsw(&priv->lock); ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_READ)); @@ -705,6 +717,8 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs ret = _unified_get_write_error(dpr); if (ret < 0) { /* Propagate the write error to the caller */ + if (dpr) + _unified_put_dentry_priv(dpr, priv); ltfs_mutex_unlock(&d->iosched_lock); releaseread_mrsw(&priv->lock); ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_WRITE)); @@ -715,6 +729,8 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs if (! 
checked_readonly) { ret = ltfs_get_tape_readonly(priv->vol); if (ret < 0) { + if (dpr) + _unified_put_dentry_priv(dpr, priv); ltfs_mutex_unlock(&d->iosched_lock); releaseread_mrsw(&priv->lock); ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_WRITE)); @@ -732,7 +748,6 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs } do_append: - if (TAILQ_EMPTY(&dpr->requests)) { req = NULL; last_offset = 0; @@ -759,8 +774,11 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs NULL, d, priv); if (ret < 0) goto out; - else if (ret == 0) + else if (ret == 0) { + if (dpr) + _unified_put_dentry_priv(dpr, priv); goto write_start; + } buf += ret; offset += ret; @@ -782,8 +800,11 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs ret = _unified_insert_new_request(buf, offset, size, &spare_cache, false, req, d, priv); if (ret < 0) goto out; - else if (ret == 0) + else if (ret == 0) { + if (dpr) + _unified_put_dentry_priv(dpr, priv); goto write_start; + } prev_req = TAILQ_PREV(req, req_struct, list); buf += ret; @@ -841,12 +862,15 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs /* Split */ copy_offset = (offset - req->offset) + size; ret = _unified_insert_new_request(req_cache + copy_offset, - req->offset + copy_offset, req->count - copy_offset, - &spare_cache, true, aux, d, priv); + req->offset + copy_offset, req->count - copy_offset, + &spare_cache, true, aux, d, priv); if (ret < 0) goto out; - else if (ret == 0) + else if (ret == 0) { + if (dpr) + _unified_put_dentry_priv(dpr, priv); goto write_start; + } req->count = offset - req->offset; req = TAILQ_NEXT(req, list); goto do_insert_before; @@ -864,6 +888,9 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs goto do_append; out: + if (dpr) + _unified_put_dentry_priv(dpr, priv); + if (ret >= 0) { int err = ltfs_get_volume_lock(false, 
priv->vol); /* It's undesirable to fail the write here, as we have no way to roll back the cache @@ -880,8 +907,10 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs } } ltfs_mutex_unlock(&d->iosched_lock); - if (spare_cache) + if (spare_cache) { + _unified_put_dentry_priv(dpr, priv); _unified_cache_free(spare_cache, 0, priv); + } releaseread_mrsw(&priv->lock); ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_WRITE)); return (ret < 0) ? ret : (ssize_t)original_size; @@ -947,7 +976,7 @@ int unified_truncate(struct dentry *d, off_t length, void *iosched_handle) acquireread_mrsw(&priv->lock); ltfs_mutex_lock(&d->iosched_lock); - dpr = d->iosched_priv; + _unified_get_dentry_priv(d, &dpr, priv); if (dpr) { if ((uint64_t)length < dpr->file_size) { if (! TAILQ_EMPTY(&dpr->requests)) { @@ -1001,7 +1030,9 @@ int unified_truncate(struct dentry *d, off_t length, void *iosched_handle) ltfs_mutex_unlock(&d->iosched_lock); releaseread_mrsw(&priv->lock); - if (! dpr) + if (dpr) + _unified_put_dentry_priv(dpr, priv); + else ret = ltfs_fsraw_truncate(d, length, priv->vol); ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_TRUNCATE)); @@ -1206,8 +1237,6 @@ void _unified_process_index_queue(struct unified_data *priv) _unified_free_request(req, priv); } } - - _unified_free_dentry_priv_conditional(dentry_priv->dentry, 2, priv); } releasewrite_mrsw(&priv->lock); } @@ -1359,17 +1388,28 @@ void _unified_process_data_queue(enum request_state queue, struct unified_data * * @return 0 on success or a negative value on error. This function always succeeds * if alloc is false. 
*/ -int _unified_get_dentry_priv(struct dentry *d, struct dentry_priv **dentry_priv, - struct unified_data *priv) +int _unified_get_dentry_priv(struct dentry *d, + struct dentry_priv **dentry_priv, + struct unified_data *priv) { int ret; size_t max_filesize; struct dentry_priv *dpr; + acquireread_mrsw(&d->meta_lock); if (d->iosched_priv) { - *dentry_priv = d->iosched_priv; + dpr = d->iosched_priv; + ltfs_mutex_lock(&dpr->ref_lock); + dpr->numhandles++; + if (dentry_priv) { + *dentry_priv = d->iosched_priv; + } + ltfsmsg(LTFS_DEBUG3, 13029D, "Inc", d->name.name, dpr->numhandles); + ltfs_mutex_unlock(&dpr->ref_lock); + releaseread_mrsw(&d->meta_lock); return 0; } + releaseread_mrsw(&d->meta_lock); dpr = calloc(1, sizeof(struct dentry_priv)); if (! dpr) { @@ -1398,20 +1438,80 @@ int _unified_get_dentry_priv(struct dentry *d, struct dentry_priv **dentry_priv, return -LTFS_MUTEX_INIT; } - acquireread_mrsw(&d->meta_lock); + ret = ltfs_mutex_init(&dpr->ref_lock); + if (ret) { + /* Failed to initialize mutex in scheduler private data (%d) */ + ltfsmsg(LTFS_ERR, 13009E, ret); + ltfs_mutex_destroy(&dpr->io_lock); + ltfs_mutex_destroy(&dpr->write_error_lock); + free(dpr); + return -LTFS_MUTEX_INIT; + } + + ltfs_fsraw_get_dentry(d, priv->vol); + + acquirewrite_mrsw(&d->meta_lock); + ltfs_mutex_lock(&dpr->ref_lock); dpr->file_size = d->size; dpr->write_ip = d->matches_name_criteria; - releaseread_mrsw(&d->meta_lock); + dpr->numhandles = 1; + d->iosched_priv = dpr; + ltfs_mutex_unlock(&dpr->ref_lock); + releasewrite_mrsw(&d->meta_lock); + max_filesize = index_criteria_get_max_filesize(priv->vol); if (max_filesize == 0 || dpr->file_size > max_filesize) dpr->write_ip = false; - d->iosched_priv = dpr; - ltfs_fsraw_get_dentry(d, priv->vol); - *dentry_priv = dpr; + if (dentry_priv) { + *dentry_priv = dpr; + } + + ltfsmsg(LTFS_DEBUG, 13027D, d->name.name); + return 0; } +void _unified_put_dentry_priv(struct dentry_priv *dentry_priv, struct unified_data *priv) +{ + struct dentry_priv 
*dpr = dentry_priv; + struct dentry *d = dpr->dentry; + + acquirewrite_mrsw(&d->meta_lock); + ltfs_mutex_lock(&dpr->ref_lock); + if (dpr->numhandles > 0) { + dpr->numhandles--; + } + + if (!dpr->numhandles) { + d->iosched_priv = NULL; + ltfs_mutex_unlock(&dpr->ref_lock); + releasewrite_mrsw(&d->meta_lock); + + if (! TAILQ_EMPTY(&dpr->requests)) + ltfsmsg(LTFS_WARN, 13022W); + + /* Sent alt_extentlist to libltfs */ + if (dpr->write_ip && ! TAILQ_EMPTY(&dpr->alt_extentlist)) + _unified_clear_alt_extentlist(true, dpr, priv); + + ltfs_mutex_destroy(&dpr->write_error_lock); + ltfs_mutex_destroy(&dpr->ref_lock); + ltfs_mutex_destroy(&dpr->io_lock); + free(dpr); + + ltfs_fsraw_put_dentry(d, priv->vol); + + ltfsmsg(LTFS_DEBUG, 13028D, d->name.name); + } else { + ltfsmsg(LTFS_DEBUG3, 13029D, "Dec", d->name.name, dpr->numhandles); + ltfs_mutex_unlock(&dpr->ref_lock); + releasewrite_mrsw(&d->meta_lock); + } + + return; +} + /** * Add an extent to the alternate extent list. * Adds the dentry_priv to the ext_queue if it isn't already there. This is needed on unmount @@ -1656,8 +1756,19 @@ int _unified_update_queue_membership(bool add, bool all, enum request_state queu */ void _unified_free_request(struct write_request *req, struct unified_data *priv) { - if (req->write_cache) + struct dentry_priv *dpr = req->dpr; + + if (req->write_cache) { _unified_cache_free(req->write_cache, req->count, priv); + } + + if (dpr) { + ltfsmsg(LTFS_DEBUG, 13032D, req); + _unified_put_dentry_priv(req->dpr, priv); + } else { + ltfsmsg(LTFS_DEBUG, 13030D, __FUNCTION__, req); + } + free(req); } @@ -1741,8 +1852,8 @@ ssize_t _unified_insert_new_request(const char *buf, off_t offset, size_t count, bool ip_state, struct write_request *req, struct dentry *d, struct unified_data *priv) { int ret; - struct dentry_priv *dpr = d->iosched_priv; - struct write_request *new_req; + struct dentry_priv *dpr = NULL; + struct write_request *new_req = NULL; size_t copy_count; if (! 
(*cache)) { @@ -1772,6 +1883,10 @@ ssize_t _unified_insert_new_request(const char *buf, off_t offset, size_t count, releaseread_mrsw(&priv->lock); return -LTFS_NO_MEMORY; } + + _unified_get_dentry_priv(d, &dpr, priv); + + new_req->dpr = dpr; new_req->offset = offset; new_req->count = copy_count; if (ip_state) @@ -1790,6 +1905,8 @@ ssize_t _unified_insert_new_request(const char *buf, off_t offset, size_t count, if (new_req->offset + new_req->count > dpr->file_size) dpr->file_size = new_req->offset + new_req->count; + ltfsmsg(LTFS_DEBUG, 13031D, new_req); + return (ssize_t)count; } @@ -1922,6 +2039,50 @@ int _unified_merge_requests(struct write_request *dest, struct write_request *sr * @param priv I/O scheduler private data. * @return 0 on success or a negative value on error. */ +#if 1 +int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv) +{ + ssize_t ret = 0; + struct dentry_priv *dpr; + struct write_request *req, *aux; + + CHECK_ARG_NULL(d, -LTFS_NULL_ARG); + CHECK_ARG_NULL(priv, -LTFS_NULL_ARG); + + _unified_get_dentry_priv(d, &dpr, priv); + if (! 
dpr) { + return 0; + } + + /* Check for previous write errors */ + ret = _unified_get_write_error(dpr); + if (ret < 0) { + _unified_put_dentry_priv(dpr, priv); + return ret; + } + + if (TAILQ_EMPTY(&dpr->requests)) { + _unified_put_dentry_priv(dpr, priv); + return 0; + } + + /* Enqueue requests to DP queue */ + TAILQ_FOREACH_SAFE(req, &dpr->requests, list, aux) { + if (req->state == REQUEST_PARTIAL) { + _unified_update_queue_membership(false, false, REQUEST_PARTIAL, dpr, priv); + req->state = REQUEST_DP; + _unified_update_queue_membership(true, false, REQUEST_DP, dpr, priv); + } + } + + /* Tell background thread a write request is ready */ + ltfs_thread_cond_signal(&priv->queue_cond); + + _unified_put_dentry_priv(dpr, priv); + + return 0; +} +#else int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv) { ssize_t ret = 0; @@ -1979,6 +2140,7 @@ int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv) ret = _unified_get_write_error(dpr); return (ret < 0) ? ret : 0; } +#endif /** * Flush all dentries to the data partition. @@ -2022,75 +2184,6 @@ int _unified_flush_all(struct unified_data *priv) return 0; } -/** - * Free a dentry_priv structure if it has no open handles, outstanding requests or - * queued IP extents. - * The caller is assumed to have a handle on the dentry, so "no open handles" means - * d->numhandles == 2 normally, numhandles == 1 if d has been unlinked or if IP processing - * just finished and there are no open handles. - * The caller should also hold appropriate locks, which usually means d->iosched_lock. - * @param d Dentry to free priv structure for. - * @param target_handles Only free the dentry_priv if the dentry has this many handles or fewer. - * @param priv I/O scheduler private data. 
- */ -void _unified_free_dentry_priv_conditional(struct dentry *d, uint32_t target_handles, - struct unified_data *priv) -{ - uint32_t numhandles; - struct dentry_priv *dpr; - - acquireread_mrsw(&d->meta_lock); - numhandles = d->numhandles; - releaseread_mrsw(&d->meta_lock); - - dpr = d->iosched_priv; - if (dpr && numhandles <= target_handles && TAILQ_EMPTY(&dpr->requests) && - TAILQ_EMPTY(&dpr->alt_extentlist)) { - /* Take I/O lock first. The background thread could be processing this dentry */ - ltfs_mutex_lock(&dpr->io_lock); - ltfs_mutex_unlock(&dpr->io_lock); - - ltfs_mutex_destroy(&dpr->write_error_lock); - ltfs_mutex_destroy(&dpr->io_lock); - free(dpr); - d->iosched_priv = NULL; - ltfs_fsraw_put_dentry(d, priv->vol); - } -} - -/** - * Free a dentry_priv structure unconditionally, dispatching its alt_extentlist to libltfs - * if write_ip is enabled. - * This is called on unmount to ensure that IP extents hit the tape. It emits a warning if the - * dentry has any outstanding write requests. - * @param d Dentry to free priv structure for. - * @param priv I/O scheduler private data. - */ -void _unified_free_dentry_priv(struct dentry *d, struct unified_data *priv) -{ - struct dentry_priv *dpr = d->iosched_priv; - - if (! dpr) - return; - - if (! TAILQ_EMPTY(&dpr->requests)) - ltfsmsg(LTFS_WARN, 13022W); - - /* Wait for background thread to finish flushing requests */ - ltfs_mutex_lock(&dpr->io_lock); - ltfs_mutex_unlock(&dpr->io_lock); - - /* Sent alt_extentlist to libltfs */ - if (dpr->write_ip && ! TAILQ_EMPTY(&dpr->alt_extentlist)) - _unified_clear_alt_extentlist(true, dpr, priv); - - ltfs_mutex_destroy(&dpr->write_error_lock); - ltfs_mutex_destroy(&dpr->io_lock); - free(dpr); - d->iosched_priv = NULL; - ltfs_fsraw_put_dentry(d, priv->vol); -} - /** * Set the write_ip flag for a dentry_priv structure. 
* This also requires updating the global DP request counter so that the background writer thread diff --git a/src/libltfs/ltfs_fsops.c b/src/libltfs/ltfs_fsops.c index 671b2cfe..8c941942 100644 --- a/src/libltfs/ltfs_fsops.c +++ b/src/libltfs/ltfs_fsops.c @@ -268,12 +268,10 @@ int ltfs_fsops_create(const char *path, bool isdir, bool readonly, bool overwrit /* Look up parent directory */ fs_split_path(path_norm, &filename, strlen(path_norm) + 1); - if (dcache_initialized(vol)) { - ret = asprintf(&dentry_path, "%s/%s", path_norm, filename); - if (ret < 0) { - ltfsmsg(LTFS_ERR, 10001E, "ltfs_fsops_create: dentry_path"); - goto out_dispose; - } + ret = asprintf(&dentry_path, "%s/%s", path_norm, filename); + if (ret < 0) { + ltfsmsg(LTFS_ERR, 10001E, "ltfs_fsops_create: dentry_path"); + goto out_dispose; } /* Lookup the parent dentry. On success, parent->contents_lock will be held in write mode */ @@ -341,7 +339,14 @@ int ltfs_fsops_create(const char *path, bool isdir, bool readonly, bool overwrit d->vol = vol; d->parent = parent; ++d->link_count; - ++d->numhandles; + + if (!iosched_initialized(vol)) { + /* + * numhandles will be incremented in iosched_open() below when ioscheduler is + * enabled. 
+ */ + ++d->numhandles; + } /* Block end */ if (isdir) @@ -365,7 +370,7 @@ int ltfs_fsops_create(const char *path, bool isdir, bool readonly, bool overwrit ltfs_set_index_dirty(false, false, vol->index); d->dirty = true; ltfs_mutex_unlock(&vol->index->dirty_lock); - vol->file_open_count ++; + vol->file_open_count++; *dentry = d; ret = 0; @@ -381,6 +386,13 @@ int ltfs_fsops_create(const char *path, bool isdir, bool readonly, bool overwrit } } + if (ret == 0 && iosched_initialized(vol)) { + ret = iosched_open(dentry_path, overwrite, &d, vol); + if (ret < 0) { + fs_release_dentry(d); + } + } + if (ret == 0 && parent->is_appendonly) { ltfs_file_id id; ret = ltfs_fsops_setxattr(path, "user.ltfs.vendor.IBM.appendonly", "1", 1, 0, &id, vol); diff --git a/src/ltfs_fuse.c b/src/ltfs_fuse.c index f26501d2..842cfebf 100644 --- a/src/ltfs_fuse.c +++ b/src/ltfs_fuse.c @@ -162,7 +162,7 @@ static void _free_file_info(struct file_info *fi) * @return File handle information, or NULL if memory allocation failed or if 'priv' is NULL. 
*/ static struct file_info *_file_open(const char *path, void *d, struct file_info *spare, - struct ltfs_fuse_data *priv) + struct ltfs_fuse_data *priv) { struct file_info *fi = NULL; CHECK_ARG_NULL(priv, NULL); From 7826779b34cd4a15d025253704aa170b00e8c498 Mon Sep 17 00:00:00 2001 From: Atsushi Abe Date: Sat, 22 Aug 2020 21:14:50 +0900 Subject: [PATCH 2/6] Introduce defered flush --- src/iosched/unified.c | 91 +++++++++++++++++++++++++--- src/tape_drivers/linux/sg/.gitignore | 1 + 2 files changed, 82 insertions(+), 10 deletions(-) diff --git a/src/iosched/unified.c b/src/iosched/unified.c index 97faedc6..df6f08b7 100644 --- a/src/iosched/unified.c +++ b/src/iosched/unified.c @@ -249,7 +249,9 @@ size_t _unified_update_request(const char *buf, off_t offset, size_t size, int _unified_merge_requests(struct write_request *dest, struct write_request *src, void **spare_cache, struct dentry_priv *dpr, struct unified_data *priv); int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv); +int _unified_exit_unlocked(struct dentry *d, struct unified_data *priv); int _unified_flush_all(struct unified_data *priv); +int _unified_exit_all(struct unified_data *priv); void _unified_set_write_ip(struct dentry_priv *dpr, struct unified_data *priv); void _unified_unset_write_ip(struct dentry_priv *dpr, struct unified_data *priv); void _unified_handle_write_error(ssize_t write_ret, struct write_request *req, @@ -1148,7 +1150,7 @@ ltfs_thread_return _unified_writer_thread(void *iosched_handle) ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_ENTER(REQ_IOS_IOSCHED)); if (! 
priv->writer_keepalive) { ltfs_thread_mutex_unlock(&priv->queue_lock); - _unified_flush_all(priv); + _unified_exit_all(priv); _unified_process_queue(REQUEST_IP, priv); break; @@ -1763,7 +1765,7 @@ void _unified_free_request(struct write_request *req, struct unified_data *priv) } if (dpr) { - ltfsmsg(LTFS_DEBUG, 13032D, req); + ltfsmsg(LTFS_DEBUG3, 13032D, req); _unified_put_dentry_priv(req->dpr, priv); } else { ltfsmsg(LTFS_DEBUG, 13030D, __FUNCTION__, req); @@ -1905,7 +1907,7 @@ ssize_t _unified_insert_new_request(const char *buf, off_t offset, size_t count, if (new_req->offset + new_req->count > dpr->file_size) dpr->file_size = new_req->offset + new_req->count; - ltfsmsg(LTFS_DEBUG, 13031D, new_req); + ltfsmsg(LTFS_DEBUG3, 13031D, new_req); return (ssize_t)count; } @@ -2039,10 +2041,10 @@ int _unified_merge_requests(struct write_request *dest, struct write_request *sr * @param priv I/O scheduler private data. * @return 0 on success or a negative value on error. */ -#if 1 int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv) { ssize_t ret = 0; + bool requeued = false; struct dentry_priv *dpr; struct write_request *req, *aux; @@ -2067,23 +2069,51 @@ int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv) } /* Enqueue requests to DP queue */ + ltfs_thread_mutex_lock(&priv->queue_lock); TAILQ_FOREACH_SAFE(req, &dpr->requests, list, aux) { if (req->state == REQUEST_PARTIAL) { - _unified_update_queue_membership(false, false, REQUEST_PARTIAL, dpr, priv); + if (dpr->in_working_set == 1) { + TAILQ_REMOVE(&priv->working_set, dpr, working_set); + --priv->ws_count; + } + if (dpr->in_working_set) { + --priv->ws_request_count; + --dpr->in_working_set; + } + req->state = REQUEST_DP; - _unified_update_queue_membership(true, false, REQUEST_DP, dpr, priv); + + if (! dpr->in_dp_queue) { + TAILQ_INSERT_TAIL(&priv->dp_queue, dpr, dp_queue); + ++priv->dp_count; + } + if (! 
dpr->write_ip) + ++priv->dp_request_count; + ++dpr->in_dp_queue; + + requeued = true; } } + ltfs_thread_mutex_unlock(&priv->queue_lock); /* Tell background thread a write request is ready */ - ltfs_thread_cond_signal(&priv->queue_cond); + if (requeued) + ltfs_thread_cond_signal(&priv->queue_cond); _unified_put_dentry_priv(dpr, priv); return 0; } -#else -int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv) + +/** + * Flush requests for a dentry directly. + * The caller should hold (a) d->iosched_lock and a read lock on priv->lock, or + * (b) a write lock on priv->lock. + * @param d Dentry to flush. + * @param priv I/O scheduler private data. + * @return 0 on success or a negative value on error. + */ +int _unified_exit_unlocked(struct dentry *d, struct unified_data *priv) { ssize_t ret = 0; struct dentry_priv *dpr; @@ -2140,7 +2170,6 @@ int _unified_flush_unlocked(struct dentry *d, struct unified_data *priv) ret = _unified_get_write_error(dpr); return (ret < 0) ? ret : 0; } -#endif /** * Flush all dentries to the data partition. @@ -2184,6 +2213,48 @@ int _unified_flush_all(struct unified_data *priv) return 0; } +/** + * Flush all dentries to the data partition directly. + * If this function returns success, there are no REQUEST_DP or REQUEST_PARTIAL requests left + * in the scheduler. There may still be REQUEST_IP requests lying around. + * @param priv I/O scheduler instance to flush. + * @return 0 on success or a negative value on error. + */ +int _unified_exit_all(struct unified_data *priv) +{ + int ret; + struct dentry_priv *dpr, *aux; + + CHECK_ARG_NULL(priv, -LTFS_NULL_ARG); + + acquirewrite_mrsw(&priv->lock); + + if (! TAILQ_EMPTY(&priv->dp_queue)) { + TAILQ_FOREACH_SAFE(dpr, &priv->dp_queue, dp_queue, aux) { + ret = _unified_exit_unlocked(dpr->dentry, priv); + if (ret < 0) { + ltfsmsg(LTFS_ERR, 13020E, dpr->dentry->platform_safe_name, ret); + releasewrite_mrsw(&priv->lock); + return ret; + } + } + } + + if (! 
TAILQ_EMPTY(&priv->working_set)) { + TAILQ_FOREACH_SAFE(dpr, &priv->working_set, working_set, aux) { + ret = _unified_exit_unlocked(dpr->dentry, priv); + if (ret < 0) { + ltfsmsg(LTFS_ERR, 13020E, dpr->dentry->platform_safe_name, ret); + releasewrite_mrsw(&priv->lock); + return ret; + } + } + } + + releasewrite_mrsw(&priv->lock); + return 0; +} + /** * Set the write_ip flag for a dentry_priv structure. * This also requires updating the global DP request counter so that the background writer thread diff --git a/src/tape_drivers/linux/sg/.gitignore b/src/tape_drivers/linux/sg/.gitignore index ce4d0b0c..0907b2ce 100644 --- a/src/tape_drivers/linux/sg/.gitignore +++ b/src/tape_drivers/linux/sg/.gitignore @@ -1,4 +1,5 @@ vendor_compat.c ibm_tape.c hp_tape.c +quantum_tape.c open_factor.c From 10be046d651c0b6c407d666c6cac57c748f6ef48 Mon Sep 17 00:00:00 2001 From: Atsushi Abe Date: Mon, 24 Aug 2020 20:24:32 +0900 Subject: [PATCH 3/6] Support flush all correctly --- messages/iosched_unified/root.txt | 5 ++ src/iosched/fcfs.c | 9 ++- src/iosched/unified.c | 106 ++++++++++++++++++++++++++++-- src/libltfs/ltfs_fsops.c | 4 +- 4 files changed, 114 insertions(+), 10 deletions(-) diff --git a/messages/iosched_unified/root.txt b/messages/iosched_unified/root.txt index 9ab8277b..ce6ed4dc 100644 --- a/messages/iosched_unified/root.txt +++ b/messages/iosched_unified/root.txt @@ -70,5 +70,10 @@ root:table { 13030D:string { "No DPR on %s (req = 0x%llx)." } 13031D:string { "Created a request (0x%llx)." } 13032D:string { "Removed a request (0x%llx)." } + 13033D:string { "Flushing all (%s, %s)." } + 13034D:string { "Send a broadcast from %s." } + 13035D:string { "Waiting a broadcast." } + 13036D:string { "Received a broadcast." } + 13037D:string { "Flush is done." 
} } } diff --git a/src/iosched/fcfs.c index e54c4747..e652fc25 100644 --- a/src/iosched/fcfs.c +++ b/src/iosched/fcfs.c @@ -110,13 +110,18 @@ int fcfs_destroy(void *iosched_handle) */ int fcfs_open(const char *path, bool open_write, struct dentry **dentry, void *iosched_handle) { + int ret; struct fcfs_data *priv = (struct fcfs_data *) iosched_handle; CHECK_ARG_NULL(path, -LTFS_NULL_ARG); CHECK_ARG_NULL(dentry, -LTFS_NULL_ARG); CHECK_ARG_NULL(iosched_handle, -LTFS_NULL_ARG); - return ltfs_fsraw_open(path, open_write, dentry, priv->vol); + ret = ltfs_fsraw_open(path, open_write, dentry, priv->vol); + if (!ret) + ltfs_fsraw_get_dentry(*dentry, priv->vol); + + return ret; } /** @@ -131,6 +136,8 @@ int fcfs_close(struct dentry *d, bool flush, void *iosched_handle) CHECK_ARG_NULL(d, -LTFS_NULL_ARG); CHECK_ARG_NULL(iosched_handle, -LTFS_NULL_ARG); + ltfs_fsraw_put_dentry(d, priv->vol); + return ltfs_fsraw_close(d); } diff --git a/src/iosched/unified.c index df6f08b7..97432445 100644 --- a/src/iosched/unified.c +++ b/src/iosched/unified.c @@ -63,6 +63,8 @@ #include "libltfs/arch/time_internal.h" #include "cache_manager.h" +//#define USE_DIRECT_FLUSH /* Use direct flush at flush all */ + /** * Maximum number of requests targeting the Index Partition to keep before flushing * them to the tape, as a fraction of the total number of cache blocks in the pool. @@ -215,6 +217,15 @@ struct unified_data { uint32_t dp_request_count; /**< Number of requests in REQUEST_DP state which will NOT change to IP state */ uint32_t ip_request_count; /**< Number of requests in REQUEST_IP state */ + /** + * writer thread Lock. + * Take this before manipulating the working_set, dp_queue and ip_queue lists + * or the corresponding request counters. Do not take the sched_lock or any dentry_priv lock + * while holding this lock. 
+ */ + ltfs_thread_mutex_t writer_lock; + ltfs_thread_cond_t writer_cond; /**< Signal this variable when the writer thread starts to write blocks */ + ltfs_thread_t writer_thread; /**< Background writer thread ID */ bool writer_keepalive; /**< Used to terminate the background writer thread */ void *pool; /**< Handle to the cache manager */ @@ -333,10 +344,33 @@ void *unified_init(struct ltfs_volume *vol) free(priv); return NULL; } + ret = ltfs_thread_mutex_init(&priv->writer_lock); + if (ret) { + /* Cannot initialize scheduler: failed to initialize mutex %s (%d) */ + ltfsmsg(LTFS_ERR, 13006E, "writer_lock", ret); + ltfs_thread_cond_destroy(&priv->cache_cond); + ltfs_thread_mutex_destroy(&priv->cache_lock); + cache_manager_destroy(priv->pool); + free(priv); + return NULL; + } + ret = ltfs_thread_cond_init(&priv->writer_cond); + if (ret) { + /* Cannot initialize scheduler: failed to initialize condition variable %s (%d) */ + ltfsmsg(LTFS_ERR, 13007E, "writer_cond", ret); + ltfs_thread_mutex_destroy(&priv->writer_lock); + ltfs_thread_cond_destroy(&priv->cache_cond); + ltfs_thread_mutex_destroy(&priv->cache_lock); + cache_manager_destroy(priv->pool); + free(priv); + return NULL; + } ret = init_mrsw(&priv->lock); if (ret < 0) { ltfsmsg(LTFS_ERR, 13006E, "lock", ret); + ltfs_thread_cond_destroy(&priv->writer_cond); + ltfs_thread_mutex_destroy(&priv->writer_lock); ltfs_thread_cond_destroy(&priv->queue_cond); ltfs_thread_mutex_destroy(&priv->queue_lock); ltfs_thread_cond_destroy(&priv->cache_cond); @@ -357,7 +391,9 @@ void *unified_init(struct ltfs_volume *vol) ret = ltfs_thread_create(&priv->writer_thread, _unified_writer_thread, priv); if (ret) { /* Cannot initialize scheduler: failed to create thread */ - ltfsmsg(LTFS_ERR, 13008E, "queue_cond", ret); + ltfsmsg(LTFS_ERR, 13008E, "writer thread", ret); + ltfs_thread_cond_destroy(&priv->writer_cond); + ltfs_thread_mutex_destroy(&priv->writer_lock); ltfs_thread_cond_destroy(&priv->queue_cond); 
ltfs_thread_mutex_destroy(&priv->queue_lock); ltfs_thread_cond_destroy(&priv->cache_cond); @@ -402,6 +438,8 @@ int unified_destroy(void *iosched_handle) } /* Free data structures */ + ltfs_thread_cond_destroy(&priv->writer_cond); + ltfs_thread_mutex_destroy(&priv->writer_lock); ltfs_thread_cond_destroy(&priv->queue_cond); ltfs_thread_mutex_destroy(&priv->queue_lock); ltfs_thread_cond_destroy(&priv->cache_cond); @@ -940,8 +978,13 @@ int unified_flush(struct dentry *d, bool closeflag, void *iosched_handle) ret = _unified_flush_unlocked(d, priv); ltfs_mutex_unlock(&d->iosched_lock); releasewrite_mrsw(&priv->lock); - } else + } else { +#ifdef USE_DIRECT_FLUSH + ret = _unified_exit_all(priv); +#else ret = _unified_flush_all(priv); +#endif + } ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_FLUSH)); return ret; @@ -1150,6 +1193,7 @@ ltfs_thread_return _unified_writer_thread(void *iosched_handle) ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_ENTER(REQ_IOS_IOSCHED)); if (! priv->writer_keepalive) { ltfs_thread_mutex_unlock(&priv->queue_lock); + ltfs_thread_cond_broadcast(&priv->writer_cond); _unified_exit_all(priv); _unified_process_queue(REQUEST_IP, priv); break; @@ -1201,7 +1245,13 @@ void _unified_process_index_queue(struct unified_data *priv) partition_id = ltfs_ip_id(priv->vol); + ltfs_thread_mutex_lock(&priv->writer_lock); acquirewrite_mrsw(&priv->lock); + + ltfsmsg(LTFS_DEBUG3, 13034D, "IP"); + ltfs_thread_cond_broadcast(&priv->writer_cond); + ltfs_thread_mutex_unlock(&priv->writer_lock); + TAILQ_FOREACH_SAFE(dentry_priv, &priv->ip_queue, ip_queue, dpr_aux) { /* Remove dentry_priv from the IP queue, process its IP requests, * then free it if the request list is empty. 
*/ @@ -1251,7 +1301,13 @@ void _unified_process_data_queue(enum request_state queue, struct unified_data * uint32_t count, i; ssize_t ret; + ltfs_thread_mutex_lock(&priv->writer_lock); acquireread_mrsw(&priv->lock); + + ltfsmsg(LTFS_DEBUG3, 13034D, "DP"); + ltfs_thread_cond_broadcast(&priv->writer_cond); + ltfs_thread_mutex_unlock(&priv->writer_lock); + ltfs_thread_mutex_lock(&priv->queue_lock); count = queue == REQUEST_DP ? priv->dp_count : priv->dp_count + priv->ws_count; ltfs_thread_mutex_unlock(&priv->queue_lock); @@ -1260,9 +1316,9 @@ void _unified_process_data_queue(enum request_state queue, struct unified_data * * Process only the 'count' entries that are known to be in the queue. * This is needed to guarantee a limited runtime. */ - for (i=0; iqueue_lock); break; } - dentry = dentry_priv->dentry; + + if (dentry_priv) { + dentry = dentry_priv->dentry; + _unified_get_dentry_priv(dentry, NULL, priv); + } + ltfs_thread_mutex_unlock(&priv->queue_lock); if (! dentry) { @@ -1372,8 +1433,10 @@ void _unified_process_data_queue(enum request_state queue, struct unified_data * } } - if (dentry_priv) + if (dentry_priv) { ltfs_mutex_unlock(&dentry_priv->io_lock); + _unified_put_dentry_priv(dentry_priv, priv); + } } releaseread_mrsw(&priv->lock); @@ -2182,34 +2245,63 @@ int _unified_flush_all(struct unified_data *priv) { int ret; struct dentry_priv *dpr, *aux; + bool empty = false; CHECK_ARG_NULL(priv, -LTFS_NULL_ARG); + ltfs_thread_mutex_lock(&priv->writer_lock); acquirewrite_mrsw(&priv->lock); if (! TAILQ_EMPTY(&priv->dp_queue)) { TAILQ_FOREACH_SAFE(dpr, &priv->dp_queue, dp_queue, aux) { + ltfsmsg(LTFS_DEBUG, 13033D, "DP", dpr->dentry->platform_safe_name); + ltfs_mutex_lock(&dpr->dentry->iosched_lock); ret = _unified_flush_unlocked(dpr->dentry, priv); + ltfs_mutex_unlock(&dpr->dentry->iosched_lock); if (ret < 0) { ltfsmsg(LTFS_ERR, 13020E, dpr->dentry->platform_safe_name, ret); releasewrite_mrsw(&priv->lock); return ret; } } + } else { + empty = true; } if (! 
TAILQ_EMPTY(&priv->working_set)) { TAILQ_FOREACH_SAFE(dpr, &priv->working_set, working_set, aux) { + ltfsmsg(LTFS_DEBUG, 13033D, "WS", dpr->dentry->platform_safe_name); + ltfs_mutex_lock(&dpr->dentry->iosched_lock); ret = _unified_flush_unlocked(dpr->dentry, priv); + ltfs_mutex_unlock(&dpr->dentry->iosched_lock); if (ret < 0) { ltfsmsg(LTFS_ERR, 13020E, dpr->dentry->platform_safe_name, ret); releasewrite_mrsw(&priv->lock); return ret; } } + } else { + if (empty) + empty = true; + else + empty = false; } releasewrite_mrsw(&priv->lock); + if (!empty) { + ltfsmsg(LTFS_DEBUG3, 13035D); + ltfs_thread_cond_wait(&priv->writer_cond, &priv->writer_lock); + ltfs_thread_mutex_unlock(&priv->writer_lock); + ltfsmsg(LTFS_DEBUG3, 13036D); + + /* Confirm the writer thread processed the blocks requeued above */ + acquirewrite_mrsw(&priv->lock); + releasewrite_mrsw(&priv->lock); + ltfsmsg(LTFS_DEBUG3, 13037D); + } else { + ltfs_thread_mutex_unlock(&priv->writer_lock); + } + return 0; } diff --git a/src/libltfs/ltfs_fsops.c b/src/libltfs/ltfs_fsops.c index 8c941942..990ed369 100644 --- a/src/libltfs/ltfs_fsops.c +++ b/src/libltfs/ltfs_fsops.c @@ -340,7 +340,7 @@ int ltfs_fsops_create(const char *path, bool isdir, bool readonly, bool overwrit d->parent = parent; ++d->link_count; - if (!iosched_initialized(vol)) { + if (isdir || !iosched_initialized(vol)) { /* * numhandles will be incremented in iosched_open() below when ioscheduler is * enabled. 
@@ -386,7 +386,7 @@ int ltfs_fsops_create(const char *path, bool isdir, bool readonly, bool overwrit } } - if (ret == 0 && iosched_initialized(vol)) { + if (!isdir && !ret && iosched_initialized(vol)) { ret = iosched_open(dentry_path, overwrite, &d, vol); if (ret < 0) { fs_release_dentry(d); From 5fce7dd7700a4fb37f21745d8511535298cfe590 Mon Sep 17 00:00:00 2001 From: Atsushi Abe Date: Tue, 25 Aug 2020 18:30:09 +0900 Subject: [PATCH 4/6] Introduce request counter on profiler --- src/iosched/fcfs.c | 4 +++- src/iosched/unified.c | 8 +++----- src/libltfs/iosched_ops.h | 11 ++++++----- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/iosched/fcfs.c b/src/iosched/fcfs.c index e652fc25..4a9e9aba 100644 --- a/src/iosched/fcfs.c +++ b/src/iosched/fcfs.c @@ -119,7 +119,7 @@ int fcfs_open(const char *path, bool open_write, struct dentry **dentry, void *i ret = ltfs_fsraw_open(path, open_write, dentry, priv->vol); if (!ret) - ltfs_fsraw_get_dentry(d, priv->vol); + ltfs_fsraw_get_dentry(*dentry, priv->vol); return ret; } @@ -133,6 +133,8 @@ int fcfs_open(const char *path, bool open_write, struct dentry **dentry, void *i */ int fcfs_close(struct dentry *d, bool flush, void *iosched_handle) { + struct fcfs_data *priv = (struct fcfs_data *) iosched_handle; + CHECK_ARG_NULL(d, -LTFS_NULL_ARG); CHECK_ARG_NULL(iosched_handle, -LTFS_NULL_ARG); diff --git a/src/iosched/unified.c b/src/iosched/unified.c index 97432445..b46f3f6c 100644 --- a/src/iosched/unified.c +++ b/src/iosched/unified.c @@ -1389,8 +1389,6 @@ void _unified_process_data_queue(enum request_state queue, struct unified_data * } else { TAILQ_REMOVE(&dentry_priv->requests, req, list); TAILQ_INSERT_TAIL(&local_req_list, req, list); - if (queue != REQUEST_PARTIAL) - ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EVENT(REQ_IOS_DEQUEUE_DP)); } } } @@ -1761,7 +1759,6 @@ int _unified_update_queue_membership(bool add, bool all, enum request_state queu if (! 
dentry_priv->write_ip) ++priv->dp_request_count; ++dentry_priv->in_dp_queue; - ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EVENT(REQ_IOS_ENQUEUE_DP)); } else { if ((all && dentry_priv->in_dp_queue) || dentry_priv->in_dp_queue == 1) { TAILQ_REMOVE(&priv->dp_queue, dentry_priv, dp_queue); @@ -1787,7 +1784,6 @@ int _unified_update_queue_membership(bool add, bool all, enum request_state queu } ++dentry_priv->in_ip_queue; ++priv->ip_request_count; - ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EVENT(REQ_IOS_ENQUEUE_IP)); } else { if ((all && dentry_priv->in_ip_queue) || dentry_priv->in_ip_queue == 1) { TAILQ_REMOVE(&priv->ip_queue, dentry_priv, ip_queue); @@ -1800,7 +1796,6 @@ int _unified_update_queue_membership(bool add, bool all, enum request_state queu --dentry_priv->in_ip_queue; --priv->ip_request_count; } - ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EVENT(REQ_IOS_DEQUEUE_IP)); } break; @@ -1835,6 +1830,8 @@ void _unified_free_request(struct write_request *req, struct unified_data *priv) } free(req); + + ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EVENT(REQ_IOS_DEL_REQUEST)); } /** @@ -1971,6 +1968,7 @@ ssize_t _unified_insert_new_request(const char *buf, off_t offset, size_t count, dpr->file_size = new_req->offset + new_req->count; ltfsmsg(LTFS_DEBUG3, 13031D, new_req); + ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EVENT(REQ_IOS_ADD_REQUEST)); return (ssize_t)count; } diff --git a/src/libltfs/iosched_ops.h b/src/libltfs/iosched_ops.h index 9d437df1..a91ab0f3 100644 --- a/src/libltfs/iosched_ops.h +++ b/src/libltfs/iosched_ops.h @@ -85,7 +85,6 @@ const char *iosched_get_message_bundle_name(void **message_data); /** * Request type definisions for LTFS request profile */ - #define REQ_IOS_OPEN 0000 /**< open */ #define REQ_IOS_CLOSE 0001 /**< close */ #define REQ_IOS_READ 0002 /**< read */ @@ -95,9 +94,11 @@ const char 
*iosched_get_message_bundle_name(void **message_data); #define REQ_IOS_GETFSIZE 0006 /**< get_filesize */ #define REQ_IOS_UPDPLACE 0007 /**< update_data_placement */ #define REQ_IOS_IOSCHED 0008 /**< (io_scheduler ... _unified_writer_thread) */ -#define REQ_IOS_ENQUEUE_IP 0009 /**< Enqueue data block to IP */ -#define REQ_IOS_DEQUEUE_IP 000A /**< Dequeue data block to IP */ -#define REQ_IOS_ENQUEUE_DP 000B /**< Enqueue data block to DP */ -#define REQ_IOS_DEQUEUE_DP 000C /**< Dequeue data block to DP */ +#define REQ_IOS_ENQUEUE_IP 0009 /**< Enqueue data block to IP (unused at this time) */ +#define REQ_IOS_DEQUEUE_IP 000A /**< Dequeue data block to IP (unused at this time) */ +#define REQ_IOS_ENQUEUE_DP 000B /**< Enqueue data block to DP (unused at this time) */ +#define REQ_IOS_DEQUEUE_DP 000C /**< Dequeue data block to DP (unused at this time) */ +#define REQ_IOS_ADD_REQUEST 000D /**< Add a request */ +#define REQ_IOS_DEL_REQUEST 000E /**< Remove a request */ #endif /* __iosched_ops_h */ From 4c406a24ab3dd23a9b742ae5c12e68a664f888e0 Mon Sep 17 00:00:00 2001 From: Atsushi Abe Date: Tue, 25 Aug 2020 18:45:03 +0900 Subject: [PATCH 5/6] Enhance comments and messages --- messages/iosched_unified/root.txt | 6 +++--- src/iosched/unified.c | 25 ++++++++++++++++++++++--- 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/messages/iosched_unified/root.txt b/messages/iosched_unified/root.txt index ce6ed4dc..ca0deecc 100644 --- a/messages/iosched_unified/root.txt +++ b/messages/iosched_unified/root.txt @@ -72,8 +72,8 @@ root:table { 13032D:string { "Removed a request (0x%llx)." } 13033D:string { "Flushing all (%s, %s)." } 13034D:string { "Send a broadcast from %s." } - 13035D:string { "Waiting a broadcast." } - 13036D:string { "Received a broadcast." } - 13037D:string { "Flush is done." } + 13035D:string { "Waiting a broadcast signal." } + 13036D:string { "Received a broadcast signal." } + 13037D:string { "Flush all operation is finished." 
}
 }
}
diff --git a/src/iosched/unified.c b/src/iosched/unified.c
index b46f3f6c..f1db930e 100644
--- a/src/iosched/unified.c
+++ b/src/iosched/unified.c
@@ -1248,6 +1248,11 @@ void _unified_process_index_queue(struct unified_data *priv)
 	ltfs_thread_mutex_lock(&priv->writer_lock);
 	acquirewrite_mrsw(&priv->lock);
 
+	/*
+	 * Send a signal to all threads that the writer thread woke up and is processing queued
+	 * requests. A waiter may try to lock priv->lock after receiving this signal, and it is guaranteed that
+	 * all queued requests are flushed to the drive once priv->lock is captured.
+	 */
 	ltfsmsg(LTFS_DEBUG3, 13034D, "IP");
 	ltfs_thread_cond_broadcast(&priv->writer_cond);
 	ltfs_thread_mutex_unlock(&priv->writer_lock);
@@ -1304,6 +1309,11 @@ void _unified_process_data_queue(enum request_state queue, struct unified_data *
 	ltfs_thread_mutex_lock(&priv->writer_lock);
 	acquireread_mrsw(&priv->lock);
 
+	/*
+	 * Send a signal to all threads that the writer thread woke up and is processing queued
+	 * requests. A waiter may try to lock priv->lock after receiving this signal, and it is guaranteed that
+	 * all queued requests are flushed to the drive once priv->lock is captured.
+	 */
 	ltfsmsg(LTFS_DEBUG3, 13034D, "DP");
 	ltfs_thread_cond_broadcast(&priv->writer_cond);
 	ltfs_thread_mutex_unlock(&priv->writer_lock);
@@ -2287,15 +2297,24 @@ int _unified_flush_all(struct unified_data *priv)
 	releasewrite_mrsw(&priv->lock);
 
 	if (!empty) {
-		ltfsmsg(LTFS_DEBUG3, 13035D);
+		/*
+		 * Wait for a signal from the writer thread if a pending request existed.
+		 * The writer thread wakes up once a signal comes, and at this time
+		 * the writer thread shall hold priv->lock.
+		 */
+		ltfsmsg(LTFS_DEBUG2, 13035D);
 		ltfs_thread_cond_wait(&priv->writer_cond, &priv->writer_lock);
 		ltfs_thread_mutex_unlock(&priv->writer_lock);
 		ltfsmsg(LTFS_DEBUG3, 13036D);
 
-		/* Confirm the writer thread processed the blocks requeued above */
+		/*
+		 * priv->lock shall be released when all queued requests are processed.
+		 * So we can consider that the queued requests on this flush operation are already
+		 * flushed to the drive.
+		 */
 		acquirewrite_mrsw(&priv->lock);
 		releasewrite_mrsw(&priv->lock);
-		ltfsmsg(LTFS_DEBUG3, 13037D);
+		ltfsmsg(LTFS_DEBUG2, 13037D);
 	} else {
 		ltfs_thread_mutex_unlock(&priv->writer_lock);
 	}

From 5afeed17e69f4d70c17ecb3cd08df34d605516d1 Mon Sep 17 00:00:00 2001
From: Atsushi Abe
Date: Wed, 2 Dec 2020 23:02:16 +0900
Subject: [PATCH 6/6] Reflect Lucas's comments

---
 messages/iosched_unified/root.txt |   1 +
 src/iosched/fcfs.c                |   2 +-
 src/iosched/unified.c             | 109 +++++++++++++++++------------
 3 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/messages/iosched_unified/root.txt b/messages/iosched_unified/root.txt
index ca0deecc..759b33ad 100644
--- a/messages/iosched_unified/root.txt
+++ b/messages/iosched_unified/root.txt
@@ -75,5 +75,6 @@ root:table {
 	13035D:string { "Waiting a broadcast signal." }
 	13036D:string { "Received a broadcast signal." }
 	13037D:string { "Flush all operation is finished." }
+	13038E:string { "Cannot capture dentry private in %s (%d)"}
 }
}
diff --git a/src/iosched/fcfs.c b/src/iosched/fcfs.c
index 4a9e9aba..4491f9a2 100644
--- a/src/iosched/fcfs.c
+++ b/src/iosched/fcfs.c
@@ -118,7 +118,7 @@ int fcfs_open(const char *path, bool open_write, struct dentry **dentry, void *i
 	CHECK_ARG_NULL(iosched_handle, -LTFS_NULL_ARG);
 
 	ret = ltfs_fsraw_open(path, open_write, dentry, priv->vol);
-	if (!ret)
+	if (ret == 0)
 		ltfs_fsraw_get_dentry(*dentry, priv->vol);
 
 	return ret;
diff --git a/src/iosched/unified.c b/src/iosched/unified.c
index f1db930e..b31f69ee 100644
--- a/src/iosched/unified.c
+++ b/src/iosched/unified.c
@@ -223,6 +223,7 @@ struct unified_data {
 	 * or the corresponding request counters. Do not take the sched_lock or any dentry_priv lock
 	 * while holding this lock.
*/ + bool did_writer_wake_up; /**< Flag to show, writer already woke up by a signal or not */ ltfs_thread_mutex_t writer_lock; ltfs_thread_cond_t writer_cond; /**< Signal this variable when the writer thread starts to write blocks */ @@ -477,7 +478,7 @@ int unified_open(const char *path, bool open_write, struct dentry **dentry, void ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_ENTER(REQ_IOS_OPEN)); ret = ltfs_fsraw_open(path, open_write, dentry, ((struct unified_data *)iosched_handle)->vol); - if (!ret) { + if (ret == 0) { ret = _unified_get_dentry_priv(*dentry, NULL, priv); } ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_OPEN)); @@ -494,9 +495,9 @@ int unified_open(const char *path, bool open_write, struct dentry **dentry, void */ int unified_close(struct dentry *d, bool flush, void *iosched_handle) { - int write_error, ret = 0; + int write_error = 0, ret = 0; struct unified_data *priv = iosched_handle; - struct dentry_priv *dpr; + struct dentry_priv *dpr = NULL; CHECK_ARG_NULL(d, -LTFS_NULL_ARG); CHECK_ARG_NULL(iosched_handle, -LTFS_NULL_ARG); @@ -504,12 +505,20 @@ int unified_close(struct dentry *d, bool flush, void *iosched_handle) acquireread_mrsw(&priv->lock); ltfs_mutex_lock(&d->iosched_lock); + ret = _unified_get_dentry_priv(d, &dpr, priv); + if (ret < 0) { + ltfsmsg(LTFS_ERR, 13038E, __FUNCTION__, ret); + goto out; + } + if (flush) ret = _unified_flush_unlocked(d, priv); write_error = _unified_get_write_error(d->iosched_priv); + _unified_put_dentry_priv(dpr, priv); /* Release DPR captured in this function */ +out: ltfs_mutex_unlock(&d->iosched_lock); releaseread_mrsw(&priv->lock); @@ -518,6 +527,7 @@ int unified_close(struct dentry *d, bool flush, void *iosched_handle) _unified_put_dentry_priv(dpr, priv); /* Release DPR captured in unified_open() */ ltfs_fsraw_close(d); ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_CLOSE)); + return ret ? ret : write_error ? 
write_error : 0; } @@ -564,6 +574,7 @@ ssize_t unified_read(struct dentry *d, char *buf, size_t size, off_t offset, voi goto out; releaseread_mrsw(&priv->vol->lock); + ltfs_mutex_lock(&d->iosched_lock); _unified_get_dentry_priv(d, &dpr, priv); if (! dpr) { ltfs_mutex_unlock(&d->iosched_lock); @@ -936,19 +947,18 @@ ssize_t unified_write(struct dentry *d, const char *buf, size_t size, off_t offs /* It's undesirable to fail the write here, as we have no way to roll back the cache * to its previous state. There's no harm in ignoring revalidation errors at this point. */ if (err == 0) { - if (isupdatetime) { - acquirewrite_mrsw(&d->meta_lock); - get_current_timespec(&d->modify_time); - d->change_time = d->modify_time; - releasewrite_mrsw(&d->meta_lock); - } + if (isupdatetime) { + acquirewrite_mrsw(&d->meta_lock); + get_current_timespec(&d->modify_time); + d->change_time = d->modify_time; + releasewrite_mrsw(&d->meta_lock); + } /* Don't set index dirty flag here. Will be set later by ltfs_fsraw_add_extent. 
*/ releaseread_mrsw(&priv->vol->lock); } } ltfs_mutex_unlock(&d->iosched_lock); if (spare_cache) { - _unified_put_dentry_priv(dpr, priv); _unified_cache_free(spare_cache, 0, priv); } releaseread_mrsw(&priv->lock); @@ -1070,16 +1080,16 @@ int unified_truncate(struct dentry *d, off_t length, void *iosched_handle) ltfs_mutex_lock(&dpr->io_lock); ret = ltfs_fsraw_truncate(d, length, priv->vol); ltfs_mutex_unlock(&dpr->io_lock); + + _unified_put_dentry_priv(dpr, priv); + } else { + /* No corresponded DPR just call normal truncate */ + ret = ltfs_fsraw_truncate(d, length, priv->vol); } ltfs_mutex_unlock(&d->iosched_lock); releaseread_mrsw(&priv->lock); - if (dpr) - _unified_put_dentry_priv(dpr, priv); - else - ret = ltfs_fsraw_truncate(d, length, priv->vol); - ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_EXIT(REQ_IOS_TRUNCATE)); return ret; } @@ -1193,6 +1203,7 @@ ltfs_thread_return _unified_writer_thread(void *iosched_handle) ltfs_profiler_add_entry(priv->profiler, &priv->proflock, IOSCHED_REQ_ENTER(REQ_IOS_IOSCHED)); if (! priv->writer_keepalive) { ltfs_thread_mutex_unlock(&priv->queue_lock); + priv->did_writer_wake_up = true; ltfs_thread_cond_broadcast(&priv->writer_cond); _unified_exit_all(priv); _unified_process_queue(REQUEST_IP, priv); @@ -1254,6 +1265,7 @@ void _unified_process_index_queue(struct unified_data *priv) * all queued requests are flushed to the drive once priv->lock is captured. */ ltfsmsg(LTFS_DEBUG3, 13034D, "IP"); + priv->did_writer_wake_up = true; ltfs_thread_cond_broadcast(&priv->writer_cond); ltfs_thread_mutex_unlock(&priv->writer_lock); @@ -1315,6 +1327,7 @@ void _unified_process_data_queue(enum request_state queue, struct unified_data * * all queued requests are flushed to the drive once priv->lock is captured. 
*/ ltfsmsg(LTFS_DEBUG3, 13034D, "DP"); + priv->did_writer_wake_up = true; ltfs_thread_cond_broadcast(&priv->writer_cond); ltfs_thread_mutex_unlock(&priv->writer_lock); @@ -1475,7 +1488,7 @@ int _unified_get_dentry_priv(struct dentry *d, ltfs_mutex_lock(&dpr->ref_lock); dpr->numhandles++; if (dentry_priv) { - *dentry_priv = d->iosched_priv; + *dentry_priv = dpr; } ltfsmsg(LTFS_DEBUG3, 13029D, "Inc", d->name.name, dpr->numhandles); ltfs_mutex_unlock(&dpr->ref_lock); @@ -1834,7 +1847,7 @@ void _unified_free_request(struct write_request *req, struct unified_data *priv) if (dpr) { ltfsmsg(LTFS_DEBUG3, 13032D, req); - _unified_put_dentry_priv(req->dpr, priv); + _unified_put_dentry_priv(dpr, priv); } else { ltfsmsg(LTFS_DEBUG, 13030D, __FUNCTION__, req); } @@ -1924,8 +1937,8 @@ ssize_t _unified_insert_new_request(const char *buf, off_t offset, size_t count, bool ip_state, struct write_request *req, struct dentry *d, struct unified_data *priv) { int ret; - struct dentry_priv *dpr = NULL; - struct write_request *new_req = NULL; + struct dentry_priv *dpr; + struct write_request *new_req; size_t copy_count; if (! (*cache)) { @@ -2260,39 +2273,37 @@ int _unified_flush_all(struct unified_data *priv) ltfs_thread_mutex_lock(&priv->writer_lock); acquirewrite_mrsw(&priv->lock); - if (! TAILQ_EMPTY(&priv->dp_queue)) { - TAILQ_FOREACH_SAFE(dpr, &priv->dp_queue, dp_queue, aux) { - ltfsmsg(LTFS_DEBUG, 13033D, "DP", dpr->dentry->platform_safe_name); - ltfs_mutex_lock(&dpr->dentry->iosched_lock); - ret = _unified_flush_unlocked(dpr->dentry, priv); - ltfs_mutex_unlock(&dpr->dentry->iosched_lock); - if (ret < 0) { - ltfsmsg(LTFS_ERR, 13020E, dpr->dentry->platform_safe_name, ret); - releasewrite_mrsw(&priv->lock); - return ret; + /* First of all, test both are empty */ + if (TAILQ_EMPTY(&priv->dp_queue) && TAILQ_EMPTY(&priv->working_set)) { + empty = true; + } else { + if (! 
TAILQ_EMPTY(&priv->dp_queue)) { + TAILQ_FOREACH_SAFE(dpr, &priv->dp_queue, dp_queue, aux) { + ltfsmsg(LTFS_DEBUG, 13033D, "DP", dpr->dentry->platform_safe_name); + ltfs_mutex_lock(&dpr->dentry->iosched_lock); + ret = _unified_flush_unlocked(dpr->dentry, priv); + ltfs_mutex_unlock(&dpr->dentry->iosched_lock); + if (ret < 0) { + ltfsmsg(LTFS_ERR, 13020E, dpr->dentry->platform_safe_name, ret); + releasewrite_mrsw(&priv->lock); + return ret; + } } } - } else { - empty = true; - } - if (! TAILQ_EMPTY(&priv->working_set)) { - TAILQ_FOREACH_SAFE(dpr, &priv->working_set, working_set, aux) { - ltfsmsg(LTFS_DEBUG, 13033D, "WS", dpr->dentry->platform_safe_name); - ltfs_mutex_lock(&dpr->dentry->iosched_lock); - ret = _unified_flush_unlocked(dpr->dentry, priv); - ltfs_mutex_unlock(&dpr->dentry->iosched_lock); - if (ret < 0) { - ltfsmsg(LTFS_ERR, 13020E, dpr->dentry->platform_safe_name, ret); - releasewrite_mrsw(&priv->lock); - return ret; + if (! TAILQ_EMPTY(&priv->working_set)) { + TAILQ_FOREACH_SAFE(dpr, &priv->working_set, working_set, aux) { + ltfsmsg(LTFS_DEBUG, 13033D, "WS", dpr->dentry->platform_safe_name); + ltfs_mutex_lock(&dpr->dentry->iosched_lock); + ret = _unified_flush_unlocked(dpr->dentry, priv); + ltfs_mutex_unlock(&dpr->dentry->iosched_lock); + if (ret < 0) { + ltfsmsg(LTFS_ERR, 13020E, dpr->dentry->platform_safe_name, ret); + releasewrite_mrsw(&priv->lock); + return ret; + } } } - } else { - if (empty) - empty = true; - else - empty = false; } releasewrite_mrsw(&priv->lock); @@ -2303,7 +2314,9 @@ int _unified_flush_all(struct unified_data *priv) * the writer thread shall hold priv->lock. */ ltfsmsg(LTFS_DEBUG2, 13035D); - ltfs_thread_cond_wait(&priv->writer_cond, &priv->writer_lock); + priv->did_writer_wake_up = false; + while (!priv->did_writer_wake_up) + ltfs_thread_cond_wait(&priv->writer_cond, &priv->writer_lock); ltfs_thread_mutex_unlock(&priv->writer_lock); ltfsmsg(LTFS_DEBUG3, 13036D);