From 790b3ce537f015c0077110158ca406e3bbb8b96d Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Mon, 29 Jul 2024 16:57:29 -0400 Subject: [PATCH] Linux: Make zfs_prune() fair on NUMA systems Previous code evicted nr_to_scan items from each node. This not only multiplies the eviction by the number of nodes, but may totally exhaust smaller ones, evicting inodes used by the active workload and requiring their immediate recreation. This patch spreads the requested eviction between all NUMA nodes proportionally to their evictable counts, which should be closer to expected LRU logic. Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. --- module/os/linux/zfs/zfs_vfsops.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c index dcc586362cc7..d315355be93b 100644 --- a/module/os/linux/zfs/zfs_vfsops.c +++ b/module/os/linux/zfs/zfs_vfsops.c @@ -1282,14 +1282,22 @@ zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects) defined(SHRINK_CONTROL_HAS_NID) && \ defined(SHRINKER_NUMA_AWARE) if (shrinker->flags & SHRINKER_NUMA_AWARE) { + long tc = 1; + for_each_online_node(sc.nid) { + long c = shrinker->count_objects(shrinker, &sc); + if (c == 0 || c == SHRINK_EMPTY) + continue; + tc += c; + } *objects = 0; for_each_online_node(sc.nid) { + long c = shrinker->count_objects(shrinker, &sc); + if (c == 0 || c == SHRINK_EMPTY) + continue; + if (c > tc) + tc = c; + sc.nr_to_scan = mult_frac(nr_to_scan, c, tc) + 1; *objects += (*shrinker->scan_objects)(shrinker, &sc); - /* - * reset sc.nr_to_scan, modified by - * scan_objects == super_cache_scan - */ - sc.nr_to_scan = nr_to_scan; } } else { *objects = (*shrinker->scan_objects)(shrinker, &sc);