diff --git a/alpenhorn/cli/group/show.py b/alpenhorn/cli/group/show.py index afd9e50a5..ebdd88e62 100644 --- a/alpenhorn/cli/group/show.py +++ b/alpenhorn/cli/group/show.py @@ -7,6 +7,7 @@ from ...db import StorageGroup, StorageNode from ..cli import echo +from ..node.stats import get_stats @click.command() @@ -51,19 +52,55 @@ def show(group_name, node_details, node_stats): if nodes: if node_details or node_stats: if node_details: - data = [ - ( + details = { + node.id: ( node.name, node.host, "Yes" if node.active else "No", node.io_class if node.io_class else "Default", ) for node in nodes + } + if node_stats: + stats = get_stats(nodes, False) + + # Make table + data = [] + if node_stats and node_details: + headers = [ + "Name", + "Host", + "Active", + "I/O Class", + "File Count", + "Total Size", + "% Full", ] + for node in nodes: + data.append( + ( + *details[node.id], + stats[node.id]["count"], + stats[node.id]["size"], + stats[node.id]["percent"], + ) + ) + elif node_details: headers = ["Name", "Host", "Active", "I/O Class"] - if node_stats: - # TODO: add --node-stats support when "alpenhorn node stats" is implemented - raise NotImplementedError() + for node in nodes: + data.append(details[node.id]) + else: + headers = ["Name", "File Count", "Total Size", "% Full"] + for node in nodes: + data.append( + ( + node.name, + stats[node.id]["count"], + stats[node.id]["size"], + stats[node.id]["percent"], + ) + ) + echo(tabulate(data, headers=headers)) else: # simple list diff --git a/alpenhorn/cli/node/__init__.py b/alpenhorn/cli/node/__init__.py index bbf32810d..d6d06e0c3 100644 --- a/alpenhorn/cli/node/__init__.py +++ b/alpenhorn/cli/node/__init__.py @@ -19,6 +19,7 @@ from .list import list_ from .modify import modify from .rename import rename +from .show import show from .stats import stats RE_LOCK_FILE = re.compile(r"^\..*\.lock$") @@ -36,6 +37,7 @@ def cli(): cli.add_command(list_, "list") cli.add_command(modify, "modify") cli.add_command(rename, "rename") +cli.add_command(show, "show") cli.add_command(stats, "stats") diff --git a/alpenhorn/cli/node/show.py b/alpenhorn/cli/node/show.py new file mode 100644 index 000000000..d71c00619 --- /dev/null +++ b/alpenhorn/cli/node/show.py @@ -0,0 +1,98 @@ +"""alpenhorn node show command""" + +import json +import click +import peewee as pw + +from ...common.util import pretty_bytes +from ...db import StorageGroup, StorageNode +from ..cli import echo +from .stats import get_stats + + +@click.command() +@click.argument("name", metavar="NAME") +@click.option("--stats", is_flag=True, help="Show usage stats of the node.") +def show(name, stats): + """Show details of a Storage Node. + + Shows details of the Storage Node named NODE. + """ + + try: + node = StorageNode.get(name=name) + except pw.DoesNotExist: + raise click.ClickException(f"no such node: {name}") + + if node.storage_type == "A": + type_name = "Archive" + elif node.storage_type == "T": + type_name = "Transport" + else: + type_name = "-" + + if node.max_total_gb: + max_total = pretty_bytes(node.max_total_gb * 2**30) + else: + max_total = "-" + + if node.min_avail_gb: + min_avail = pretty_bytes(node.min_avail_gb * 2**30) + else: + min_avail = "-" + + if node.avail_gb: + avail = pretty_bytes(node.avail_gb * 2**30) + else: + avail = "-" + + if node.avail_gb_last_checked: + last_checked = node.avail_gb_last_checked.ctime() + " UTC" + else: + last_checked = "???" 
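+    # Note: the *_gb fields are treated as GiB here (hence the 2**30
+    # conversion to bytes for pretty_bytes); unset values are shown as "-",
+    # and a free-space check that has never run is shown as "???".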
+ + # Print a report + echo(" Storage Node: " + node.name) + echo(" Storage Group: " + node.group.name) + echo(" Active: " + ("Yes" if node.active else "No")) + echo(" Type: " + type_name) + echo(" Notes: " + (node.notes if node.notes else "")) + echo(" I/O Class: " + (node.io_class if node.io_class else "Default")) + echo() + echo(" Daemon Host: " + (node.host if node.host else "")) + echo(" Log-in Address: " + (node.address if node.address else "")) + echo("Log-in Username: " + (node.username if node.username else "")) + echo() + echo(" Auto-Import: " + ("On" if node.auto_import else "Off")) + echo( + " Auto-Verify: " + + (f"On (Size: {node.auto_verify})" if node.auto_verify else "Off") + ) + echo(" Max Total: " + max_total) + echo(" Available: " + avail) + echo(" Min Available: " + min_avail) + echo(" Last Checked: " + last_checked) + + echo("\nI/O Config:\n") + if node.io_config: + try: + io_config = json.loads(node.io_config) + if io_config: + # Find length of longest key (but not too long) + keylen = min(max([len(key) for key in io_config]), 30) + for key, value in io_config.items(): + echo(" " + key.rjust(keylen) + ": " + str(value)) + else: + echo(" empty") + except json.JSONDecodeError: + echo("INVALID (JSON decode error)") + else: + echo(" none") + + if stats: + stats = get_stats([node], False)[node.id] + + echo("\nStats:\n") + echo(" Total Files: " + str(stats["count"])) + echo(" Total Size: " + stats["size"]) + echo(" Usage: " + stats["percent"].lstrip() + "%") diff --git a/alpenhorn/cli/node/stats.py b/alpenhorn/cli/node/stats.py index 731703a75..6313464ff 100644 --- a/alpenhorn/cli/node/stats.py +++ b/alpenhorn/cli/node/stats.py @@ -1,8 +1,11 @@ """alpenhorn node stats command""" +from __future__ import annotations + import click import peewee as pw from tabulate import tabulate +from collections import defaultdict from ...db import StorageGroup, StorageNode, ArchiveFile, ArchiveFileCopy from ...common.util import pretty_bytes @@ -10,6 +13,129 @@ from ..cli import echo +def get_stats(nodes: list[StorageNode], extra_stats: bool) -> dict[int, dict]: + """Generate usage stats for nodes. 
+ + Parameters + ---------- + nodes: + a list of StorageNodes to generate stats for + extra_stats: + If True also return corrupt/suspect/missing counts + + Returns + ------- + stats: + a dict of dicts of stats keyed by node id + """ + + stats = defaultdict(dict) + for row in ( + StorageNode.select( + StorageNode.id.alias("id"), + pw.fn.COUNT(ArchiveFileCopy.id).alias("count"), + pw.fn.Sum(ArchiveFile.size_b).alias("size"), + ) + .join(ArchiveFileCopy, pw.JOIN.LEFT_OUTER) + .join(ArchiveFile, pw.JOIN.LEFT_OUTER) + .where( + ArchiveFileCopy.has_file == "Y", + ArchiveFileCopy.wants_file == "Y", + StorageNode.id << nodes, + ) + .group_by(StorageNode.id) + .dicts() + ): + stats[row["id"]] = row + + # Add the extra stats, if requested + if extra_stats: + # We could make this a huge, nasty SQL query + # by employing multiple subqueries, but I think it's + # probably more readable if we do it one-by-one, even + # though that's going to be a bit more work for the client + # itself + + # Corrupt counts + for row in ( + StorageNode.select( + StorageNode.id, + pw.fn.COUNT(ArchiveFileCopy.id), + ) + .join(ArchiveFileCopy, pw.JOIN.LEFT_OUTER) + .where( + ArchiveFileCopy.has_file == "X", + ArchiveFileCopy.wants_file == "Y", + StorageNode.id << nodes, + ) + .group_by(StorageNode.id) + .tuples() + ): + stats[row[0]]["corrupt"] = row[1] + + # Suspect counts + for row in ( + StorageNode.select( + StorageNode.id, + pw.fn.COUNT(ArchiveFileCopy.id), + ) + .join(ArchiveFileCopy, pw.JOIN.LEFT_OUTER) + .where( + ArchiveFileCopy.has_file == "M", + ArchiveFileCopy.wants_file == "Y", + StorageNode.id << nodes, + ) + .group_by(StorageNode.id) + .tuples() + ): + stats[row[0]]["suspect"] = row[1] + + # Missing counts + for row in ( + StorageNode.select( + StorageNode.id, + pw.fn.COUNT(ArchiveFileCopy.id), + ) + .join(ArchiveFileCopy, pw.JOIN.LEFT_OUTER) + .where( + ArchiveFileCopy.has_file == "N", + ArchiveFileCopy.wants_file == "Y", + StorageNode.id << nodes, + ) + .group_by(StorageNode.id) + .tuples() + ): + stats[row[0]]["missing"] = row[1] + + # Some post-processing + for node in nodes: + node_stats = stats[node.id] + if "count" not in node_stats or not node_stats["count"]: + stats[node.id]["count"] = 0 + + if "size" in node_stats and node_stats["size"]: + size = pretty_bytes(node_stats["size"]) + if node.max_total_gb: + percent = 100.0 * node_stats["size"] / node.max_total_gb / 2**30 + stats[node.id]["percent"] = f"{percent:5.2f}" + else: + stats[node.id]["percent"] = "-" + stats[node.id]["size"] = size + else: + stats[node.id]["size"] = "-" + stats[node.id]["percent"] = "-" + + if extra_stats: + if "corrupt" not in node_stats or not node_stats["corrupt"]: + stats[node.id]["corrupt"] = "-" + if "suspect" not in node_stats or not node_stats["suspect"]: + stats[node.id]["suspect"] = "-" + if "missing" not in node_stats or not node_stats["missing"]: + stats[node.id]["missing"] = "-" + + return stats + + @click.command() @click.option( "--active/--inactive", @@ -52,117 +178,46 @@ def stats(active, group, host, extra_stats): nodes = list(query.execute()) # Now fetch stats - stats = { - row["id"]: row - for row in StorageNode.select( - StorageNode.id, - pw.fn.COUNT(ArchiveFileCopy.id).alias("count"), - pw.fn.Sum(ArchiveFile.size_b).alias("size"), - ) - .join(ArchiveFileCopy, pw.JOIN.LEFT_OUTER) - .join(ArchiveFile, pw.JOIN.LEFT_OUTER) - .where( - ArchiveFileCopy.has_file == "Y", - ArchiveFileCopy.wants_file == "Y", - StorageNode.id << nodes, - ) - .group_by(StorageNode.id) - .dicts() - } + stats = get_stats(nodes, 
extra_stats) # Compose table headers = ["Name", "File Count", "Total Size", "% Full"] colalign = ["left", "right", "right", "right"] + if extra_stats: + headers += ["Corrupt Files", "Suspect Files", "Missing Files"] + colalign += ["right", "right", "right"] + + # Create table rows data = [] for node in nodes: if node.id not in stats: - data.append((node.name, 0, "-", "-")) + if extra_stats: + data.append((node.name, 0, "-", "-", "-", "-", "-")) + else: + data.append((node.name, 0, "-", "-")) continue node_stats = stats[node.id] - if node_stats["size"]: - size = pretty_bytes(node_stats["size"]) - if node.max_total_gb: - percent = 100.0 * node_stats["size"] / node.max_total_gb / 2**30 - percent = f"{percent:5.2f}" - else: - percent = "-" - else: - size = "-" - percent = "-" - data.append((node.name, node_stats["count"], size, percent)) - - # Add the extra stats, if requested - if extra_stats: - headers += ["Corrupt Files", "Suspect Files", "Missing Files"] - colalign += ["right", "right", "right"] - - # We could make this a huge, nasty SQL query - # by employing multiple subqueries, but I think it's - # probably more readable if we do it one-by-one, even - # though that's going to be a bit more work for the CLI - # itself - # Corrupt counts - corrupt = { - row[0]: row[1] - for row in ( - StorageNode.select( - StorageNode.name, - pw.fn.COUNT(ArchiveFileCopy.id), - ) - .join(ArchiveFileCopy, pw.JOIN.LEFT_OUTER) - .where( - ArchiveFileCopy.has_file == "X", - ArchiveFileCopy.wants_file == "Y", - StorageNode.id << nodes, - ) - .group_by(StorageNode.id) - ).tuples() - } - suspect = { - row[0]: row[1] - for row in ( - StorageNode.select( - StorageNode.name, - pw.fn.COUNT(ArchiveFileCopy.id), - ) - .join(ArchiveFileCopy, pw.JOIN.LEFT_OUTER) - .where( - ArchiveFileCopy.has_file == "M", - ArchiveFileCopy.wants_file == "Y", - StorageNode.id << nodes, - ) - .group_by(StorageNode.id) - ).tuples() - } - missing = { - row[0]: row[1] - for row in ( - StorageNode.select( - StorageNode.name, - pw.fn.COUNT(ArchiveFileCopy.id), - ) - .join(ArchiveFileCopy, pw.JOIN.LEFT_OUTER) - .where( - ArchiveFileCopy.has_file == "N", - ArchiveFileCopy.wants_file == "Y", - StorageNode.id << nodes, + if extra_stats: + data.append( + ( + node.name, + node_stats["count"], + node_stats["size"], + node_stats["percent"], + node_stats["corrupt"], + node_stats["suspect"], + node_stats["missing"], ) - .group_by(StorageNode.id) - ).tuples() - } - - old_data = data - data = [] - echo(f"C: {corrupt}") - for row in old_data: + ) + else: data.append( ( - *row, - corrupt.get(row[0], "-"), - suspect.get(row[0], "-"), - missing.get(row[0], "-"), + node.name, + node_stats["count"], + node_stats["size"], + node_stats["percent"], ) ) diff --git a/tests/cli/group/test_show.py b/tests/cli/group/test_show.py index 93ab934b9..ccf60645b 100644 --- a/tests/cli/group/test_show.py +++ b/tests/cli/group/test_show.py @@ -1,7 +1,13 @@ """Test CLI: alpenhorn group show""" import pytest -from alpenhorn.db import StorageGroup, StorageNode +from alpenhorn.db import ( + StorageGroup, + StorageNode, + ArchiveAcq, + ArchiveFile, + ArchiveFileCopy, +) def test_no_show(clidb, cli): @@ -80,8 +86,8 @@ def test_show_io_config(clidb, cli): assert "Param2" in result.output -def test_show_node_details(clidb, cli): - """Test show --node_details.""" +def test_show_node_details(clidb, cli, assert_row_present): + """Test show --node-details.""" # Make a StorageGroup with some nodes in it. 
group = StorageGroup.create(name="SGroup", io_class="IOClass") @@ -92,12 +98,79 @@ def test_show_node_details(clidb, cli): result = cli(0, ["group", "show", "SGroup", "--node-details"]) - assert "Node1" in result.output - assert "Yes" in result.output - assert "over_here" in result.output - assert "Default" in result.output + assert_row_present(result.output, "Node1", "over_here", "Yes", "Default") + assert_row_present(result.output, "Node2", "over_there", "No", "NodeClass") - assert "Node1" in result.output - assert "No" in result.output - assert "over_there" in result.output - assert "NodeClass" in result.output + +def test_show_node_stats(clidb, cli, assert_row_present): + """Test show --node-stats.""" + + # Make a StorageGroup with some nodes in it. + group = StorageGroup.create(name="SGroup", io_class="IOClass") + node1 = StorageNode.create(name="Node1", group=group, active=True, host="over_here") + node2 = StorageNode.create( + name="Node2", + group=group, + active=False, + host="over_there", + io_class="NodeClass", + max_total_gb=1, + ) + + # And some files + acq = ArchiveAcq.create(name="acq") + file = ArchiveFile.create(name="File1", acq=acq, size_b=1234) + ArchiveFileCopy.create(file=file, node=node1, has_file="Y", wants_file="Y") + ArchiveFileCopy.create(file=file, node=node2, has_file="X", wants_file="Y") + + file = ArchiveFile.create(name="File2", acq=acq, size_b=2345) + ArchiveFileCopy.create(file=file, node=node1, has_file="N", wants_file="Y") + ArchiveFileCopy.create(file=file, node=node2, has_file="Y", wants_file="Y") + + file = ArchiveFile.create(name="File3", acq=acq, size_b=3456) + ArchiveFileCopy.create(file=file, node=node1, has_file="Y", wants_file="Y") + ArchiveFileCopy.create(file=file, node=node2, has_file="Y", wants_file="Y") + + result = cli(0, ["group", "show", "SGroup", "--node-stats"]) + + assert_row_present(result.output, "Node1", 2, "4.580 kiB", "-") + assert_row_present(result.output, "Node2", 2, "5.665 kiB", "0.00") + + +def test_show_node_details_stats(clidb, cli, assert_row_present): + """Test show --node-details --node-stats.""" + + # Make a StorageGroup with some nodes in it. 
+ group = StorageGroup.create(name="SGroup", io_class="IOClass") + node1 = StorageNode.create(name="Node1", group=group, active=True, host="over_here") + node2 = StorageNode.create( + name="Node2", + group=group, + active=False, + host="over_there", + io_class="NodeClass", + max_total_gb=1, + ) + + # And some files + acq = ArchiveAcq.create(name="acq") + file = ArchiveFile.create(name="File1", acq=acq, size_b=1234) + ArchiveFileCopy.create(file=file, node=node1, has_file="Y", wants_file="Y") + ArchiveFileCopy.create(file=file, node=node2, has_file="X", wants_file="Y") + + file = ArchiveFile.create(name="File2", acq=acq, size_b=2345) + ArchiveFileCopy.create(file=file, node=node1, has_file="N", wants_file="Y") + ArchiveFileCopy.create(file=file, node=node2, has_file="Y", wants_file="Y") + + file = ArchiveFile.create(name="File3", acq=acq, size_b=3456) + ArchiveFileCopy.create(file=file, node=node1, has_file="Y", wants_file="Y") + ArchiveFileCopy.create(file=file, node=node2, has_file="Y", wants_file="Y") + + result = cli(0, ["group", "show", "SGroup", "--node-stats", "--node-details"]) + + assert_row_present( + result.output, "Node1", "over_here", "Yes", "Default", 2, "4.580 kiB", "-" + ) + assert_row_present( + result.output, "Node2", "over_there", "No", "NodeClass", 2, "5.665 kiB", "0.00" + ) diff --git a/tests/cli/node/test_show.py b/tests/cli/node/test_show.py new file mode 100644 index 000000000..11a0581cb --- /dev/null +++ b/tests/cli/node/test_show.py @@ -0,0 +1,137 @@ +"""Test CLI: alpenhorn node show""" + +import pytest +from alpenhorn.db import ( + StorageGroup, + StorageNode, + ArchiveAcq, + ArchiveFile, + ArchiveFileCopy, + utcnow, +) + + +def test_no_show(clidb, cli): + """Test showing nothing.""" + + cli(1, ["node", "show", "TEST"]) + + +def test_show_defaults(clidb, cli): + """Test show with default parameters.""" + + # Make a StorageGroup with some nodes in it. 
+ group = StorageGroup.create(name="SGroup") + node = StorageNode.create(name="SNode", group=group) + + result = cli(0, ["node", "show", "SNode"]) + + assert "SNode" in result.output + assert "SGroup" in result.output + assert "Archive" in result.output + assert "Notes" in result.output + assert "Default" in result.output + assert "I/O Config" in result.output + + +def test_show_empty_full(clidb, cli): + """Test show most fields full.""" + + now = utcnow() + + group = StorageGroup.create(name="Group") + StorageNode.create( + name="Node", + group=group, + notes="Comment", + io_class="IOClass", + active=True, + auto_import=1, + auto_verify=11, + host="Host", + address="Addr", + username="User", + max_total_gb=10.5, + min_avail_gb=0.25, + avail_gb=3.333, + avail_gb_last_checked=now, + ) + + result = cli(0, ["node", "show", "Node"]) + + assert "Comment" in result.output + assert "IOClass" in result.output + assert "Active: Yes" in result.output + assert "Auto-Import: On" in result.output + assert "Auto-Verify: On" in result.output + assert "11" in result.output + assert "Host" in result.output + assert "Addr" in result.output + assert "User" in result.output + assert "10.50 GiB" in result.output + assert "3.333 GiB" in result.output + assert "256.0 MiB" in result.output + assert now.ctime() + " UTC" in result.output + + +def test_show_empty_io_config(clidb, cli): + """Test show with empty I/O config.""" + + group = StorageGroup.create(name="Group") + StorageNode.create(name="Node", group=group, io_class="IOClass", io_config="{}") + + result = cli(0, ["node", "show", "Node"]) + + assert "IOClass" in result.output + assert "I/O Config" in result.output + assert "empty" in result.output + + +def test_show_io_config(clidb, cli): + """Test show with I/O config.""" + + # Make a StorageGroup with some nodes in it. + group = StorageGroup.create(name="Group") + group = StorageNode.create( + name="Node", + group=group, + io_class="IOClass", + io_config='{"Param1": 1, "Param2": 2}', + ) + + result = cli(0, ["node", "show", "Node"]) + + assert "Param1" in result.output + assert "Param2" in result.output + + +def test_show_node_stats(clidb, cli): + """Test show --stats.""" + + group = StorageGroup.create(name="Group") + node = StorageNode.create( + name="Node", + group=group, + active=True, + max_total_gb=2**-17, # 2**(30-17) == 2**13 == 8 kiB + ) + + acq = ArchiveAcq.create(name="acq") + file = ArchiveFile.create(name="File1", acq=acq, size_b=1234) + ArchiveFileCopy.create(file=file, node=node, has_file="Y", wants_file="Y") + + file = ArchiveFile.create(name="File2", acq=acq, size_b=2345) + ArchiveFileCopy.create(file=file, node=node, has_file="X", wants_file="Y") + + file = ArchiveFile.create(name="File3", acq=acq, size_b=3456) + ArchiveFileCopy.create(file=file, node=node, has_file="Y", wants_file="Y") + + result = cli(0, ["node", "show", "Node", "--stats"]) + + assert "Total Files: 2" in result.output + + # 1234 + 3456 = 4690 bytes = 4.580078 kiB + assert "4.580 kiB" in result.output + + # 4.58 out of 8 == 57.25 percent + assert "57.25%" in result.output
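+
+
+# Reviewer sketch (hedged addition, not part of the original change): an extra
+# test for the no-files fallback in get_stats(), which should report a count
+# of 0 and "-" for size and usage.  It reuses the clidb/cli fixtures and the
+# model factories used by the tests above.
+def test_show_stats_no_files(clidb, cli):
+    """Test show --stats on a node with no file copies."""
+
+    group = StorageGroup.create(name="Group")
+    StorageNode.create(name="Node", group=group)
+
+    result = cli(0, ["node", "show", "Node", "--stats"])
+
+    # With no copies, get_stats() falls back to count=0, size="-", percent="-",
+    # and "node show" prints the percent with a trailing "%".
+    assert "Total Files: 0" in result.output
+    assert "Total Size: -" in result.output
+    assert "Usage: -%" in result.output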