From ea24900329f36863f9929c1055affad2662faf84 Mon Sep 17 00:00:00 2001 From: Joud Masoud Date: Wed, 15 Nov 2023 14:48:33 +0300 Subject: [PATCH] cli: multiple filters in get-metadata command --- cernopendata_client/cli.py | 74 ++++---------------- cernopendata_client/metadater.py | 115 +++++++++++++++++++++++++++++++ docs/usage.rst | 18 ++--- tests/test_cli_get_metadata.py | 50 -------------- tests/test_metadater.py | 108 +++++++++++++++++++++++++++++ 5 files changed, 244 insertions(+), 121 deletions(-) create mode 100644 cernopendata_client/metadater.py create mode 100644 tests/test_metadater.py diff --git a/cernopendata_client/cli.py b/cernopendata_client/cli.py index 9e78d2b..bc5ee3d 100644 --- a/cernopendata_client/cli.py +++ b/cernopendata_client/cli.py @@ -40,6 +40,7 @@ ) from .walker import get_list_directory from .verifier import get_file_info_local, verify_file_info +from .metadater import filter_metadata, handle_error_message from .config import ( SERVER_HTTP_URI, LIST_DIRECTORY_TIMEOUT, @@ -68,60 +69,6 @@ def version(): display_message(msg=__version__) -def filter_metadata(output_field, filters, output_json): - # noqa: D301 - """Filter metadata objects based on specified criteria. - - :param output_field: Name of the array containing objects to access - :param filters: Argument of the --filter option in the format some_field_name=some_value - :param output_json: JSON containing metadata objects - - :type field: str - :type filters: str - :type output_json: list or dict - - :return: None - """ - filter_fields = filters.split("=") - if len(filter_fields) != 2: - display_message( - msg_type="error", - msg="Invalid filter format. 
Use --filter some_field_name=some_value", - ) - sys.exit(1) - filterField_name, filterField_value = filter_fields - filterField_names = filterField_name.split(".") - for object in output_json: - if object == "$schema": - display_message( - msg_type="error", - msg="Field '{}' is not present in metadata".format(output_field), - ) - sys.exit(1) - try: - if filterField_value == object[filterField_names[-1]]: - if output_field in object: - display_message(msg=object[output_field]) - else: - display_message(msg=json.dumps(object, indent=4)) - return - except (KeyError, TypeError): - display_message( - msg_type="error", - msg="Field '{}' is not present in metadata".format( - filterField_names[-1] - ), - ) - sys.exit(1) - display_message( - msg_type="error", - msg="No objects found with {}={}".format( - filterField_names[-1], filterField_value - ), - ) - sys.exit(1) - - @cernopendata_client.command() @click.option("--recid", type=click.INT, help="Record ID") @click.option("--doi", help="Digital Object Identifier") @@ -141,7 +88,7 @@ def filter_metadata(output_field, filters, output_json): @click.option( "--filter", "filters", - multiple=False, + multiple=True, help="Filter only certain output values matching filtering criteria. 
[Use --filter some_field_name=some_value]", ) def get_metadata(server, recid, doi, title, output_value, filters): @@ -163,6 +110,7 @@ def get_metadata(server, recid, doi, title, output_value, filters): output_json = record_json["metadata"] if output_value: fields = output_value.split(".") + wrong_field = True try: for field in fields: output_json = output_json[field] @@ -173,16 +121,18 @@ def get_metadata(server, recid, doi, title, output_value, filters): try: if filters: filter_metadata(field, filters, output_json) + wrong_field = False else: for object in output_json: - display_message(msg=object[field]) - return + if field in object: + wrong_field = False + display_message(msg=object[field]) except (KeyError, TypeError): - display_message( - msg_type="error", - msg="Field '{}' is not present in metadata".format(field), - ) - sys.exit(1) + handle_error_message(field) + if wrong_field: + handle_error_message(field) + return + if isinstance(output_json, (dict, list)): display_message(msg=json.dumps(output_json, indent=4)) else: # print strings or numbers more simply diff --git a/cernopendata_client/metadater.py b/cernopendata_client/metadater.py new file mode 100644 index 0000000..9226055 --- /dev/null +++ b/cernopendata_client/metadater.py @@ -0,0 +1,115 @@ +# -*- coding: utf-8 -*- +# This file is part of cernopendata-client. +# +# Copyright (C) 2020 CERN. +# +# cernopendata-client is free software; you can redistribute it and/or modify +# it under the terms of the GPLv3 license; see LICENSE file for more details. + +"""cernopendata-client file metadater related utilities.""" + +import json +import sys +from collections import Counter + +from .printer import display_message + + +def handle_error_message(field): + # noqa: D301 + """Handle error message. 
+ + :param field: Name of the field to access + + :type field: str + + :return: None + """ + display_message( + msg_type="error", + msg="Field '{}' is not present in metadata".format(field), + ) + sys.exit(1) + + +def filter_matching_output(matching_objects, output_field, output_json): + # noqa: D301 + """Display the objects that matched the filters, preferring the one matched most often. + + :param matching_objects: Dictionary of matching objects keyed by "<filter_field>_<index>" + :param output_field: Name of the field to print from each matching object + :param output_json: JSON containing metadata objects + + :type matching_objects: dict + :type output_field: str + :type output_json: list or dict + + :return: None + """ + index_frequency = Counter( + [int(item.rsplit("_", 1)[1]) for item in matching_objects.keys()] + ) + + output_object_index, count = index_frequency.most_common(1)[0] + output_object = output_json[output_object_index] + if count == 1: + for object in matching_objects.values(): + if output_field in object: + display_message(msg=object[output_field]) + else: + display_message(msg=json.dumps(object, indent=4)) + else: + if output_field in output_object: + display_message(msg=output_object[output_field]) + else: + display_message(msg=json.dumps(output_object, indent=4)) + + +def filter_metadata(output_field, filters, output_json): + # noqa: D301 + """Filter metadata objects based on specified criteria. + + :param output_field: Name of the field to print from each matching object + :param filters: Arguments of the --filter option, each in the format some_field_name=some_value + :param output_json: JSON containing metadata objects + + :type output_field: str + :type filters: list or tuple of str + :type output_json: list or dict + + :return: None + """ + matching_objects = {} + for filter in filters: + no_objects_found = True + wrong_field = True + + filter_fields = filter.split("=") + if len(filter_fields) != 2: + display_message( + msg_type="error", + msg="Invalid filter format. 
Use --filter some_field_name=some_value", + ) + sys.exit(1) + filterField_name, filterField_value = filter_fields + filterField_names = filterField_name.split(".") + for index, object in enumerate(output_json): + if object == "$schema": + handle_error_message(output_field) + if filterField_names[-1] in object: + wrong_field = False + if filterField_value == object[filterField_names[-1]]: + matching_objects[f"{filterField_names[-1]}_{index}"] = object + no_objects_found = False + if no_objects_found and not wrong_field: + display_message( + msg_type="error", + msg="No objects found with {}={}".format( + filterField_names[-1], filterField_value + ), + ) + sys.exit(1) + elif wrong_field: + handle_error_message(filterField_names[-1]) + if matching_objects: + filter_matching_output(matching_objects, output_field, output_json) diff --git a/docs/usage.rst b/docs/usage.rst index 3770fdf..81c6b2d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -95,15 +95,15 @@ You can use the **--filter** command-line option to achieve this: .. 
code-block:: console - $ cernopendata-client get-metadata --recid 329 --output-value authors.orcid - 0000-0002-2634-4958 - 0000-0001-8363-9827 - 0000-0002-9764-9783 - 0000-0002-9266-1783 - 0000-0001-5610-1060 - 0000-0001-7613-8063 - $ cernopendata-client get-metadata --recid 329 --output-value authors.orcid --filter name='Rousseau, David' - 0000-0001-7613-8063 + $ cernopendata-client get-metadata --recid 329 --output-value authors.name + Adam-Bourdarios, Claire + Cowan, Glen + Germain, Cecile + Guyon, Isabelle + Kégl, Balázs + Rousseau, David + $ cernopendata-client get-metadata --recid 329 --output-value authors.name --filter affiliation='Orsay, LAL; Paris, IN2P3; Orsay' --filter orcid='0000-0001-7613-8063' + Rousseau, David Listing available data files diff --git a/tests/test_cli_get_metadata.py b/tests/test_cli_get_metadata.py index a3aab5a..69e94cc 100644 --- a/tests/test_cli_get_metadata.py +++ b/tests/test_cli_get_metadata.py @@ -69,24 +69,6 @@ def test_get_metadata_from_output_fields(): assert "FT_R_42_V10A::All" in test_result.output -def test_get_metadata_from_filter_fields(): - """Test `get-metadata --recid --output-value --filter` command.""" - test_get_metadata = CliRunner() - test_result = test_get_metadata.invoke( - get_metadata, - [ - "--recid", - 1, - "--output-value", - "usage.links.description", - "--filter", - "url=/docs/cms-getting-started-2010", - ], - ) - assert test_result.exit_code == 0 - assert "Getting started with CMS open data" in test_result.output - - def test_get_metadata_from_output_fields_one(): """Test `get-metadata --recid --output-value` command.""" test_get_metadata = CliRunner() @@ -115,38 +97,6 @@ def test_get_metadata_from_output_fields_wrong(): assert "Field 'global_tag' is not present in metadata\n" in test_result.output -def test_get_metadata_from_filter_fields_wrong(): - """Test `get-metadata --recid --output-value --filter` command for wrong values.""" - test_get_metadata = CliRunner() - test_result = 
test_get_metadata.invoke( - get_metadata, - [ - "--recid", - 1, - "--output-value", - "usage.links.description", - "--filter", - "link=/docs/cms-getting-started-2010", - ], - ) - assert test_result.exit_code == 1 - assert "Field 'link' is not present in metadata\n" in test_result.output - - -def test_get_metadata_from_filter_fields_emptyg(): - """Test `get-metadata --recid --output-value --filter` command with empty values.""" - test_get_metadata = CliRunner() - test_result = test_get_metadata.invoke( - get_metadata, - ["--recid", 1, "--output-value", "usage.links.description", "--filter", "url"], - ) - assert test_result.exit_code == 1 - assert ( - "Invalid filter format. Use --filter some_field_name=some_value" - in test_result.output - ) - - def test_get_metadata_empty_value(): """Test get_metadata() command with empty value.""" test_get_metadata_empty_value = CliRunner() diff --git a/tests/test_metadater.py b/tests/test_metadater.py new file mode 100644 index 0000000..b0a7bf5 --- /dev/null +++ b/tests/test_metadater.py @@ -0,0 +1,108 @@ +# -*- coding: utf-8 -*- +# +# This file is part of cernopendata-client. +# +# Copyright (C) 2020 CERN. +# +# cernopendata-client is free software; you can redistribute it and/or modify +# it under the terms of the GPLv3 license; see LICENSE file for more details. + +"""cernopendata-client file metadater test.""" + +from click.testing import CliRunner +from cernopendata_client.cli import get_metadata + +from cernopendata_client.metadater import ( + filter_metadata, + filter_matching_output, +) + + +def test_get_metadata_from_filter_fields_empty(): + """Test `get-metadata --recid --output-value --filter` command with empty values.""" + test_get_metadata = CliRunner() + test_result = test_get_metadata.invoke( + get_metadata, + ["--recid", 1, "--output-value", "usage.links.description", "--filter", "url"], + ) + assert test_result.exit_code == 1 + assert ( + "Invalid filter format. 
Use --filter some_field_name=some_value" + in test_result.output + ) + + +def test_get_metadata_from_filter_metadata_one(): + """Test `get-metadata --recid --output-value --filter` command.""" + test_get_metadata = CliRunner() + test_result = test_get_metadata.invoke( + get_metadata, + [ + "--recid", + 1, + "--output-value", + "usage.links.description", + "--filter", + "url=/docs/cms-getting-started-2010", + ], + ) + assert test_result.exit_code == 0 + assert "Getting started with CMS open data" in test_result.output + + +def test_get_metadata_from_filter_metadata_two(): + """Test `get-metadata --recid --output-value --filter` command.""" + test_get_metadata = CliRunner() + test_result = test_get_metadata.invoke( + get_metadata, + [ + "--recid", + 451, + "--output-value", + "authors.name", + "--filter", + "affiliation=CERN", + "--filter", + "ccid=CCID-722528", + ], + ) + assert test_result.exit_code == 0 + assert "Plagge, Michael" in test_result.output + + +def test_get_metadata_from_filter_metadata_wrong_one(): + """Test `get-metadata --recid --output-value --filter` command for wrong values.""" + test_get_metadata = CliRunner() + test_result = test_get_metadata.invoke( + get_metadata, + [ + "--recid", + 1, + "--output-value", + "usage.links.description", + "--filter", + "link=/docs/cms-getting-started-2010", + ], + ) + assert test_result.exit_code == 1 + assert "Field 'link' is not present in metadata" in test_result.output + + +def test_get_metadata_from_filter_metadata_wrong_two(): + """Test `get-metadata --recid --output-value --filter` command for wrong values.""" + test_get_metadata = CliRunner() + test_result = test_get_metadata.invoke( + get_metadata, + [ + "--recid", + 1, + "--output-value", + "usage.links.description", + "--filter", + "url=/docs/cms-getting-started-20", + ], + ) + assert test_result.exit_code == 1 + assert ( + "No objects found with url=/docs/cms-getting-started-20" in test_result.output + )