Skip to content

Commit

Permalink
cli: multiple filters in get-metadata command
Browse files Browse the repository at this point in the history
  • Loading branch information
joudmas committed Nov 15, 2023
1 parent 40a3b3b commit ea24900
Show file tree
Hide file tree
Showing 5 changed files with 244 additions and 121 deletions.
74 changes: 12 additions & 62 deletions cernopendata_client/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
)
from .walker import get_list_directory
from .verifier import get_file_info_local, verify_file_info
from .metadater import filter_metadata, handle_error_message
from .config import (
SERVER_HTTP_URI,
LIST_DIRECTORY_TIMEOUT,
Expand Down Expand Up @@ -68,60 +69,6 @@ def version():
display_message(msg=__version__)


def filter_metadata(output_field, filters, output_json):
    # noqa: D301
    """Display the first metadata object that satisfies a single filter.

    The filter has the form ``some_field_name=some_value``.  Only the last
    dot-separated component of the field name is used as the lookup key.
    For the first object whose value under that key equals the requested
    value, the ``output_field`` entry is displayed when present; otherwise
    the whole object is displayed as indented JSON.

    The process exits with status 1 when the filter is malformed, when an
    object cannot be looked up by the filter key, or when nothing matches.

    :param output_field: Name of the field to display from the matching object
    :param filters: Argument of the --filter option in the format some_field_name=some_value
    :param output_json: JSON containing metadata objects
    :type output_field: str
    :type filters: str
    :type output_json: list or dict
    :return: None
    """
    parts = filters.split("=")
    if len(parts) != 2:
        display_message(
            msg_type="error",
            msg="Invalid filter format. Use --filter some_field_name=some_value",
        )
        sys.exit(1)
    wanted_value = parts[1]
    lookup_key = parts[0].split(".")[-1]

    for entry in output_json:
        # Iterating a dict yields its keys; presumably meeting "$schema" means
        # the output field did not resolve to a list of objects.
        if entry == "$schema":
            display_message(
                msg_type="error",
                msg="Field '{}' is not present in metadata".format(output_field),
            )
            sys.exit(1)
        try:
            if wanted_value != entry[lookup_key]:
                continue
            if output_field in entry:
                shown = entry[output_field]
            else:
                shown = json.dumps(entry, indent=4)
            display_message(msg=shown)
            return
        except (KeyError, TypeError):
            display_message(
                msg_type="error",
                msg="Field '{}' is not present in metadata".format(lookup_key),
            )
            sys.exit(1)

    display_message(
        msg_type="error",
        msg="No objects found with {}={}".format(lookup_key, wanted_value),
    )
    sys.exit(1)


@cernopendata_client.command()
@click.option("--recid", type=click.INT, help="Record ID")
@click.option("--doi", help="Digital Object Identifier")
Expand All @@ -141,7 +88,7 @@ def filter_metadata(output_field, filters, output_json):
@click.option(
"--filter",
"filters",
multiple=False,
multiple=True,
help="Filter only certain output values matching filtering criteria. [Use --filter some_field_name=some_value]",
)
def get_metadata(server, recid, doi, title, output_value, filters):
Expand All @@ -163,6 +110,7 @@ def get_metadata(server, recid, doi, title, output_value, filters):
output_json = record_json["metadata"]
if output_value:
fields = output_value.split(".")
wrong_field = True
try:
for field in fields:
output_json = output_json[field]
Expand All @@ -173,16 +121,18 @@ def get_metadata(server, recid, doi, title, output_value, filters):
try:
if filters:
filter_metadata(field, filters, output_json)
wrong_field = False
else:
for object in output_json:
display_message(msg=object[field])
return
if field in object:
wrong_field = False
display_message(msg=object[field])
except (KeyError, TypeError):
display_message(
msg_type="error",
msg="Field '{}' is not present in metadata".format(field),
)
sys.exit(1)
handle_error_message(field)

Check warning on line 131 in cernopendata_client/cli.py

View check run for this annotation

Codecov / codecov/patch

cernopendata_client/cli.py#L131

Added line #L131 was not covered by tests
if wrong_field:
handle_error_message(field)
return

if isinstance(output_json, (dict, list)):
display_message(msg=json.dumps(output_json, indent=4))
else: # print strings or numbers more simply
Expand Down
115 changes: 115 additions & 0 deletions cernopendata_client/metadater.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# -*- coding: utf-8 -*-
# This file is part of cernopendata-client.
#
# Copyright (C) 2020 CERN.
#
# cernopendata-client is free software; you can redistribute it and/or modify
# it under the terms of the GPLv3 license; see LICENSE file for more details.

"""cernopendata-client file metadater related utilities."""

import json
import sys
from collections import Counter

from .printer import display_message


def handle_error_message(field):
    # noqa: D301
    """Report that a field is missing from the metadata and exit.

    :param field: Name of the field that could not be found
    :type field: str
    :return: None; the process is terminated with exit status 1
    """
    error_text = "Field '{}' is not present in metadata".format(field)
    display_message(msg_type="error", msg=error_text)
    sys.exit(1)


def filter_matching_output(matching_objects, output_field, output_json):
    # noqa: D301
    """Display the objects selected by the ``--filter`` criteria.

    Keys of ``matching_objects`` have the form ``<field_name>_<index>``,
    where ``<index>`` is the position of the matched object in
    ``output_json``.  The index occurring most often identifies the object
    that was matched by the largest number of filters.

    If no object was matched by more than one filter, every matched object
    is displayed.  Otherwise only the object matched by the most filters is
    displayed.  For each displayed object the value of ``output_field`` is
    shown when present; otherwise the whole object is shown as indented JSON.

    :param matching_objects: Matched objects keyed by ``<field_name>_<index>``
    :param output_field: Name of the field to display from each object
    :param output_json: JSON containing metadata objects
    :type matching_objects: dict
    :type output_field: str
    :type output_json: list or dict
    :return: None
    """
    if not matching_objects:
        # Nothing matched: avoid indexing an empty ``most_common`` result.
        return

    # Field names may themselves contain underscores (e.g. ``file_name_3``),
    # so the index is the text after the *last* underscore, not the first.
    index_frequency = Counter(
        int(key.rsplit("_", 1)[1]) for key in matching_objects
    )
    best_index, count = index_frequency.most_common(1)[0]

    if count == 1:
        # NOTE(review): with several filters that each match a *different*
        # object this displays the union of the matches -- confirm intended.
        selected = list(matching_objects.values())
    else:
        selected = [output_json[best_index]]

    for item in selected:
        if output_field in item:
            display_message(msg=item[output_field])
        else:
            display_message(msg=json.dumps(item, indent=4))

Check warning on line 65 in cernopendata_client/metadater.py

View check run for this annotation

Codecov / codecov/patch

cernopendata_client/metadater.py#L65

Added line #L65 was not covered by tests


def filter_metadata(output_field, filters, output_json):
    # noqa: D301
    """Filter metadata objects based on specified criteria.

    Every filter has the form ``some_field_name=some_value``.  Only the last
    dot-separated component of the field name is used as the lookup key, and
    the value is everything after the first ``=``.  All objects whose value
    under that key equals the requested value are collected and then handed
    to ``filter_matching_output`` for display.

    The process exits with status 1 when a filter is malformed, when the
    filter field is absent from every object, or when the field exists but
    no object carries the requested value.

    :param output_field: Name of the field to display from matching objects
    :param filters: Arguments of the --filter option, each in the format
        some_field_name=some_value; a single string is accepted as well
    :param output_json: JSON containing metadata objects
    :type output_field: str
    :type filters: tuple or list of str, or str
    :type output_json: list or dict
    :return: None
    """
    if isinstance(filters, str):
        # Accept a lone filter string; iterating it would yield characters.
        filters = (filters,)

    matching_objects = {}
    for filter_spec in filters:
        # Split on the first "=" only, so values may contain "=" themselves.
        filter_parts = filter_spec.split("=", 1)
        if len(filter_parts) != 2:
            display_message(
                msg_type="error",
                msg="Invalid filter format. Use --filter some_field_name=some_value",
            )
            sys.exit(1)
        field_path, wanted_value = filter_parts
        # Use the same key for the presence test and the value lookup.
        filter_key = field_path.split(".")[-1]

        field_seen = False
        value_matched = False
        for index, item in enumerate(output_json):
            # Iterating a dict yields its keys; presumably meeting "$schema"
            # means the output field did not resolve to a list of objects.
            if item == "$schema":
                handle_error_message(output_field)
            if filter_key in item:
                field_seen = True
                if wanted_value == item[filter_key]:
                    # Key format is relied upon by filter_matching_output.
                    matching_objects[f"{filter_key}_{index}"] = item
                    value_matched = True

        if not field_seen:
            handle_error_message(filter_key)
        elif not value_matched:
            display_message(
                msg_type="error",
                msg="No objects found with {}={}".format(filter_key, wanted_value),
            )
            sys.exit(1)

    if matching_objects:
        filter_matching_output(matching_objects, output_field, output_json)
18 changes: 9 additions & 9 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,15 @@ You can use the **--filter** command-line option to achieve this:

.. code-block:: console
$ cernopendata-client get-metadata --recid 329 --output-value authors.orcid
0000-0002-2634-4958
0000-0001-8363-9827
0000-0002-9764-9783
0000-0002-9266-1783
0000-0001-5610-1060
0000-0001-7613-8063
$ cernopendata-client get-metadata --recid 329 --output-value authors.orcid --filter name='Rousseau, David'
0000-0001-7613-8063
$ cernopendata-client get-metadata --recid 329 --output-value authors.name
Adam-Bourdarios, Claire
Cowan, Glen
Germain, Cecile
Guyon, Isabelle
Kégl, Balázs
Rousseau, David
$ cernopendata-client get-metadata --recid 329 --output-value authors.name --filter affiliation='Orsay, LAL; Paris, IN2P3; Orsay' --filter orcid='0000-0001-7613-8063'
Rousseau, David
Listing available data files
Expand Down
50 changes: 0 additions & 50 deletions tests/test_cli_get_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,24 +69,6 @@ def test_get_metadata_from_output_fields():
assert "FT_R_42_V10A::All" in test_result.output


def test_get_metadata_from_filter_fields():
    """Check that `get-metadata --recid --output-value --filter` shows the match."""
    cli_args = [
        "--recid",
        1,
        "--output-value",
        "usage.links.description",
        "--filter",
        "url=/docs/cms-getting-started-2010",
    ]
    outcome = CliRunner().invoke(get_metadata, cli_args)
    assert outcome.exit_code == 0
    assert "Getting started with CMS open data" in outcome.output


def test_get_metadata_from_output_fields_one():
"""Test `get-metadata --recid --output-value` command."""
test_get_metadata = CliRunner()
Expand Down Expand Up @@ -115,38 +97,6 @@ def test_get_metadata_from_output_fields_wrong():
assert "Field 'global_tag' is not present in metadata\n" in test_result.output


def test_get_metadata_from_filter_fields_wrong():
    """Check that `get-metadata --filter` fails for an unknown filter field."""
    cli_args = [
        "--recid",
        1,
        "--output-value",
        "usage.links.description",
        "--filter",
        "link=/docs/cms-getting-started-2010",
    ]
    outcome = CliRunner().invoke(get_metadata, cli_args)
    assert outcome.exit_code == 1
    assert "Field 'link' is not present in metadata\n" in outcome.output


def test_get_metadata_from_filter_fields_emptyg():
    """Check that `get-metadata --filter` rejects a filter without a value."""
    cli_args = [
        "--recid",
        1,
        "--output-value",
        "usage.links.description",
        "--filter",
        "url",
    ]
    outcome = CliRunner().invoke(get_metadata, cli_args)
    expected_error = "Invalid filter format. Use --filter some_field_name=some_value"
    assert outcome.exit_code == 1
    assert expected_error in outcome.output


def test_get_metadata_empty_value():
"""Test get_metadata() command with empty value."""
test_get_metadata_empty_value = CliRunner()
Expand Down
Loading

0 comments on commit ea24900

Please sign in to comment.