Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experiments on a multi-file reader #2697

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 57 additions & 5 deletions satpy/readers/yaml_reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python

Check notice on line 1 in satpy/readers/yaml_reader.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

ℹ Getting worse: Lines of Code in a Single File

The number of lines of code increases from 1070 to 1106; improve code health by reducing it to 600. This metric counts the lines of code in a single file. More lines of code lower the code health.

Check notice on line 1 in satpy/readers/yaml_reader.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

ℹ Getting worse: Number of Functions in a Single Module

The number of functions increases from 100 to 109, threshold = 75. This file contains too many functions. Beyond a certain threshold, more functions lower the code health.

Check notice on line 1 in satpy/readers/yaml_reader.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

✅ No longer an issue: Bumpy Road Ahead

AbstractYAMLReader.load is no longer above the threshold for logical blocks with deeply nested code. The Bumpy Road code smell is a function that contains multiple chunks of nested conditional logic. The deeper the nesting and the more bumps, the lower the code health.

Check notice on line 1 in satpy/readers/yaml_reader.py

View check run for this annotation

CodeScene Delta Analysis / CodeScene Cloud Delta Analysis (main)

✅ No longer an issue: Bumpy Road Ahead

FileYAMLReader.load is no longer above the threshold for logical blocks with deeply nested code. The Bumpy Road code smell is a function that contains multiple chunks of nested conditional logic. The deeper the nesting and the more bumps, the lower the code health.
# -*- coding: utf-8 -*-
# Copyright (c) 2016-2022 Satpy developers
#
Expand All @@ -23,7 +23,7 @@
import os
import warnings
from abc import ABCMeta, abstractmethod
from collections import OrderedDict, deque
from collections import deque
from contextlib import suppress
from fnmatch import fnmatch
from weakref import WeakValueDictionary
Expand Down Expand Up @@ -121,6 +121,11 @@
return config


def remove_duplicates(elements):
    """Return the distinct items of *elements* as a list, in first-seen order.

    Every item must be hashable. When two items compare equal, the one that
    appears first is the one that is kept.
    """
    seen = set()
    unique = []
    for item in elements:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


class AbstractYAMLReader(metaclass=ABCMeta):
"""Base class for all readers that use YAML configuration files.

Expand Down Expand Up @@ -278,7 +283,7 @@
See `satpy.readers.get_key` for more information about kwargs.

"""
return get_key(key, self.all_ids.keys(), **kwargs)
return get_key(key, self.all_dataset_ids, **kwargs)

def load_ds_ids_from_config(self):
"""Get the dataset ids from the config."""
Expand Down Expand Up @@ -359,7 +364,7 @@
filter_filenames=True,
**kwargs):
"""Set up initial internal storage for loading file data."""
super(FileYAMLReader, self).__init__(config_dict)
super().__init__(config_dict)

self.file_handlers = {}
self.available_ids = {}
Expand Down Expand Up @@ -605,7 +610,7 @@

def create_filehandlers(self, filenames, fh_kwargs=None):
"""Organize the filenames into file types and create file handlers."""
filenames = list(OrderedDict.fromkeys(filenames))
filenames = remove_duplicates(filenames)
logger.debug("Assigning to %s: %s", self.info["name"], filenames)

self.info.setdefault("filenames", []).extend(filenames)
Expand Down Expand Up @@ -1164,7 +1169,7 @@
produce the correct order.

"""
created_fhs = super(GEOSegmentYAMLReader, self).create_filehandlers(
created_fhs = super().create_filehandlers(
filenames, fh_kwargs=fh_kwargs)

# add "expected_segments" information
Expand Down Expand Up @@ -1542,3 +1547,50 @@
ar = np.repeat(mod, n)
ar[-remainder:] = mod + 1
return ar.astype("int")


class MultiFileYAMLReader(AbstractYAMLReader):
    """Class for handling multiple files at once (same reader, same overpass/repeat cycle).

    The files are handed over as a pair -- a JSON metadata file and an ``.npz``
    data file -- and both are read when loading.
    """

def start_time(self, *args, **kwargs):
    """Return the start time of the data.

    Placeholder: nothing is computed yet, so the result is always ``None``.
    """
    return None

Check warning on line 1557 in satpy/readers/yaml_reader.py

View check run for this annotation

Codecov / codecov/patch

satpy/readers/yaml_reader.py#L1557

Added line #L1557 was not covered by tests

def end_time(self, *args, **kwargs):
    """Return the end time of the data.

    Placeholder: nothing is computed yet, so the result is always ``None``.
    """
    return None

Check warning on line 1561 in satpy/readers/yaml_reader.py

View check run for this annotation

Codecov / codecov/patch

satpy/readers/yaml_reader.py#L1561

Added line #L1561 was not covered by tests

def filter_selected_filenames(self, *args, **kwargs):
    """Filter filenames.

    Placeholder: no filtering is performed and ``None`` is returned, whatever
    arguments are passed.
    """
    return None

Check warning on line 1565 in satpy/readers/yaml_reader.py

View check run for this annotation

Codecov / codecov/patch

satpy/readers/yaml_reader.py#L1565

Added line #L1565 was not covered by tests

def create_filehandlers(self, files, **kwargs):
    """Register *files* with the reader.

    No file handler objects are built here: the files are simply passed on to
    :meth:`assign_storage_items`. Extra keyword arguments are accepted and
    ignored.
    """
    self.assign_storage_items(files)
    return None

def assign_storage_items(self, files):
    """Store the metadata and data file locations on the reader.

    *files* must contain exactly two items. They may come in either order:
    if the first one ends with ``npz`` it is taken to be the data file and
    the two are swapped.
    """
    first, second = files
    if str(first).endswith("npz"):
        # The data file was given first: put the pair back in (json, npz) order.
        first, second = second, first
    self.json_file = first
    self.data_file = second

@property
def available_dataset_ids(self):
    """Return the available dataset IDs.

    Currently hard-coded to a single ID: name ``chanel_5``, resolution 400.
    """
    channel_id = DataID(default_id_keys_config, name="chanel_5", resolution=400)
    return [channel_id]

@property
def all_dataset_ids(self):
    """Return all the dataset IDs.

    Currently hard-coded to a single ID: name ``chanel_5``, resolution 400.
    """
    id_attributes = {"name": "chanel_5", "resolution": 400}
    return [DataID(default_id_keys_config, **id_attributes)]

def load(self, dataset_keys, **kwargs):
    """Load the requested datasets.

    The JSON file stored in ``self.json_file`` is read once and its content
    is used as the attributes of every returned array. The arrays are taken
    from the ``.npz`` archive stored in ``self.data_file``, looked up by the
    ``"name"`` entry of each requested key.

    Args:
        dataset_keys: Iterable of dataset IDs to load; each must support
            ``dsid["name"]``.
        **kwargs: Accepted for interface compatibility and ignored.

    Returns:
        A ``DatasetDict`` mapping each requested ID to an
        ``xarray.DataArray``.

    Raises:
        KeyError: If a requested name is not present in the archive.
    """
    import json

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(self.json_file, "r", encoding="utf-8") as fp:
        metadata = json.load(fp)

    data_arrays = DatasetDict()
    # Open the archive once for all keys and close it when done, instead of
    # reopening it for each dataset and leaking the file handle.
    with np.load(self.data_file) as archive:
        for dsid in dataset_keys:
            data = archive[dsid["name"]]
            data_arrays[dsid] = xr.DataArray(data, attrs=metadata)
    return data_arrays
56 changes: 56 additions & 0 deletions satpy/tests/test_multi_file_yaml_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
"""Tests for a multi file reader."""

import numpy as np
import pytest

from satpy.readers.yaml_reader import MultiFileYAMLReader

# Reader configuration used by the tests below: one "metadata" file type
# (JSON) and one "data" file type (npz), both handled by MultiFileYAMLReader.
yaml_reader_config = {
    "reader": {
        "name": "json_npz",
        "short_name": "Simple JSON and npz reader",
        "sensors": ["vvhrr"],
        "reader": MultiFileYAMLReader,
    },
    "file_types": {
        "metadata": {
            "file_patterns": ["{start_time:%Y%m%dT%H%M%S}_vvhrr.json"],
        },
        "data": {
            "file_patterns": ["{start_time:%Y%m%dT%H%M%S}_vvhrr.data.npz"],
        },
    },
}


@pytest.fixture()
def vvhrr_files(tmp_path):
    """Write a fake metadata/data file pair and return both paths.

    The JSON file holds ``{"sensor": "vvhrr"}``; the npz archive holds a
    10x10 array of zeros under the key ``chanel_5``.
    """
    import json

    json_file = tmp_path / "20231211T111111_vvhrr.json"
    data_file = tmp_path / "20231211T111111_vvhrr.data.npz"

    json_file.write_text(json.dumps({"sensor": "vvhrr"}))
    np.savez(data_file, chanel_5=np.zeros((10, 10)))

    return json_file, data_file


def test_read_using_yaml_reader_interface(vvhrr_files):
    """Check that the reader, driven directly, returns the stored data and metadata."""
    from satpy.dataset.dataid import DataID, default_id_keys_config

    channel_key = DataID(default_id_keys_config, name="chanel_5", resolution=400)

    reader = MultiFileYAMLReader(yaml_reader_config)
    reader.assign_storage_items(vvhrr_files)
    loaded = reader.load([channel_key])
    channel = loaded[channel_key]

    np.testing.assert_allclose(channel, np.zeros((10, 10)))
    assert channel.attrs["sensor"] == "vvhrr"

def test_read_using_scene_interface(vvhrr_files, tmp_path):
    """Check that the reader can be driven through the ``Scene`` interface.

    The reader configuration is dumped to ``readers/json_npz.yaml`` under
    *tmp_path*, which is then set as the satpy config path.
    """
    import yaml

    from satpy import Scene, config

    readers_dir = tmp_path / "readers"
    readers_dir.mkdir()
    (readers_dir / "json_npz.yaml").write_text(yaml.dump(yaml_reader_config))

    with config.set(config_path=[tmp_path]):
        scene = Scene(vvhrr_files, reader="json_npz")
        # NOTE(review): nothing is asserted about the loaded result; this only
        # checks that creating the scene and loading do not raise.
        scene.load(["chanel_5"])
Loading