Merge pull request #763 from sentinel-hub/develop
Release 1.5.1
zigaLuksic authored Oct 17, 2023
2 parents 596fdc1 + f311aa7 commit e93682c
Showing 21 changed files with 262 additions and 99 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v4.5.0
hooks:
- id: end-of-file-fixer
- id: requirements-txt-fixer
@@ -13,13 +13,13 @@ repos:
- id: debug-statements

- repo: https://github.com/psf/black
rev: 23.7.0
rev: 23.9.1
hooks:
- id: black
language_version: python3

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: "v0.0.282"
rev: "v0.0.292"
hooks:
- id: ruff

7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -1,3 +1,10 @@
## [Version 1.5.1] - 2023-10-17

- `MorphologicalFilterTask` adapted to work on boolean values.
- Added `temporal_subset` method to `EOPatch`, which can be used to extract a subset of an `EOPatch` by selecting only certain temporal slices (see the example below). Also added a corresponding `TemporalSubsetTask`.
- `EOExecutor` now has an option to treat `TemporalDimensionWarning` as an exception.
- String representation of `EOPatch` objects was revisited to avoid edge cases where the output would print enormous objects.

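The following is a minimal sketch of the new temporal subsetting, assuming `patch` is an existing `EOPatch` with timestamps loaded; it is illustrative only and not part of the release notes.

```python
import datetime as dt

# select by temporal indices
first_three = patch.temporal_subset([0, 1, 2])

# select by explicit timestamps (these must match existing frame timestamps exactly)
may_only = patch.temporal_subset([dt.datetime(2023, 5, 1, 10, 30)])

# select with a predicate over the full timestamp list
summer = patch.temporal_subset(lambda stamps: [ts.month in (6, 7, 8) for ts in stamps])
```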
## [Version 1.5.0] - 2023-09-06

The release focuses on making `eo-learn` much simpler to install, reducing the number of dependencies, and improving validation of soundness of `EOPatch` data.
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -134,7 +134,7 @@
# When Sphinx documents a class signature it prioritizes the __new__ method over __init__. The following hack puts
# the EOTask.__new__ method on the blacklist so that the __init__ method signature is used instead. This seems the
# cleanest way even though a private object is accessed.
sphinx.ext.autodoc._CLASS_NEW_BLACKLIST.append("{0.__module__}.{0.__qualname__}".format(EOTask.__new__)) # noqa: SLF001
sphinx.ext.autodoc._CLASS_NEW_BLACKLIST.append(f"{EOTask.__module__}.{EOTask.__new__.__qualname__}") # noqa: SLF001


EXAMPLES_FOLDER = "./examples"
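As a quick sanity check of the change above, here is a hedged, standalone sketch (the `EOTaskLike` class is a stand-in, not the real `EOTask`) showing that for a class that defines its own `__new__` in the same module, the old `str.format` expression and the new f-string build the same blacklist entry:

```python
class EOTaskLike:
    """Stand-in for EOTask, which defines its own __new__."""

    def __new__(cls):
        return super().__new__(cls)


old_entry = "{0.__module__}.{0.__qualname__}".format(EOTaskLike.__new__)
new_entry = f"{EOTaskLike.__module__}.{EOTaskLike.__new__.__qualname__}"

# both resolve to "<module>.EOTaskLike.__new__", e.g. "__main__.EOTaskLike.__new__" in a script
assert old_entry == new_entry
```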
2 changes: 1 addition & 1 deletion eolearn/__init__.py
@@ -1,5 +1,5 @@
"""Main module of the `eolearn` package."""
__version__ = "1.5.0"
__version__ = "1.5.1"

import importlib.util
import warnings
1 change: 1 addition & 0 deletions eolearn/core/__init__.py
@@ -19,6 +19,7 @@
RemoveFeatureTask,
RenameFeatureTask,
SaveTask,
TemporalSubsetTask,
ZipFeatureTask,
)
from .eodata import EOPatch
24 changes: 24 additions & 0 deletions eolearn/core/core_tasks.py
@@ -413,6 +413,30 @@ def execute(self, src_eopatch: EOPatch, dst_eopatch: EOPatch) -> EOPatch:
return dst_eopatch


class TemporalSubsetTask(EOTask):
"""Extracts a temporal subset of the EOPatch."""

def __init__(
self, timestamps: None | list[dt.datetime] | list[int] | Callable[[list[dt.datetime]], Iterable[bool]] = None
):
"""
:param timestamps: Input for the `temporal_subset` method of EOPatch. Can also be provided in execution
arguments. Value in execution arguments takes precedence.
"""
self.timestamps = timestamps

def execute(
self,
eopatch: EOPatch,
*,
timestamps: None | list[dt.datetime] | list[int] | Callable[[list[dt.datetime]], Iterable[bool]] = None,
) -> EOPatch:
timestamps = timestamps if timestamps is not None else self.timestamps
if timestamps is None:
raise ValueError("Value for `timestamps` must be provided on initialization or as an execution argument.")
return eopatch.temporal_subset(timestamps)

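A short usage sketch for the new task, assuming `patch` is an `EOPatch` that already holds temporal data; both ways of supplying `timestamps` shown here follow the signature above, with the execution argument taking precedence:

```python
from eolearn.core import TemporalSubsetTask

task = TemporalSubsetTask(timestamps=[0, 1, 2])  # default subset: first three temporal slices
first_three = task.execute(patch)

# an execution argument overrides the value given at construction
summer_only = task.execute(patch, timestamps=lambda stamps: [ts.month in (6, 7, 8) for ts in stamps])
```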

class MapFeatureTask(EOTask):
"""Applies a function to each feature in input_features of a patch and stores the results in a set of
output_features.
70 changes: 55 additions & 15 deletions eolearn/core/eodata.py
@@ -455,36 +455,36 @@ def __repr__(self) -> str:

@staticmethod
def _repr_value(value: object) -> str:
"""Creates a representation string for different types of data.
:param value: data in any type
:return: representation string
"""
"""Creates a representation string for different types of data."""
if isinstance(value, np.ndarray):
return f"{EOPatch._repr_value_class(value)}(shape={value.shape}, dtype={value.dtype})"

if isinstance(value, gpd.GeoDataFrame):
crs = CRS(value.crs).ogc_string() if value.crs else value.crs
return f"{EOPatch._repr_value_class(value)}(columns={list(value)}, length={len(value)}, crs={crs})"

repr_str = str(value)
if len(repr_str) <= MAX_DATA_REPR_LEN:
return repr_str

if isinstance(value, (list, tuple, dict)) and value:
repr_str = str(value)
if len(repr_str) <= MAX_DATA_REPR_LEN:
return repr_str
lb, rb = ("[", "]") if isinstance(value, list) else ("(", ")") if isinstance(value, tuple) else ("{", "}")

l_bracket, r_bracket = ("[", "]") if isinstance(value, list) else ("(", ")")
if isinstance(value, (list, tuple)) and len(value) > 2:
repr_str = f"{l_bracket}{value[0]!r}, ..., {value[-1]!r}{r_bracket}"
if isinstance(value, dict): # generate representation of first element or (key, value) pair
some_key = next(iter(value))
repr_of_el = f"{EOPatch._repr_value(some_key)}: {EOPatch._repr_value(value[some_key])}"
else:
repr_of_el = EOPatch._repr_value(value[0])

if len(repr_str) > MAX_DATA_REPR_LEN and isinstance(value, (list, tuple)) and len(value) > 1:
repr_str = f"{l_bracket}{value[0]!r}, ...{r_bracket}"
many_elements_visual = ", ..." if len(value) > 1 else "" # add ellipsis if there are multiple elements
repr_str = f"{lb}{repr_of_el}{many_elements_visual}{rb}"

if len(repr_str) > MAX_DATA_REPR_LEN:
repr_str = str(type(value))

return f"{repr_str}, length={len(value)}"
return f"{repr_str}<length={len(value)}>"

return repr(value)
return str(type(value))

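To illustrate the revised behaviour, here is a simplified standalone sketch of the truncation strategy (not the eo-learn implementation itself): short representations are kept verbatim, while long containers are summarised through their first element, an ellipsis, and a length suffix.

```python
MAX_LEN = 100  # assumed cap, playing the role of MAX_DATA_REPR_LEN

def summarize(value: object) -> str:
    text = str(value)
    if len(text) <= MAX_LEN:
        return text  # short enough to show as-is
    if isinstance(value, (list, tuple)) and value:
        inner = summarize(value[0]) + (", ..." if len(value) > 1 else "")
        left, right = ("[", "]") if isinstance(value, list) else ("(", ")")
        return f"{left}{inner}{right}<length={len(value)}>"
    return str(type(value))  # fall back to the type for anything else that is too long

print(summarize(list(range(5))))       # [0, 1, 2, 3, 4]
print(summarize(list(range(10_000))))  # [0, ...]<length=10000>
```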
@staticmethod
def _repr_value_class(value: object) -> str:
@@ -726,6 +726,7 @@ def merge(
self, *eopatches, features=features, time_dependent_op=time_dependent_op, timeless_op=timeless_op
)

@deprecated_function(EODeprecationWarning, "Please use the method `temporal_subset` instead.")
def consolidate_timestamps(self, timestamps: list[dt.datetime]) -> set[dt.datetime]:
"""Removes all frames from the EOPatch with a date not found in the provided timestamps list.
@@ -750,6 +751,45 @@ def consolidate_timestamps(self, timestamps: list[dt.datetime]) -> set[dt.datetime]:

return remove_from_patch

def temporal_subset(
self, timestamps: Iterable[dt.datetime] | Iterable[int] | Callable[[list[dt.datetime]], Iterable[bool]]
) -> EOPatch:
"""Returns an EOPatch that only contains data for the temporal subset corresponding to `timestamps`.
For array-based data appropriate temporal slices are extracted. For vector data a filtration is performed.
:param timestamps: Parameter that defines the temporal subset. Can be a collection of timestamps, a
collection of timestamp indices. It is possible to also provide a callable that maps a list of timestamps
to a sequence of booleans, which determine if a given timestamp is included in the subset or not.
"""
timestamp_indices = self._parse_temporal_subset_input(timestamps)
new_timestamps = [ts for i, ts in enumerate(self.get_timestamps()) if i in timestamp_indices]
new_patch = EOPatch(bbox=self.bbox, timestamps=new_timestamps)

for ftype, fname in self.get_features():
if ftype.is_timeless() or ftype.is_meta():
new_patch[ftype, fname] = self[ftype, fname]
elif ftype.is_vector():
gdf: gpd.GeoDataFrame = self[ftype, fname]
new_patch[ftype, fname] = gdf[gdf[TIMESTAMP_COLUMN].isin(new_timestamps)]
else:
new_patch[ftype, fname] = self[ftype, fname][timestamp_indices]

return new_patch

def _parse_temporal_subset_input(
self, timestamps: Iterable[dt.datetime] | Iterable[int] | Callable[[list[dt.datetime]], Iterable[bool]]
) -> list[int]:
"""Parses input into a list of timestamp indices. Also adds implicit support for strings via `parse_time`."""
if callable(timestamps):
accepted_timestamps = timestamps(self.get_timestamps())
return [i for i, accepted in enumerate(accepted_timestamps) if accepted]
ts_or_idx = list(timestamps)
if all(isinstance(ts, int) for ts in ts_or_idx):
return ts_or_idx # type: ignore[return-value]
parsed_timestamps = {parse_time(ts, force_datetime=True) for ts in ts_or_idx} # type: ignore[call-overload]
return [i for i, ts in enumerate(self.get_timestamps()) if ts in parsed_timestamps]

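Because non-integer entries are passed through `parse_time`, ISO-format strings also work; a small sketch, assuming the patch has frames whose timestamps exactly match the parsed datetimes:

```python
# equivalent to passing the corresponding datetime objects
subset = patch.temporal_subset(["2023-05-01T10:30:00", "2023-06-15T10:40:00"])
```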
def plot(
self,
feature: Feature,
15 changes: 11 additions & 4 deletions eolearn/core/eoexecution.py
@@ -27,7 +27,7 @@

from .eonode import EONode
from .eoworkflow import EOWorkflow, WorkflowResults
from .exceptions import EORuntimeWarning
from .exceptions import EORuntimeWarning, TemporalDimensionWarning
from .utils.fs import get_base_filesystem_and_path, get_full_path, pickle_fs, unpickle_fs
from .utils.logging import LogFileFilter
from .utils.parallelize import _decide_processing_type, _ProcessingType, parallelize
@@ -36,8 +36,7 @@
class _HandlerWithFsFactoryType(Protocol):
"""Type definition for a callable that accepts a path and a filesystem object"""

def __call__(self, path: str, filesystem: FS, **kwargs: Any) -> Handler:
...
def __call__(self, path: str, filesystem: FS, **kwargs: Any) -> Handler: ...


# pylint: disable=invalid-name
@@ -56,6 +55,7 @@ class _ProcessingData:
filter_logs_by_thread: bool
logs_filter: Filter | None
logs_handler_factory: _HandlerFactoryType
raise_on_temporal_mismatch: bool


@dataclass(frozen=True)
@@ -87,6 +87,7 @@ def __init__(
filesystem: FS | None = None,
logs_filter: Filter | None = None,
logs_handler_factory: _HandlerFactoryType = FileHandler,
raise_on_temporal_mismatch: bool = False,
):
"""
:param workflow: A prepared instance of EOWorkflow class
@@ -109,6 +110,7 @@
object.
The 2nd option is chosen only if `filesystem` parameter exists in the signature.
:param raise_on_temporal_mismatch: Whether to treat `TemporalDimensionWarning` as an exception.
"""
self.workflow = workflow
self.execution_kwargs = self._parse_and_validate_execution_kwargs(execution_kwargs)
@@ -117,6 +119,7 @@
self.filesystem, self.logs_folder = self._parse_logs_filesystem(filesystem, logs_folder)
self.logs_filter = logs_filter
self.logs_handler_factory = logs_handler_factory
self.raise_on_temporal_mismatch = raise_on_temporal_mismatch

self.start_time: dt.datetime | None = None
self.report_folder: str | None = None
@@ -194,6 +197,7 @@ def run(self, workers: int | None = 1, multiprocess: bool = True, **tqdm_kwargs:
filter_logs_by_thread=filter_logs_by_thread,
logs_filter=self.logs_filter,
logs_handler_factory=self.logs_handler_factory,
raise_on_temporal_mismatch=self.raise_on_temporal_mismatch,
)
for workflow_kwargs, log_path in zip(self.execution_kwargs, log_paths)
]
@@ -264,7 +268,10 @@ def _execute_workflow(cls, data: _ProcessingData) -> WorkflowResults:
data.logs_handler_factory,
)

results = data.workflow.execute(data.workflow_kwargs, raise_errors=False)
with warnings.catch_warnings():
if data.raise_on_temporal_mismatch:
warnings.simplefilter("error", TemporalDimensionWarning)
results = data.workflow.execute(data.workflow_kwargs, raise_errors=False)

cls._try_remove_logging(data.log_path, logger, handler)
return results
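A sketch of opting into the stricter behaviour; the workflow, node, and `patches` list are assumptions, and the keyword arguments mirror the constructor shown above:

```python
from eolearn.core import EOExecutor, EONode, EOWorkflow, TemporalSubsetTask

node = EONode(TemporalSubsetTask(timestamps=[0, 1]))
workflow = EOWorkflow([node])

executor = EOExecutor(
    workflow,
    execution_kwargs=[{node: {"eopatch": patch}} for patch in patches],
    raise_on_temporal_mismatch=True,  # a TemporalDimensionWarning now fails that execution
)
results = executor.run(workers=4, multiprocess=True)
```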
6 changes: 2 additions & 4 deletions eolearn/core/eoworkflow.py
@@ -307,12 +307,10 @@ def get_nodes(self) -> list[EONode]:
return self._nodes[:]

@overload
def get_node_with_uid(self, uid: str, fail_if_missing: Literal[True] = ...) -> EONode:
...
def get_node_with_uid(self, uid: str, fail_if_missing: Literal[True] = ...) -> EONode: ...

@overload
def get_node_with_uid(self, uid: str, fail_if_missing: Literal[False] = ...) -> EONode | None:
...
def get_node_with_uid(self, uid: str, fail_if_missing: Literal[False] = ...) -> EONode | None: ...

def get_node_with_uid(self, uid: str, fail_if_missing: bool = False) -> EONode | None:
"""Returns node with give uid, if it exists in the workflow."""
2 changes: 1 addition & 1 deletion eolearn/core/extra/ray.py
@@ -61,7 +61,7 @@ def _get_processing_type(*_: Any, **__: Any) -> _ProcessingType:
def _ray_workflow_executor(workflow_args: _ProcessingData) -> WorkflowResults:
"""Called to execute a workflow on a ray worker"""
# pylint: disable=protected-access
return RayExecutor._execute_workflow(workflow_args)
return RayExecutor._execute_workflow(workflow_args) # noqa: SLF001


def parallelize_with_ray(
20 changes: 4 additions & 16 deletions eolearn/core/utils/parallelize.py
@@ -49,9 +49,7 @@ def _decide_processing_type(workers: int | None, multiprocess: bool) -> _ProcessingType:
"""
if workers == 1:
return _ProcessingType.SINGLE_PROCESS
if multiprocess:
return _ProcessingType.MULTIPROCESSING
return _ProcessingType.MULTITHREADING
return _ProcessingType.MULTIPROCESSING if multiprocess else _ProcessingType.MULTITHREADING


def parallelize(
@@ -74,10 +72,7 @@ def parallelize(
:return: A list of function results.
"""
if not params:
raise ValueError(
"At least 1 list of parameters should be given. Otherwise it is not clear how many times the"
"function has to be executed."
)
return []
processing_type = _decide_processing_type(workers=workers, multiprocess=multiprocess)

if processing_type is _ProcessingType.SINGLE_PROCESS:
@@ -105,7 +100,6 @@ def execute_with_mp_lock(function: Callable[..., OutputType], *args: Any, **kwargs: Any) -> OutputType:
:param function: A function
:param args: Function's positional arguments
:param kwargs: Function's keyword arguments
:return: Function's results
"""
if multiprocessing.current_process().name == "MainProcess" or MULTIPROCESSING_LOCK is None:
return function(*args, **kwargs)
@@ -165,10 +159,7 @@ def join_futures_iter(
"""

def _wait_function(remaining_futures: Collection[Future]) -> tuple[Collection[Future], Collection[Future]]:
done, not_done = concurrent.futures.wait(
remaining_futures, timeout=float(update_interval), return_when=FIRST_COMPLETED
)
return done, not_done
return concurrent.futures.wait(remaining_futures, timeout=float(update_interval), return_when=FIRST_COMPLETED)

def _get_result(future: Future) -> Any:
return future.result()
@@ -184,8 +175,6 @@ def _base_join_futures_iter(
) -> Generator[tuple[int, OutputType], None, None]:
"""A generalized utility function that resolves futures, monitors progress, and serves as an iterator over
results."""
if not isinstance(futures, list):
raise ValueError(f"Parameters 'futures' should be a list but {type(futures)} was given")
remaining_futures: Collection[FutureType] = _make_copy_and_empty_given(futures)

id_to_position_map = {id(future): index for index, future in enumerate(remaining_futures)}
@@ -195,9 +184,8 @@
done, remaining_futures = wait_function(remaining_futures)
for future in done:
result = get_result_function(future)
result_position = id_to_position_map[id(future)]
pbar.update(1)
yield result_position, result
yield id_to_position_map[id(future)], result


def _make_copy_and_empty_given(items: list[T]) -> list[T]:
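A usage sketch of the utility after this change (the `add` function is an assumption): an empty set of parameter lists now simply yields an empty result list instead of raising a `ValueError`.

```python
from eolearn.core.utils.parallelize import parallelize

def add(x: int, y: int) -> int:
    return x + y

# runs add(1, 10), add(2, 20), add(3, 30) across two threads and keeps the input order
results = parallelize(add, [1, 2, 3], [10, 20, 30], workers=2, multiprocess=False)
assert results == [11, 22, 33]

# no parameter lists -> nothing to execute -> empty result (previously a ValueError)
assert parallelize(add, workers=2) == []
```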
2 changes: 1 addition & 1 deletion eolearn/features/extra/clustering.py
@@ -88,7 +88,7 @@ def execute(self, eopatch: EOPatch) -> EOPatch:

# All connections to masked pixels are removed
if self.mask_name is not None:
mask = eopatch.mask_timeless[self.mask_name].squeeze()
mask = eopatch.mask_timeless[self.mask_name].squeeze(axis=-1)
graph_args["mask"] = mask
data = data[np.ravel(mask) != 0]

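The switch from `squeeze()` to `squeeze(axis=-1)` (here and in `morphology.py` below) guards against masks that have other size-1 dimensions; a small numpy illustration:

```python
import numpy as np

mask = np.zeros((1, 256, 1), dtype=np.uint8)  # (height, width, channels) with height == 1

print(mask.squeeze().shape)          # (256,)   -- the height axis is dropped as well
print(mask.squeeze(axis=-1).shape)   # (1, 256) -- only the channel axis is dropped
```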
8 changes: 6 additions & 2 deletions eolearn/geometry/morphology.py
@@ -48,7 +48,7 @@ def __init__(
self.no_data_label = no_data_label

def execute(self, eopatch: EOPatch) -> EOPatch:
feature_array = eopatch[(self.mask_type, self.mask_name)].squeeze().copy()
feature_array = eopatch[(self.mask_type, self.mask_name)].squeeze(axis=-1).copy()

all_labels = np.unique(feature_array)
erode_labels = self.erode_labels if self.erode_labels else all_labels
@@ -148,6 +148,10 @@
def map_method(self, feature: np.ndarray) -> np.ndarray:
"""Applies the morphological operation to a raster feature."""
feature = feature.copy()
is_bool = feature.dtype == bool
if is_bool:
feature = feature.astype(np.uint8)

morph_func = partial(cv2.morphologyEx, kernel=self.struct_elem, op=self.morph_operation)
if feature.ndim == 3:
for channel in range(feature.shape[2]):
@@ -158,4 +162,4 @@ def map_method(self, feature: np.ndarray) -> np.ndarray:
else:
raise ValueError(f"Invalid number of dimensions: {feature.ndim}")

return feature
return feature.astype(bool) if is_bool else feature
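A standalone sketch of the boolean handling added above: OpenCV's `morphologyEx` does not operate on boolean arrays, so the mask is viewed as `uint8` for the operation and cast back afterwards.

```python
import cv2
import numpy as np

mask = np.random.rand(64, 64) > 0.5  # boolean validity mask
kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))

# convert to uint8 for OpenCV, apply a morphological opening, then restore the boolean dtype
opened = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, kernel).astype(bool)
assert opened.dtype == bool and opened.shape == mask.shape
```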