From 65c80ff89306c6648659710df3601b8f7b0dc81e Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 12 Oct 2023 10:16:55 +0200 Subject: [PATCH 1/4] Upd: persist rechunked array to cluster first to avoid multiple file calls (#174) persist rechunked array to cluster first to avoid multiple file calls Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 1 + pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 98ef6db4..38d9d64e 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -58,6 +58,7 @@ def fit_curve( # The dimension along which to fit the curves cannot be chunked! rechunked_data = data.chunk(chunking) + rechunked_data.persist() def wrapper(f): def _wrap(*args, **kwargs): diff --git a/pyproject.toml b/pyproject.toml index 426ffefa..f136374e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.3" +version = "2023.10.4" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From ee873177994fa5ba615c02885f14b30f2f3a1293 Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 12 Oct 2023 10:48:19 +0200 Subject: [PATCH 2/4] fix persist not using itself (#176) Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 38d9d64e..d31207e3 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -58,7 +58,7 @@ def fit_curve( # The dimension along which to fit the curves cannot be chunked! rechunked_data = data.chunk(chunking) - rechunked_data.persist() + rechunked_data = rechunked_data.persist() def wrapper(f): def _wrap(*args, **kwargs): diff --git a/pyproject.toml b/pyproject.toml index f136374e..a64890b4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.4" +version = "2023.10.5" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From a0edb504ad9fb6b503d2cc00e6a50a60e51aa526 Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 12 Oct 2023 13:47:41 +0200 Subject: [PATCH 3/4] experimental: allow chunking along time axis (#177) Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 3 +-- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index d31207e3..7b14b74d 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -53,8 +53,7 @@ def fit_curve( # so we do this to generate names locally parameters = {f"param_{i}": v for i, v in enumerate(parameters)} - chunking = {key: "auto" for key in data.dims if key != dimension} - chunking[dimension] = -1 + chunking = {key: "auto" for key in data.dims} # The dimension along which to fit the curves cannot be chunked! rechunked_data = data.chunk(chunking) diff --git a/pyproject.toml b/pyproject.toml index a64890b4..73659e25 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.5" +version = "2023.10.6" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] From 597d1005a91bb36823d191cb14507f5a1ee716f1 Mon Sep 17 00:00:00 2001 From: Gerald Walter Irsiegler Date: Thu, 12 Oct 2023 14:20:21 +0200 Subject: [PATCH 4/4] reverse experimental chunking change (#178) Co-authored-by: Gerald Walter Irsiegler --- .../process_implementations/ml/curve_fitting.py | 3 ++- pyproject.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/ml/curve_fitting.py b/openeo_processes_dask/process_implementations/ml/curve_fitting.py index 7b14b74d..d31207e3 100644 --- a/openeo_processes_dask/process_implementations/ml/curve_fitting.py +++ b/openeo_processes_dask/process_implementations/ml/curve_fitting.py @@ -53,7 +53,8 @@ def fit_curve( # so we do this to generate names locally parameters = {f"param_{i}": v for i, v in enumerate(parameters)} - chunking = {key: "auto" for key in data.dims} + chunking = {key: "auto" for key in data.dims if key != dimension} + chunking[dimension] = -1 # The dimension along which to fit the curves cannot be chunked! rechunked_data = data.chunk(chunking) diff --git a/pyproject.toml b/pyproject.toml index 73659e25..3e4f6f92 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2023.10.6" +version = "2023.10.7" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "]