Fix(requirements): bump dependencies, mainly to eliminate third-party security issues (#1383)

* Fix(requirements): bump dependencies, mainly to eliminate third-party security issues

* Bump `TensorFlow 2.12.1->2.16.1`
* Bump `catboost 1.2.2->1.2.5`
* Bump `mlflow 2.9.2->2.12.2`
* Bump `azureml-core 1.49.0->1.56.0`
* Remove locked `gunicorn` and `packaging`

* Fix(requirements): bump test dependencies, mainly to eliminate third-party security issues

* Bump `docker 6.1.3->7.1.0`
* Bump `responses 0.23->0.25.3`
* Bump `black 22.12.0->24.4.2`
* Bump `notebook 6.5.6->7.2.1`

* Fix(requirements): use scikeras to bring back keras wrappers

* Add `scikeras~=0.13.0` dependency
* Use `scikeras.wrappers.KerasRegressor` instead of the removed `tensorflow.keras.wrappers.scikit_learn.KerasRegressor`
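
  A minimal sketch of the swap (`build_model` is a hypothetical factory, for illustration only):

```python
from scikeras.wrappers import KerasRegressor  # replaces tensorflow.keras.wrappers.scikit_learn
from tensorflow import keras

def build_model() -> keras.Model:
    # Hypothetical factory, for illustration only.
    model = keras.Sequential([keras.layers.Dense(1)])
    model.compile(optimizer="adam", loss="mse")
    return model

# scikeras names the factory argument `model`; the removed TF wrapper called it `build_fn`.
regressor = KerasRegressor(model=build_model, epochs=10, batch_size=32)
```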

* Fix: apply formatting

* Fix: add `super().__init__(**kwargs)` call to `KerasBaseEstimator`

* Fix: rename `build_fn` to `model`

* Fix: move `super().__init__(**kwargs)` to start of `KerasBaseEstimator.__init__`

* Fix: I'm done with comments for now

* Fix: remove redundant `BaseEstimator` inheritance in `KerasBaseEstimator`

* Fix: remove redundant `BaseWrapper` alias for `KerasRegressor`

* Fix: add super call to KerasBaseEstimator

* Fix: no comments

* Fix: refactor `__call__` method of models to `_prepare_model`

* Fix: move assignment of `kind` above the `_prepare_model` call

* Fix: prepare model right before calling fit in keras models

* Fix: rename `self.history` to `self._history` in `KerasBaseEstimator`

* Fix: rename `self.kind` and `self.kwargs` to `self._kind` and `self._kwargs` in `KerasBaseEstimator`

* Fix: refactor `getattr(keras.optimizers, optimizer)` to `keras.optimizers.get(optimizer)` in autoencoders
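
  Roughly the difference (standard Keras API, shown as a sketch):

```python
from tensorflow import keras

# getattr is a plain attribute lookup: it needs the exact class name
# and returns the class itself, not a configured instance.
adam_cls = getattr(keras.optimizers, "Adam")

# keras.optimizers.get resolves strings, config dicts, or instances
# through Keras's own registry and returns a ready optimizer object.
opt = keras.optimizers.get("Adam")
opt = keras.optimizers.get(
    {"class_name": "Adam", "config": {"learning_rate": 1e-3}}
)
```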

* Fix: remove `save_format` param from `save_model` in `KerasBaseEstimator.__getstate__`

* Fix: formatting

* Fix: change saving to tempfile

* Fix: formatting

* Fix: save model as .keras temp file

* Fix: load model as .keras temp file

* Fix: adjust test for argo versions

* Fix: save bytes instead of bytesio to model state
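
  Taken together, these serialization commits converge on a pattern roughly like the sketch below (the class name and details here are hypothetical, not the actual gordo code). Keras 3 dropped the `save_format` argument and infers the format from the `.keras` suffix, and plain bytes pickle more cleanly than a `BytesIO`:

```python
import os
import tempfile
from tensorflow import keras

class PicklableModelMixin:  # hypothetical name, for illustration only
    def __getstate__(self):
        state = self.__dict__.copy()
        if getattr(self, "model", None) is not None:
            # Keras 3 infers the format from the .keras suffix; no save_format arg.
            with tempfile.TemporaryDirectory() as tmp:
                path = os.path.join(tmp, "model.keras")
                self.model.save(path)
                with open(path, "rb") as f:
                    state["model"] = f.read()  # plain bytes, not BytesIO
        return state

    def __setstate__(self, state):
        model_bytes = state.get("model")
        if isinstance(model_bytes, (bytes, bytearray)):
            with tempfile.TemporaryDirectory() as tmp:
                path = os.path.join(tmp, "model.keras")
                with open(path, "wb") as f:
                    f.write(model_bytes)
                state["model"] = keras.models.load_model(path)
        self.__dict__.update(state)
```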

* Fix: skip loading uninitialized model from state

This is because `TransformedTargetRegressor` receives a `regressor` as input, clones it on every `fit`, and assigns the fitted clone to `regressor_`, leaving the original `regressor` unfitted forever.
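
A small demonstration of that scikit-learn behavior (plain sklearn, nothing gordo-specific):

```python
import numpy as np
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression

reg = LinearRegression()
ttr = TransformedTargetRegressor(regressor=reg, func=np.log1p, inverse_func=np.expm1)
ttr.fit(np.arange(10).reshape(-1, 1), np.arange(10, dtype=float))

print(hasattr(ttr.regressor_, "coef_"))  # True: fit() trained a clone
print(hasattr(reg, "coef_"))             # False: the original was never fitted
```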

* Fix: adjust test for serializer

* Fix: rename `KerasLSTMBaseEstimator` attributes to underscore-prefixed names

* Fix: do not propagate kwargs to `super().__init__` of KerasBaseEstimator

* Fix: propagate batch_size to `super().__init__` of KerasBaseEstimator
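
  The constructor shape these commits settle on, roughly (a sketch following the names in the messages above; the real `KerasBaseEstimator` does more):

```python
from scikeras.wrappers import KerasRegressor

class KerasBaseEstimator(KerasRegressor):  # simplified sketch, not the gordo class
    def __init__(self, kind: str, batch_size: int = 32, **kwargs):
        # Call super() first, forwarding only what scikeras knows how to
        # route (here just batch_size); keep everything else locally for
        # the model factory to consume.
        super().__init__(batch_size=batch_size)
        self._kind = kind
        self._kwargs = kwargs
```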

* Fix: add `input_shape` to the tensorflow layer definitions in the `KerasRawModelRegressor` docstring

* Fix: store history for model in `_history` and use proper `regressor_` in `_extract_metadata_from_model`

* Fix: formatting

* Fix: adjust `model` and `history` attributes access in `KerasBaseEstimator` and children

Also adjust tests

* Fix: adjust `lstm` `optimizer_kwargs` and `input.shape` access

* Fix: add input_shape to Dense layers in kerasraw test

* Formatting

* Add gunicorn as base requirement
RollerKnobster authored Jul 9, 2024
1 parent 2312c6a commit 19fc575
Showing 24 changed files with 220 additions and 202 deletions.
9 changes: 7 additions & 2 deletions docs/_static/architecture_diagram.py
@@ -9,9 +9,14 @@
 from diagrams.k8s.storage import PV
 from diagrams.custom import Custom

-directory=os.path.dirname(__file__)
+directory = os.path.dirname(__file__)

-with Diagram("Gordo flow", filename=os.path.join(directory, "architecture_diagram"), outformat="png", show=False) as diag:
+with Diagram(
+    "Gordo flow",
+    filename=os.path.join(directory, "architecture_diagram"),
+    outformat="png",
+    show=False,
+) as diag:
     with Cluster("K8s"):
         gordo = CRD("Gordo")
         api = API("")
15 changes: 11 additions & 4 deletions docs/conf.py
@@ -26,7 +26,11 @@
 author = "Equinor ASA"
 version = gordo.__version__
 _parsed_version = parse_version(version)
-commit = f"{version}" if type(_parsed_version) is GordoRelease and not _parsed_version.suffix else "HEAD"
+commit = (
+    f"{version}"
+    if type(_parsed_version) is GordoRelease and not _parsed_version.suffix
+    else "HEAD"
+)

 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
@@ -44,7 +48,7 @@
     "IPython.sphinxext.ipython_console_highlighting",
     "sphinx_copybutton",
     "sphinx_click",
-    "nbsphinx"
+    "nbsphinx",
 ]

 root_doc = "index"
@@ -59,8 +63,11 @@
 _ignore_linkcode_infos = [
     # caused "OSError: could not find class definition"
     {"module": "gordo_core.utils", "fullname": "PredictionResult"},
-    {'module': 'gordo.workflow.config_elements.schemas', 'fullname': 'Model.Config.extra'},
-    {'module': 'gordo.reporters.postgres', 'fullname': 'Machine.DoesNotExist'}
+    {
+        "module": "gordo.workflow.config_elements.schemas",
+        "fullname": "Model.Config.extra",
+    },
+    {"module": "gordo.reporters.postgres", "fullname": "Machine.DoesNotExist"},
 ]


36 changes: 18 additions & 18 deletions gordo/machine/model/anomaly/diff.py
@@ -95,13 +95,13 @@ def get_metadata(self):
         if hasattr(self, "aggregate_threshold_"):
             metadata["aggregate-threshold"] = self.aggregate_threshold_
         if hasattr(self, "feature_thresholds_per_fold_"):
-            metadata[
-                "feature-thresholds-per-fold"
-            ] = self.feature_thresholds_per_fold_.to_dict()
+            metadata["feature-thresholds-per-fold"] = (
+                self.feature_thresholds_per_fold_.to_dict()
+            )
         if hasattr(self, "aggregate_thresholds_per_fold_"):
-            metadata[
-                "aggregate-thresholds-per-fold"
-            ] = self.aggregate_thresholds_per_fold_
+            metadata["aggregate-thresholds-per-fold"] = (
+                self.aggregate_thresholds_per_fold_
+            )
         # Window threshold metadata
         if hasattr(self, "window"):
             metadata["window"] = self.window
@@ -111,23 +111,23 @@ def get_metadata(self):
             hasattr(self, "smooth_feature_thresholds_")
             and self.smooth_aggregate_threshold_ is not None
         ):
-            metadata[
-                "smooth-feature-thresholds"
-            ] = self.smooth_feature_thresholds_.tolist()
+            metadata["smooth-feature-thresholds"] = (
+                self.smooth_feature_thresholds_.tolist()
+            )
         if (
             hasattr(self, "smooth_aggregate_threshold_")
             and self.smooth_aggregate_threshold_ is not None
         ):
             metadata["smooth-aggregate-threshold"] = self.smooth_aggregate_threshold_

         if hasattr(self, "smooth_feature_thresholds_per_fold_"):
-            metadata[
-                "smooth-feature-thresholds-per-fold"
-            ] = self.smooth_feature_thresholds_per_fold_.to_dict()
+            metadata["smooth-feature-thresholds-per-fold"] = (
+                self.smooth_feature_thresholds_per_fold_.to_dict()
+            )
         if hasattr(self, "smooth_aggregate_thresholds_per_fold_"):
-            metadata[
-                "smooth-aggregate-thresholds-per-fold"
-            ] = self.smooth_aggregate_thresholds_per_fold_
+            metadata["smooth-aggregate-thresholds-per-fold"] = (
+                self.smooth_aggregate_thresholds_per_fold_
+            )

         if isinstance(self.base_estimator, GordoBase):
             metadata.update(self.base_estimator.get_metadata())
@@ -241,9 +241,9 @@ def cross_validate(
                 smooth_aggregate_threshold_fold = (
                     scaled_mse.rolling(self.window).min().max()
                 )
-                self.smooth_aggregate_thresholds_per_fold_[
-                    f"fold-{i}"
-                ] = smooth_aggregate_threshold_fold
+                self.smooth_aggregate_thresholds_per_fold_[f"fold-{i}"] = (
+                    smooth_aggregate_threshold_fold
+                )

                 smooth_tag_thresholds_fold = mae.rolling(self.window).min().max()
                 smooth_tag_thresholds_fold.name = f"fold-{i}"
3 changes: 1 addition & 2 deletions gordo/machine/model/factories/lstm_autoencoder.py
@@ -3,10 +3,10 @@
 from typing import Tuple, Union, Dict, Any

 import tensorflow
-from tensorflow import keras
 from tensorflow.keras.optimizers import Optimizer
 from tensorflow.keras.layers import Dense, LSTM
 from tensorflow.keras.models import Sequential as KerasSequential
+from tensorflow import keras

 from gordo.machine.model.register import register_model_builder
 from gordo.machine.model.factories.utils import hourglass_calc_dims, check_dim_func_len
@@ -189,7 +189,6 @@ def lstm_hourglass(
     compile_kwargs: Dict[str, Any] = dict(),
     **kwargs,
 ) -> tensorflow.keras.models.Sequential:
-
     """
     Builds an hourglass shaped neural network, with decreasing number of neurons
12 changes: 6 additions & 6 deletions gordo/machine/model/register.py
@@ -48,22 +48,22 @@ def special_keras_model_builder(n_features, ...):
     def __init__(self, type: str):
         self.type = type

-    def __call__(self, build_fn: Callable[..., keras.models.Model]):
-        self._register(self.type, build_fn)
-        return build_fn
+    def __call__(self, model: Callable[..., keras.models.Model]):
+        self._register(self.type, model)
+        return model

     @classmethod
-    def _register(cls, type: str, build_fn: Callable[[int, Any], GordoBase]):
+    def _register(cls, type: str, model: Callable[[int, Any], GordoBase]):
         """
         Registers a given function as an available factory under
         this type.
         """
-        cls._validate_func(build_fn)
+        cls._validate_func(model)

         # Add function to available factories under this type
         if type not in cls.factories:
             cls.factories[type] = dict()
-        cls.factories[type][build_fn.__name__] = build_fn
+        cls.factories[type][model.__name__] = model

     @staticmethod
     def _validate_func(func):
16 changes: 10 additions & 6 deletions gordo/machine/model/transformers/imputer.py
@@ -71,14 +71,18 @@ def fit(self, X: Union[pd.DataFrame, np.ndarray], y=None):

         # Calculate a 1d arrays of fill values for each feature
         self._posinf_fill_values = _posinf_fill_values.apply(
-            lambda val: val + self.delta
-            if max_allowable_value - self.delta > val
-            else max_allowable_value
+            lambda val: (
+                val + self.delta
+                if max_allowable_value - self.delta > val
+                else max_allowable_value
+            )
         )
         self._neginf_fill_values = _neginf_fill_values.apply(
-            lambda val: val - self.delta
-            if min_allowable_value + self.delta < val
-            else min_allowable_value
+            lambda val: (
+                val - self.delta
+                if min_allowable_value + self.delta < val
+                else min_allowable_value
+            )
         )

         return self
10 changes: 6 additions & 4 deletions gordo/machine/model/utils.py
@@ -111,9 +111,11 @@ def make_base_dataframe(

     # Calculate the end times if possible, or also all 'None's
     end_series = start_series.map(
-        lambda start: (start + frequency).isoformat()
-        if isinstance(start, datetime) and frequency is not None
-        else None
+        lambda start: (
+            (start + frequency).isoformat()
+            if isinstance(start, datetime) and frequency is not None
+            else None
+        )
     )

# Convert to isoformatted string for JSON serialization.
@@ -134,7 +136,7 @@
     # the multiindex column dataframe, and naming their second level labels as needed.
     name: str
     values: np.ndarray
-    for (name, values) in filter(lambda nv: nv[1] is not None, names_n_values):
+    for name, values in filter(lambda nv: nv[1] is not None, names_n_values):

_tags = tags if name == "model-input" else target_tag_list

6 changes: 3 additions & 3 deletions gordo/serializer/from_definition.py
@@ -176,9 +176,9 @@ def _build_step(
     import_str = list(step.keys())[0]

     try:
-        StepClass: Union[
-            None, FeatureUnion, Pipeline, BaseEstimator
-        ] = import_location(import_str)
+        StepClass: Union[None, FeatureUnion, Pipeline, BaseEstimator] = (
+            import_location(import_str)
+        )
     except (ImportError, ValueError):
         StepClass = None

8 changes: 5 additions & 3 deletions gordo/serializer/into_definition.py
@@ -172,9 +172,11 @@ def load_definition_from_params(params: dict, tuples_to_list: bool = True) -> dict:
             # TODO: Make this more robust, probably via another function to parse the iterable recursively
             # TODO: b/c it _could_, in theory, be a dict of {str: BaseEstimator} or similar.
             definition[param] = [
-                _decompose_node(leaf[1], tuples_to_list=tuples_to_list)
-                if isinstance(leaf, tuple)
-                else leaf
+                (
+                    _decompose_node(leaf[1], tuples_to_list=tuples_to_list)
+                    if isinstance(leaf, tuple)
+                    else leaf
+                )
                 for leaf in param_val
             ]

8 changes: 5 additions & 3 deletions gordo/server/utils.py
@@ -131,9 +131,11 @@ def dataframe_to_dict(df: pd.DataFrame) -> dict:
     data.index = data.index.astype(str)
     if isinstance(df.columns, pd.MultiIndex):
         return {
-            col: data[col].to_dict()
-            if isinstance(data[col], pd.DataFrame)
-            else pd.DataFrame(data[col]).to_dict()
+            col: (
+                data[col].to_dict()
+                if isinstance(data[col], pd.DataFrame)
+                else pd.DataFrame(data[col]).to_dict()
+            )
             for col in data.columns.get_level_values(0)
         }
     else:
3 changes: 1 addition & 2 deletions gordo/util/version.py
@@ -8,8 +8,7 @@

 class Version(metaclass=ABCMeta):
     @abstractmethod
-    def get_version(self):
-        ...
+    def get_version(self): ...


 class Special(Enum):
4 changes: 2 additions & 2 deletions gordo/workflow/config_elements/normalized_config.py
@@ -119,11 +119,11 @@ def __init__(
         if gordo_version is None:
             gordo_version = __version__
         default_globals = self.get_default_globals(gordo_version)
-        default_globals["runtime"]["influx"][  # type: ignore
+        default_globals["runtime"]["influx"][
             "resources"
         ] = _calculate_influx_resources(  # type: ignore
             len(config["machines"])
-        )
+        )  # type: ignore

         passed_globals = load_globals_config(
             config.get("globals", dict()), join_json_paths("globals", json_path)
4 changes: 2 additions & 2 deletions pytest.ini
@@ -11,8 +11,8 @@ addopts =
     --doctest-glob='*.md'
     --doctest-glob='*.rst'
     --junitxml=junit/junit.xml
-    --cov-report=xml
-    --cov=gordo
+    ; --cov-report=xml
+    ; --cov=gordo
 flakes-ignore =
     __init__.py UnusedImport
     test_*.py UnusedImport
8 changes: 2 additions & 6 deletions requirements/full_requirements.txt
@@ -145,8 +145,6 @@ graphql-core==3.2.3
     #   graphql-relay
 graphql-relay==3.2.0
     # via graphene
-greenlet==3.0.3
-    # via sqlalchemy
 grpcio==1.64.1
     # via
     #   tensorboard
@@ -289,7 +287,7 @@ opt-einsum==3.3.0
     # via tensorflow
 optree==0.11.0
     # via keras
-packaging==21.3
+packaging==24.1
     # via
     #   -r requirements.in
     #   azureml-core
@@ -352,9 +350,7 @@ pyopenssl==24.1.0
     #   azureml-core
     #   ndg-httpsclient
 pyparsing==3.1.2
-    # via
-    #   matplotlib
-    #   packaging
+    # via matplotlib
 pysocks==1.7.1
     # via requests
 python-dateutil==2.9.0.post0
2 changes: 1 addition & 1 deletion requirements/mlflow_requirements.in
@@ -1,2 +1,2 @@
 mlflow~=2.14
-azureml-core~=1.49
+azureml-core~=1.56.0
5 changes: 2 additions & 3 deletions requirements/requirements.in
@@ -1,15 +1,14 @@
 dictdiffer~=0.8
 dataclasses-json~=0.3
-gunicorn~=22.0
 jinja2~=3.1
 python-dateutil~=2.8
 tensorflow~=2.16.0
 scikeras~=0.13.0
+gunicorn~=22.0
 # There's a bug in keras 3.4.0 with loading models (https://github.com/keras-team/keras/issues/19921)
 keras<3.4.0
 Flask>=2.2.5,<3.0.0
 simplejson~=3.17
 prometheus_client~=0.7
-# Due to azureml-core 1.49.0 depends on packaging<22.0
-packaging>=21.0,<22.0
+packaging>=24.0
 gordo-client~=6.2
11 changes: 5 additions & 6 deletions requirements/test_requirements.in
@@ -1,6 +1,6 @@
 -c full_requirements.txt
-docker>=4.0,<7.0
-pytest~=7.2
+docker~=7.1.0
+pytest~=8.2
 pytest-xdist~=3.2
 pytest-mock~=3.6
 pytest-mypy~=0.10
@@ -9,10 +9,9 @@ pytest-cov~=4.0
 pytest-benchmark~=4.0
 pytest-flakes~=4.0
 mock~=5.0
-responses~=0.23
-# Due to packaging>22.0 in black 23.0, azureml-core~=1.49 requires packaging<22.0
-black>=22.0,<23.0
-notebook~=6.4
+responses~=0.25.3
+black~=24.4.2
+notebook~=7.2.1
 nbconvert~=7.4
 types-simplejson
 types-python-dateutil