From 925c960dd9bad8d5bf368d388b98bab2209b9c3e Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 9 Oct 2023 19:58:00 -0500 Subject: [PATCH 01/39] Gets SciKeras script working WIP still need to fix one test --- skops/io/_general.py | 5 ++++- skops/io/_utils.py | 15 +++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/skops/io/_general.py b/skops/io/_general.py index 6d70b7ac..43d89501 100644 --- a/skops/io/_general.py +++ b/skops/io/_general.py @@ -4,6 +4,7 @@ import json import operator import uuid +import weakref from functools import partial from reprlib import Repr from types import FunctionType, MethodType @@ -46,10 +47,12 @@ def dict_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: for key, value in obj.items(): if isinstance(value, property): continue + if isinstance(key, weakref.ref): + key = getattr(key(), "_name") if np.isscalar(key) and hasattr(key, "item"): # convert numpy value to python object key = key.item() # type: ignore - content[key] = get_state(value, save_context) + content[str(key)] = get_state(value, save_context) res["content"] = content res["key_types"] = key_types return res diff --git a/skops/io/_utils.py b/skops/io/_utils.py index d929380a..157f1dc9 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -157,14 +157,13 @@ def get_state(value, save_context: SaveContext) -> dict[str, Any]: # fails with `get_state`, we try with json.dumps, if that fails, we raise # the original error alongside the json error. - # TODO: This should help with fixing recursive references. - # if id(value) in save_context.memo: - # return { - # "__module__": None, - # "__class__": None, - # "__id__": id(value), - # "__loader__": "CachedNode", - # } + if id(value) in save_context.memo: + return { + "__module__": None, + "__class__": None, + "__id__": id(value), + "__loader__": "CachedNode", + } __id__ = save_context.memoize(obj=value) From 35005927a1e53df1af23a1b8159c4a8b8d99acf4 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 10 Oct 2023 18:59:48 -0500 Subject: [PATCH 02/39] Fixes test_metainfo --- skops/io/tests/test_persist.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py index 25ebf37d..a11d2204 100644 --- a/skops/io/tests/test_persist.py +++ b/skops/io/tests/test_persist.py @@ -605,6 +605,11 @@ def fit(self, X, y=None, **fit_params): for key, val_expected in expected.items(): for state in states: val_state = state[key] + + # skipping all the state values that are a cached node + if val_state["__loader__"] == "CachedNode": + continue + # check presence of "content"/"file" but not exact values assert ("content" in val_state) or ("file" in val_state) assert val_state["__class__"] == val_expected["__class__"] From 492a1caa9450e9859bf71157af68dd31849cbc6d Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 10 Oct 2023 19:07:15 -0500 Subject: [PATCH 03/39] Updates changes.rst --- docs/changes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/changes.rst b/docs/changes.rst index 3be2a186..a8867d94 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -16,6 +16,8 @@ v0.9 estimators. :pr:`384` by :user:`Reid Johnson `. - Fix an issue with visualizing Skops files for `scikit-learn` tree estimators. :pr:`386` by :user:`Reid Johnson `. +- Fix dumping Scikeras model failing because of maximum recursion depth. :pr:`388` + by :user:`Thomas Lazarus `. v0.8 ---- From f1b93fe6e04b997f35a0dae9ff5c067c9a6748b6 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus <46943923+lazarust@users.noreply.github.com> Date: Sat, 28 Oct 2023 19:05:14 -0500 Subject: [PATCH 04/39] Update changes.rst --- docs/changes.rst | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 6a9069cf..91ab326a 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -16,8 +16,6 @@ v0.9 estimators. :pr:`384` by :user:`Reid Johnson `. - Fix an issue with visualizing Skops files for `scikit-learn` tree estimators. :pr:`386` by :user:`Reid Johnson `. -- :func:`skops.hug_utils.get_model_output` is deprecated and will be removed in version - 0.10. :pr:`396` by `Adrin Jalali`_. - :func:`skops.hub_utils.get_model_output` and :func:`skops.hub_utils.push` are deprecated and will be removed in version 0.10. :pr:`396` by `Adrin Jalali`_. - Fix dumping Scikeras model failing because of maximum recursion depth. :pr:`388` From bbe6b347aba2c565a41ff9a48af5e4d79f5fda11 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 13 Nov 2023 19:19:34 -0600 Subject: [PATCH 05/39] Adds test --- skops/_min_dependencies.py | 1 + skops/io/tests/test_external.py | 40 ++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index 0a8290a8..8809a491 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -36,6 +36,7 @@ "catboost": ("1.0", "tests", None), "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), + "scikeras": ("0.4.0", "tests", None), } diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index aeb7ce72..8479d21e 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -16,8 +16,9 @@ import pytest from sklearn.datasets import make_classification, make_regression +from sklearn.pipeline import Pipeline -from skops.io import dumps, loads, visualize +from skops.io import dump, dumps, loads, visualize from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal # Default settings for generated data @@ -427,3 +428,40 @@ def test_quantile_forest(self, quantile_forest, regr_data, trusted, tree_method) assert_method_outputs_equal(estimator, loaded, X) visualize(dumped, trusted=trusted) + + +class TestSciKeras: + """Tests for SciKerasRegressor and SciKerasClassifier""" + + @pytest.fixture(autouse=True) + def capture_stdout(self): + # Mock print and rich.print so that running these tests with pytest -s + # does not spam stdout. Other, more common methods of suppressing + # printing to stdout don't seem to work, perhaps because of pytest. + with patch("builtins.print", Mock()), patch("rich.print", Mock()): + yield + + @pytest.fixture(autouse=True) + def keras(self): + scikeras = pytest.importorskip("scikeras") + return scikeras + + @pytest.fixture + def test_dumping_model(self, keras): + from scikeras.wrappers import KerasClassifier + + # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html + + def get_clf(meta): + n_features_in_ = meta["n_features_in_"] + model = keras.models.Sequential() + model.add(keras.layers.Input(shape=(n_features_in_,))) + model.add(keras.layers.Dense(1, activation="sigmoid")) + return model + + clf = KerasClassifier(model=get_clf, loss="binary_crossentropy") + + pipeline = Pipeline([("classifier", clf)]) + + dump(clf, "keras-test.skops") + dump(pipeline, "keras-test.skops") From 3b274b472c3719a0969d11bcf68eb3837a91eff7 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Fri, 17 Nov 2023 19:28:19 -0600 Subject: [PATCH 06/39] Loads dumped model in test and checks output --- skops/io/tests/test_external.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index 8479d21e..ed08e458 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -447,7 +447,16 @@ def keras(self): return scikeras @pytest.fixture - def test_dumping_model(self, keras): + def trusted(self): + return [ + "scikeras.wrappers.KerasClassifier", + "keras.models.Sequential", + "keras.layers.core.Dense", + "keras.layers.core.Input", + ] + + @pytest.fixture + def test_dumping_model(self, keras, trusted): from scikeras.wrappers import KerasClassifier # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html @@ -465,3 +474,10 @@ def get_clf(meta): dump(clf, "keras-test.skops") dump(pipeline, "keras-test.skops") + + X, y = make_classification(1000, 20, n_informative=10, random_state=0) + clf.fit(X, y) + dumped = dump(clf, "keras-test.skops") + + loaded = loads(dumped, trusted=trusted) + assert_method_outputs_equal(clf, loaded, X) From e7ab34ec92b2ce756813796ce9e7913529309ffa Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 21 Nov 2023 17:33:08 -0600 Subject: [PATCH 07/39] Refactor test_external.py to use dumps instead of dump --- skops/io/tests/test_external.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index ed08e458..713bf071 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -18,7 +18,7 @@ from sklearn.datasets import make_classification, make_regression from sklearn.pipeline import Pipeline -from skops.io import dump, dumps, loads, visualize +from skops.io import dumps, loads, visualize from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal # Default settings for generated data @@ -472,12 +472,12 @@ def get_clf(meta): pipeline = Pipeline([("classifier", clf)]) - dump(clf, "keras-test.skops") - dump(pipeline, "keras-test.skops") + dumps(clf, "keras-test.skops") + dumps(pipeline, "keras-test.skops") X, y = make_classification(1000, 20, n_informative=10, random_state=0) clf.fit(X, y) - dumped = dump(clf, "keras-test.skops") + dumped = dumps(clf, "keras-test.skops") loaded = loads(dumped, trusted=trusted) assert_method_outputs_equal(clf, loaded, X) From a1a92cc060f64db6eb2d1c8bc35dd31dbff8bc03 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Wed, 22 Nov 2023 20:36:18 -0600 Subject: [PATCH 08/39] Add TensorFlow as a dependent package --- skops/_min_dependencies.py | 1 + skops/io/tests/test_external.py | 101 +++++++++++++++++++++++++++----- 2 files changed, 87 insertions(+), 15 deletions(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index 8809a491..074c40c3 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -37,6 +37,7 @@ "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), "scikeras": ("0.4.0", "tests", None), + "tensorflow": ("2.4.0", "tests", None), } diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index 713bf071..42133ffc 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -442,42 +442,113 @@ def capture_stdout(self): yield @pytest.fixture(autouse=True) - def keras(self): + def tensorflow(self): + tensorflow = pytest.importorskip("tensorflow") + return tensorflow + + @pytest.fixture(autouse=True) + def scikeras(self): scikeras = pytest.importorskip("scikeras") return scikeras @pytest.fixture def trusted(self): return [ + "_thread.RLock", + "builtins.weakref", + "collections.Counter", + "collections.OrderedDict", + "collections.defaultdict", + "inspect.FullArgSpec", + "inspect.Signature", + "keras.src.activations.sigmoid", + "keras.src.backend.RandomGenerator", + "keras.src.callbacks.History", + "keras.src.engine.compile_utils.LossesContainer", + "keras.src.engine.compile_utils.MetricsContainer", + "keras.src.engine.input_layer.InputLayer", + "keras.src.engine.input_spec.InputSpec", + "keras.src.engine.keras_tensor.KerasTensor", + "keras.src.engine.node.KerasHistory", + "keras.src.engine.node.Node", + "keras.src.engine.sequential.Sequential", + "keras.src.engine.training.train_function", + "keras.src.initializers.initializers.GlorotUniform", + "keras.src.initializers.initializers.Zeros", + "keras.src.layers.core.dense.Dense", + "keras.src.losses.LossFunctionWrapper", + "keras.src.losses.binary_crossentropy", + "keras.src.metrics.base_metric.Mean", + "keras.src.metrics.base_metric.method", + "keras.src.mixed_precision.policy.Policy", + "keras.src.optimizers.legacy.rmsprop.RMSprop", + "keras.src.optimizers.utils.", + "keras.src.optimizers.utils.all_reduce_sum_gradients", + "keras.src.saving.serialization_lib.Config", + "keras.src.utils.layer_utils.CallFunctionSpec", + "keras.src.utils.metrics_utils.Reduction", + "keras.src.utils.object_identity.ObjectIdentityDictionary", + "numpy.dtype", + "scikeras.utils.transformers.ClassifierLabelEncoder", + "scikeras.utils.transformers.TargetReshaper", "scikeras.wrappers.KerasClassifier", - "keras.models.Sequential", - "keras.layers.core.Dense", - "keras.layers.core.Input", + "tensorflow.core.function.capture.capture_container.FunctionCaptures", + "tensorflow.core.function.capture.capture_container.MutationAwareDict", + "tensorflow.core.function.polymorphism.function_cache.FunctionCache", + "tensorflow.core.function.polymorphism.function_type.FunctionType", + "tensorflow.python.checkpoint.checkpoint.Checkpoint", + "tensorflow.python.checkpoint.checkpoint.TrackableSaver", + "tensorflow.python.checkpoint.graph_view.ObjectGraphView", + "tensorflow.python.data.ops.iterator_ops.IteratorSpec", + "tensorflow.python.eager.polymorphic_function.atomic_function.AtomicFunction", + "tensorflow.python.eager.polymorphic_function.function_spec.FunctionSpec", + "tensorflow.python.eager.polymorphic_function.monomorphic_function.ConcreteFunction", + "tensorflow.python.eager.polymorphic_function.monomorphic_function.ConcreteFunctionGarbageCollector", + "tensorflow.python.eager.polymorphic_function.monomorphic_function._DelayedRewriteGradientFunctions", + "tensorflow.python.eager.polymorphic_function.polymorphic_function.Function", + "tensorflow.python.eager.polymorphic_function.polymorphic_function.UnliftedInitializerVariable", + "tensorflow.python.eager.polymorphic_function.tracing_compiler.TracingCompiler", + "tensorflow.python.framework.dtypes.DType", + "tensorflow.python.framework.func_graph.FuncGraph", + "tensorflow.python.framework.ops.EagerTensor", + "tensorflow.python.framework.tensor.TensorSpec", + "tensorflow.python.framework.tensor_shape.TensorShape", + "tensorflow.python.ops.resource_variable_ops.ResourceVariable", + "tensorflow.python.ops.variables.VariableAggregation", + "tensorflow.python.ops.variables.VariableAggregationV2", + "tensorflow.python.ops.variables.VariableSynchronization", + "tensorflow.python.trackable.base.TrackableReference", + "tensorflow.python.trackable.base.WeakTrackableReference", + "tensorflow.python.trackable.data_structures.dict", + "tensorflow.python.util.object_identity.ObjectIdentitySet", + "tensorflow.python.util.tf_decorator.TFDecorator", + "test_external.get_clf", + "weakref.WeakKeyDictionary", + "weakref.remove", ] - @pytest.fixture - def test_dumping_model(self, keras, trusted): - from scikeras.wrappers import KerasClassifier - + def test_dumping_model(self, scikeras, tensorflow, trusted): # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html def get_clf(meta): n_features_in_ = meta["n_features_in_"] - model = keras.models.Sequential() - model.add(keras.layers.Input(shape=(n_features_in_,))) - model.add(keras.layers.Dense(1, activation="sigmoid")) + model = tensorflow.keras.models.Sequential() + model.add(tensorflow.keras.layers.Input(shape=(n_features_in_,))) + model.add(tensorflow.keras.layers.Dense(1, activation="sigmoid")) return model - clf = KerasClassifier(model=get_clf, loss="binary_crossentropy") + clf = scikeras.wrappers.KerasClassifier( + model=get_clf, loss="binary_crossentropy" + ) pipeline = Pipeline([("classifier", clf)]) - dumps(clf, "keras-test.skops") - dumps(pipeline, "keras-test.skops") + dumps(clf) + dumps(pipeline) X, y = make_classification(1000, 20, n_informative=10, random_state=0) clf.fit(X, y) - dumped = dumps(clf, "keras-test.skops") + dumped = dumps(clf) loaded = loads(dumped, trusted=trusted) assert_method_outputs_equal(clf, loaded, X) From 7b0f21e28a385b105b2feabf3a7a3e63f962d602 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Fri, 24 Nov 2023 12:57:37 -0600 Subject: [PATCH 09/39] WIP Still running into a recursion error --- skops/io/tests/test_external.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index 42133ffc..e76404d3 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -522,7 +522,6 @@ def trusted(self): "tensorflow.python.trackable.data_structures.dict", "tensorflow.python.util.object_identity.ObjectIdentitySet", "tensorflow.python.util.tf_decorator.TFDecorator", - "test_external.get_clf", "weakref.WeakKeyDictionary", "weakref.remove", ] @@ -530,16 +529,12 @@ def trusted(self): def test_dumping_model(self, scikeras, tensorflow, trusted): # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html - def get_clf(meta): - n_features_in_ = meta["n_features_in_"] - model = tensorflow.keras.models.Sequential() - model.add(tensorflow.keras.layers.Input(shape=(n_features_in_,))) - model.add(tensorflow.keras.layers.Dense(1, activation="sigmoid")) - return model + n_features_in_ = 20 + model = tensorflow.keras.models.Sequential() + model.add(tensorflow.keras.layers.Input(shape=(n_features_in_,))) + model.add(tensorflow.keras.layers.Dense(1, activation="sigmoid")) - clf = scikeras.wrappers.KerasClassifier( - model=get_clf, loss="binary_crossentropy" - ) + clf = scikeras.wrappers.KerasClassifier(model=model, loss="binary_crossentropy") pipeline = Pipeline([("classifier", clf)]) From 3397b17d3dbc2456459ae845becab6d7dcbdc1ea Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Fri, 24 Nov 2023 13:00:32 -0600 Subject: [PATCH 10/39] Refactor imports and update test method --- skops/io/tests/test_external.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index e76404d3..48f7603b 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -446,11 +446,6 @@ def tensorflow(self): tensorflow = pytest.importorskip("tensorflow") return tensorflow - @pytest.fixture(autouse=True) - def scikeras(self): - scikeras = pytest.importorskip("scikeras") - return scikeras - @pytest.fixture def trusted(self): return [ @@ -526,7 +521,7 @@ def trusted(self): "weakref.remove", ] - def test_dumping_model(self, scikeras, tensorflow, trusted): + def test_dumping_model(self, tensorflow, trusted): # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html n_features_in_ = 20 @@ -534,7 +529,9 @@ def test_dumping_model(self, scikeras, tensorflow, trusted): model.add(tensorflow.keras.layers.Input(shape=(n_features_in_,))) model.add(tensorflow.keras.layers.Dense(1, activation="sigmoid")) - clf = scikeras.wrappers.KerasClassifier(model=model, loss="binary_crossentropy") + from scikeras.wrappers import KerasClassifier + + clf = KerasClassifier(model=model, loss="binary_crossentropy") pipeline = Pipeline([("classifier", clf)]) From 49a16f0305a9e3c1acc5b1ef12fd1151fec7d546 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 28 Nov 2023 19:37:16 -0600 Subject: [PATCH 11/39] WIP Fix get_state function to include module and class information for CachedNodes I'm wodering after looking at the types of a lot of the CachedNodes if there's something weird happening with `None` --- skops/io/_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/skops/io/_utils.py b/skops/io/_utils.py index 157f1dc9..d8168bf2 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -159,8 +159,8 @@ def get_state(value, save_context: SaveContext) -> dict[str, Any]: if id(value) in save_context.memo: return { - "__module__": None, - "__class__": None, + "__module__": get_module(type(value)), + "__class__": value.__class__.__name__, "__id__": id(value), "__loader__": "CachedNode", } From b51ca0e1e6c89966af879bb442e79f0441628ead Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Wed, 3 Jan 2024 21:41:24 -0600 Subject: [PATCH 12/39] Reverts changes from previous implementation This commit can be reverted if we decide to pivot back to this implementation, but I just wanted to start from a clean slate on the new direction. --- skops/io/_general.py | 5 +- skops/io/_utils.py | 14 ++-- skops/io/tests/test_external.py | 117 -------------------------------- skops/io/tests/test_persist.py | 4 -- 4 files changed, 8 insertions(+), 132 deletions(-) diff --git a/skops/io/_general.py b/skops/io/_general.py index 30cefb3f..662252bc 100644 --- a/skops/io/_general.py +++ b/skops/io/_general.py @@ -4,7 +4,6 @@ import json import operator import uuid -import weakref from functools import partial from reprlib import Repr from types import FunctionType, MethodType @@ -47,12 +46,10 @@ def dict_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: for key, value in obj.items(): if isinstance(value, property): continue - if isinstance(key, weakref.ref): - key = getattr(key(), "_name") if np.isscalar(key) and hasattr(key, "item"): # convert numpy value to python object key = key.item() # type: ignore - content[str(key)] = get_state(value, save_context) + content[key] = get_state(value, save_context) res["content"] = content res["key_types"] = key_types return res diff --git a/skops/io/_utils.py b/skops/io/_utils.py index d8168bf2..37148802 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -157,13 +157,13 @@ def get_state(value, save_context: SaveContext) -> dict[str, Any]: # fails with `get_state`, we try with json.dumps, if that fails, we raise # the original error alongside the json error. - if id(value) in save_context.memo: - return { - "__module__": get_module(type(value)), - "__class__": value.__class__.__name__, - "__id__": id(value), - "__loader__": "CachedNode", - } + # if id(value) in save_context.memo: + # return { + # "__module__": None, + # "__class__": None, + # "__id__": id(value), + # "__loader__": "CachedNode", + # } __id__ = save_context.memoize(obj=value) diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index 48f7603b..aeb7ce72 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -16,7 +16,6 @@ import pytest from sklearn.datasets import make_classification, make_regression -from sklearn.pipeline import Pipeline from skops.io import dumps, loads, visualize from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal @@ -428,119 +427,3 @@ def test_quantile_forest(self, quantile_forest, regr_data, trusted, tree_method) assert_method_outputs_equal(estimator, loaded, X) visualize(dumped, trusted=trusted) - - -class TestSciKeras: - """Tests for SciKerasRegressor and SciKerasClassifier""" - - @pytest.fixture(autouse=True) - def capture_stdout(self): - # Mock print and rich.print so that running these tests with pytest -s - # does not spam stdout. Other, more common methods of suppressing - # printing to stdout don't seem to work, perhaps because of pytest. - with patch("builtins.print", Mock()), patch("rich.print", Mock()): - yield - - @pytest.fixture(autouse=True) - def tensorflow(self): - tensorflow = pytest.importorskip("tensorflow") - return tensorflow - - @pytest.fixture - def trusted(self): - return [ - "_thread.RLock", - "builtins.weakref", - "collections.Counter", - "collections.OrderedDict", - "collections.defaultdict", - "inspect.FullArgSpec", - "inspect.Signature", - "keras.src.activations.sigmoid", - "keras.src.backend.RandomGenerator", - "keras.src.callbacks.History", - "keras.src.engine.compile_utils.LossesContainer", - "keras.src.engine.compile_utils.MetricsContainer", - "keras.src.engine.input_layer.InputLayer", - "keras.src.engine.input_spec.InputSpec", - "keras.src.engine.keras_tensor.KerasTensor", - "keras.src.engine.node.KerasHistory", - "keras.src.engine.node.Node", - "keras.src.engine.sequential.Sequential", - "keras.src.engine.training.train_function", - "keras.src.initializers.initializers.GlorotUniform", - "keras.src.initializers.initializers.Zeros", - "keras.src.layers.core.dense.Dense", - "keras.src.losses.LossFunctionWrapper", - "keras.src.losses.binary_crossentropy", - "keras.src.metrics.base_metric.Mean", - "keras.src.metrics.base_metric.method", - "keras.src.mixed_precision.policy.Policy", - "keras.src.optimizers.legacy.rmsprop.RMSprop", - "keras.src.optimizers.utils.", - "keras.src.optimizers.utils.all_reduce_sum_gradients", - "keras.src.saving.serialization_lib.Config", - "keras.src.utils.layer_utils.CallFunctionSpec", - "keras.src.utils.metrics_utils.Reduction", - "keras.src.utils.object_identity.ObjectIdentityDictionary", - "numpy.dtype", - "scikeras.utils.transformers.ClassifierLabelEncoder", - "scikeras.utils.transformers.TargetReshaper", - "scikeras.wrappers.KerasClassifier", - "tensorflow.core.function.capture.capture_container.FunctionCaptures", - "tensorflow.core.function.capture.capture_container.MutationAwareDict", - "tensorflow.core.function.polymorphism.function_cache.FunctionCache", - "tensorflow.core.function.polymorphism.function_type.FunctionType", - "tensorflow.python.checkpoint.checkpoint.Checkpoint", - "tensorflow.python.checkpoint.checkpoint.TrackableSaver", - "tensorflow.python.checkpoint.graph_view.ObjectGraphView", - "tensorflow.python.data.ops.iterator_ops.IteratorSpec", - "tensorflow.python.eager.polymorphic_function.atomic_function.AtomicFunction", - "tensorflow.python.eager.polymorphic_function.function_spec.FunctionSpec", - "tensorflow.python.eager.polymorphic_function.monomorphic_function.ConcreteFunction", - "tensorflow.python.eager.polymorphic_function.monomorphic_function.ConcreteFunctionGarbageCollector", - "tensorflow.python.eager.polymorphic_function.monomorphic_function._DelayedRewriteGradientFunctions", - "tensorflow.python.eager.polymorphic_function.polymorphic_function.Function", - "tensorflow.python.eager.polymorphic_function.polymorphic_function.UnliftedInitializerVariable", - "tensorflow.python.eager.polymorphic_function.tracing_compiler.TracingCompiler", - "tensorflow.python.framework.dtypes.DType", - "tensorflow.python.framework.func_graph.FuncGraph", - "tensorflow.python.framework.ops.EagerTensor", - "tensorflow.python.framework.tensor.TensorSpec", - "tensorflow.python.framework.tensor_shape.TensorShape", - "tensorflow.python.ops.resource_variable_ops.ResourceVariable", - "tensorflow.python.ops.variables.VariableAggregation", - "tensorflow.python.ops.variables.VariableAggregationV2", - "tensorflow.python.ops.variables.VariableSynchronization", - "tensorflow.python.trackable.base.TrackableReference", - "tensorflow.python.trackable.base.WeakTrackableReference", - "tensorflow.python.trackable.data_structures.dict", - "tensorflow.python.util.object_identity.ObjectIdentitySet", - "tensorflow.python.util.tf_decorator.TFDecorator", - "weakref.WeakKeyDictionary", - "weakref.remove", - ] - - def test_dumping_model(self, tensorflow, trusted): - # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html - - n_features_in_ = 20 - model = tensorflow.keras.models.Sequential() - model.add(tensorflow.keras.layers.Input(shape=(n_features_in_,))) - model.add(tensorflow.keras.layers.Dense(1, activation="sigmoid")) - - from scikeras.wrappers import KerasClassifier - - clf = KerasClassifier(model=model, loss="binary_crossentropy") - - pipeline = Pipeline([("classifier", clf)]) - - dumps(clf) - dumps(pipeline) - - X, y = make_classification(1000, 20, n_informative=10, random_state=0) - clf.fit(X, y) - dumped = dumps(clf) - - loaded = loads(dumped, trusted=trusted) - assert_method_outputs_equal(clf, loaded, X) diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py index a11d2204..ffe2b6bd 100644 --- a/skops/io/tests/test_persist.py +++ b/skops/io/tests/test_persist.py @@ -606,10 +606,6 @@ def fit(self, X, y=None, **fit_params): for state in states: val_state = state[key] - # skipping all the state values that are a cached node - if val_state["__loader__"] == "CachedNode": - continue - # check presence of "content"/"file" but not exact values assert ("content" in val_state) or ("file" in val_state) assert val_state["__class__"] == val_expected["__class__"] From f68eea6be66d4e68a2f31d99a46c79bac3e29174 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Thu, 11 Jan 2024 20:45:44 -0600 Subject: [PATCH 13/39] WIP Fix saving of scikeras models in zip file Still need to fix the model being saved outside of the zip file. --- skops/io/_persist.py | 18 ++++++++----- skops/io/tests/test_external.py | 45 +++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/skops/io/_persist.py b/skops/io/_persist.py index d69b6123..22e24f89 100644 --- a/skops/io/_persist.py +++ b/skops/io/_persist.py @@ -33,12 +33,18 @@ def _save(obj: Any, compression: int, compresslevel: int | None) -> io.BytesIO: buffer, "w", compression=compression, compresslevel=compresslevel ) as zip_file: save_context = SaveContext(zip_file=zip_file) - state = get_state(obj, save_context) - save_context.clear_memo() - - state["protocol"] = save_context.protocol - state["_skops_version"] = skops.__version__ - zip_file.writestr("schema.json", json.dumps(state, indent=2)) + # If obj is scikeras model save the scikeras model via scikeras + if "scikeras" in obj.__class__.__module__: + # TODO: This needs fixed to not save the model outside of the zip file. + obj.model.save("model", save_format="tf") + zip_file.write("model") + else: + state = get_state(obj, save_context) + save_context.clear_memo() + + state["protocol"] = save_context.protocol + state["_skops_version"] = skops.__version__ + zip_file.writestr("schema.json", json.dumps(state, indent=2)) return buffer diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index aeb7ce72..ad901c2e 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -12,7 +12,9 @@ """ +import io from unittest.mock import Mock, patch +from zipfile import ZipFile import pytest from sklearn.datasets import make_classification, make_regression @@ -427,3 +429,46 @@ def test_quantile_forest(self, quantile_forest, regr_data, trusted, tree_method) assert_method_outputs_equal(estimator, loaded, X) visualize(dumped, trusted=trusted) + + +class TestSciKeras: + """Tests for SciKerasRegressor and SciKerasClassifier""" + + @pytest.fixture(autouse=True) + def capture_stdout(self): + # Mock print and rich.print so that running these tests with pytest -s + # does not spam stdout. Other, more common methods of suppressing + # printing to stdout don't seem to work, perhaps because of pytest. + with patch("builtins.print", Mock()), patch("rich.print", Mock()): + yield + + @pytest.fixture(autouse=True) + def tensorflow(self): + tensorflow = pytest.importorskip("tensorflow") + return tensorflow + + def test_dumping_model(self, tensorflow): + # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html + + n_features_in_ = 20 + model = tensorflow.keras.models.Sequential() + model.add(tensorflow.keras.layers.Input(shape=(n_features_in_,))) + model.add(tensorflow.keras.layers.Dense(1, activation="sigmoid")) + + from scikeras.wrappers import KerasClassifier + + clf = KerasClassifier(model=model, loss="binary_crossentropy") + + X, y = make_classification(1000, 20, n_informative=10, random_state=0) + clf.fit(X, y) + + predictions = clf.predict(X) + + dumped = dumps(clf) + ZipFile(io.BytesIO(dumped)).read("model/") + + new_clf_model = tensorflow.keras.models.load_model("model/") + clf_new = KerasClassifier(new_clf_model) + clf_new.initialize(X, y) + new_preidctions = clf_new.predict(X) + assert all(new_preidctions == predictions) From f304dc7cf6114e684b84f4d1856e4d71f031762c Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Thu, 11 Jan 2024 20:49:12 -0600 Subject: [PATCH 14/39] Fixes lines I missed when reverting previous commits --- skops/io/_utils.py | 1 + skops/io/tests/test_persist.py | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/io/_utils.py b/skops/io/_utils.py index 37148802..d929380a 100644 --- a/skops/io/_utils.py +++ b/skops/io/_utils.py @@ -157,6 +157,7 @@ def get_state(value, save_context: SaveContext) -> dict[str, Any]: # fails with `get_state`, we try with json.dumps, if that fails, we raise # the original error alongside the json error. + # TODO: This should help with fixing recursive references. # if id(value) in save_context.memo: # return { # "__module__": None, diff --git a/skops/io/tests/test_persist.py b/skops/io/tests/test_persist.py index ffe2b6bd..25ebf37d 100644 --- a/skops/io/tests/test_persist.py +++ b/skops/io/tests/test_persist.py @@ -605,7 +605,6 @@ def fit(self, X, y=None, **fit_params): for key, val_expected in expected.items(): for state in states: val_state = state[key] - # check presence of "content"/"file" but not exact values assert ("content" in val_state) or ("file" in val_state) assert val_state["__class__"] == val_expected["__class__"] From c131ebddad569d3ce7b7c1f991e3d696cbb3baab Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 16 Jan 2024 20:11:07 -0600 Subject: [PATCH 15/39] Switch to saving as a `.keras` file --- skops/io/_persist.py | 4 ++-- skops/io/tests/test_external.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/skops/io/_persist.py b/skops/io/_persist.py index 22e24f89..673dd471 100644 --- a/skops/io/_persist.py +++ b/skops/io/_persist.py @@ -36,8 +36,8 @@ def _save(obj: Any, compression: int, compresslevel: int | None) -> io.BytesIO: # If obj is scikeras model save the scikeras model via scikeras if "scikeras" in obj.__class__.__module__: # TODO: This needs fixed to not save the model outside of the zip file. - obj.model.save("model", save_format="tf") - zip_file.write("model") + obj.model.save("model.keras") + zip_file.write("model.keras", "model.keras") else: state = get_state(obj, save_context) save_context.clear_memo() diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index ad901c2e..df673d6a 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -465,9 +465,9 @@ def test_dumping_model(self, tensorflow): predictions = clf.predict(X) dumped = dumps(clf) - ZipFile(io.BytesIO(dumped)).read("model/") + ZipFile(io.BytesIO(dumped)).read("model.keras") - new_clf_model = tensorflow.keras.models.load_model("model/") + new_clf_model = tensorflow.keras.models.load_model("model.keras") clf_new = KerasClassifier(new_clf_model) clf_new.initialize(X, y) new_preidctions = clf_new.predict(X) From 846e72ec2105aa14a29285eb2da46000f311bf2e Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 23 Jan 2024 18:47:52 -0600 Subject: [PATCH 16/39] Updates to use TempFile to save the model --- skops/io/_persist.py | 8 +++++--- skops/io/tests/test_external.py | 12 ++++++++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/skops/io/_persist.py b/skops/io/_persist.py index 673dd471..080251ea 100644 --- a/skops/io/_persist.py +++ b/skops/io/_persist.py @@ -3,6 +3,7 @@ import importlib import io import json +import tempfile from pathlib import Path from typing import Any, BinaryIO, Sequence from zipfile import ZIP_STORED, ZipFile @@ -35,9 +36,10 @@ def _save(obj: Any, compression: int, compresslevel: int | None) -> io.BytesIO: save_context = SaveContext(zip_file=zip_file) # If obj is scikeras model save the scikeras model via scikeras if "scikeras" in obj.__class__.__module__: - # TODO: This needs fixed to not save the model outside of the zip file. - obj.model.save("model.keras") - zip_file.write("model.keras", "model.keras") + with tempfile.NamedTemporaryFile(mode="w+", newline="") as temp_file: + file_name = temp_file.name + ".keras" + obj.model.save(file_name) + zip_file.write(file_name, "model.keras") else: state = get_state(obj, save_context) save_context.clear_memo() diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index df673d6a..a1f16cc1 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -13,6 +13,7 @@ """ import io +import tempfile from unittest.mock import Mock, patch from zipfile import ZipFile @@ -465,9 +466,16 @@ def test_dumping_model(self, tensorflow): predictions = clf.predict(X) dumped = dumps(clf) - ZipFile(io.BytesIO(dumped)).read("model.keras") + # Using a tempfile here to avoid creating a file in the current directory. + # This is reuiqred because `.load_model()` expects a file path. + with tempfile.NamedTemporaryFile(mode="w+", newline="") as temp_file: + file_name = temp_file.name + ".keras" + ZipFile(io.BytesIO(dumped)).extract("model.keras", file_name) + + new_clf_model = tensorflow.keras.models.load_model( + file_name + "/model.keras", compile=False + ) - new_clf_model = tensorflow.keras.models.load_model("model.keras") clf_new = KerasClassifier(new_clf_model) clf_new.initialize(X, y) new_preidctions = clf_new.predict(X) From 7f8593ac17c38f59eea7238064b23be4266b3b5c Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 23 Jan 2024 18:50:10 -0600 Subject: [PATCH 17/39] Fixes typo in comment --- skops/io/tests/test_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index a1f16cc1..b6170064 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -467,7 +467,7 @@ def test_dumping_model(self, tensorflow): dumped = dumps(clf) # Using a tempfile here to avoid creating a file in the current directory. - # This is reuiqred because `.load_model()` expects a file path. + # This is required because `.load_model()` expects a file path. with tempfile.NamedTemporaryFile(mode="w+", newline="") as temp_file: file_name = temp_file.name + ".keras" ZipFile(io.BytesIO(dumped)).extract("model.keras", file_name) From ac11b46c734a42628bafaa4c2f0a5ef008e3fed6 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Apr 2024 09:38:38 -0500 Subject: [PATCH 18/39] Add support for saving and loading scikeras models by adding _scikeras.py --- skops/io/_persist.py | 28 +++++++++++++------------- skops/io/_scikeras.py | 35 +++++++++++++++++++++++++++++++++ skops/io/tests/test_external.py | 12 +++++------ 3 files changed, 54 insertions(+), 21 deletions(-) create mode 100644 skops/io/_scikeras.py diff --git a/skops/io/_persist.py b/skops/io/_persist.py index 080251ea..2de0fc44 100644 --- a/skops/io/_persist.py +++ b/skops/io/_persist.py @@ -3,7 +3,6 @@ import importlib import io import json -import tempfile from pathlib import Path from typing import Any, BinaryIO, Sequence from zipfile import ZIP_STORED, ZipFile @@ -16,7 +15,14 @@ # We load the dispatch functions from the corresponding modules and register # them. Old protocols are found in the 'old/' directory, with the protocol # version appended to the corresponding module name. -modules = ["._general", "._numpy", "._scipy", "._sklearn", "._quantile_forest"] +modules = [ + "._general", + "._numpy", + "._scikeras", + "._scipy", + "._sklearn", + "._quantile_forest", +] modules.extend([".old._general_v0", ".old._numpy_v0"]) for module_name in modules: # register exposed functions for get_state and get_tree @@ -35,18 +41,12 @@ def _save(obj: Any, compression: int, compresslevel: int | None) -> io.BytesIO: ) as zip_file: save_context = SaveContext(zip_file=zip_file) # If obj is scikeras model save the scikeras model via scikeras - if "scikeras" in obj.__class__.__module__: - with tempfile.NamedTemporaryFile(mode="w+", newline="") as temp_file: - file_name = temp_file.name + ".keras" - obj.model.save(file_name) - zip_file.write(file_name, "model.keras") - else: - state = get_state(obj, save_context) - save_context.clear_memo() - - state["protocol"] = save_context.protocol - state["_skops_version"] = skops.__version__ - zip_file.writestr("schema.json", json.dumps(state, indent=2)) + state = get_state(obj, save_context) + save_context.clear_memo() + + state["protocol"] = save_context.protocol + state["_skops_version"] = skops.__version__ + zip_file.writestr("schema.json", json.dumps(state, indent=2)) return buffer diff --git a/skops/io/_scikeras.py b/skops/io/_scikeras.py new file mode 100644 index 00000000..7062d2a5 --- /dev/null +++ b/skops/io/_scikeras.py @@ -0,0 +1,35 @@ +import os +import tempfile +from typing import Type + +from scikeras.wrappers import KerasClassifier, KerasRegressor +from torch import Node + +from ._utils import Any, SaveContext, get_module + + +def scikeras_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: + res = { + "__class__": obj.__class__.__name__, + "__module__": get_module(type(obj)), + "__loader__": "ScikerasNode", + } + + obj_id = save_context.memoize(obj) + f_name = f"{obj_id}.keras" + + with tempfile.TemporaryDirectory() as temp_dir: + file_name = os.path.join(temp_dir, "model.keras") + obj.model.save(file_name) + save_context.zip_file.write(file_name, f_name) + + res.update(type="scikeras", file=f_name) + return res + + +GET_STATE_DISPATCH_FUNCTIONS = [ + (KerasClassifier, scikeras_get_state), + (KerasRegressor, scikeras_get_state), +] + +NODE_TYPE_MAPPING: dict[tuple[str, int], Type[Node]] = {} diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index b6170064..da2cb3a2 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -11,8 +11,8 @@ with a range of hyperparameters. """ - import io +import os import tempfile from unittest.mock import Mock, patch from zipfile import ZipFile @@ -468,13 +468,11 @@ def test_dumping_model(self, tensorflow): dumped = dumps(clf) # Using a tempfile here to avoid creating a file in the current directory. # This is required because `.load_model()` expects a file path. - with tempfile.NamedTemporaryFile(mode="w+", newline="") as temp_file: - file_name = temp_file.name + ".keras" - ZipFile(io.BytesIO(dumped)).extract("model.keras", file_name) + with tempfile.TemporaryDirectory() as temp_dir: + file_name = os.path.join(temp_dir, f"{id(clf)}.keras") + ZipFile(io.BytesIO(dumped)).extract(f"{id(clf)}.keras", path=temp_dir) - new_clf_model = tensorflow.keras.models.load_model( - file_name + "/model.keras", compile=False - ) + new_clf_model = tensorflow.keras.models.load_model(file_name, compile=False) clf_new = KerasClassifier(new_clf_model) clf_new.initialize(X, y) From a9b9dbfb6c5ecf07955faf25e1716cd2bc77f1d1 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Apr 2024 09:39:43 -0500 Subject: [PATCH 19/39] Removes comment that isn't necessary now --- skops/io/_persist.py | 1 - 1 file changed, 1 deletion(-) diff --git a/skops/io/_persist.py b/skops/io/_persist.py index 2de0fc44..1b4d0fef 100644 --- a/skops/io/_persist.py +++ b/skops/io/_persist.py @@ -40,7 +40,6 @@ def _save(obj: Any, compression: int, compresslevel: int | None) -> io.BytesIO: buffer, "w", compression=compression, compresslevel=compresslevel ) as zip_file: save_context = SaveContext(zip_file=zip_file) - # If obj is scikeras model save the scikeras model via scikeras state = get_state(obj, save_context) save_context.clear_memo() From e5eb579caeb16f40ebdb3e10758541afa8d58294 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Apr 2024 11:25:50 -0500 Subject: [PATCH 20/39] Adds SciKerasNode --- skops/io/_scikeras.py | 36 ++++++++++++++++++++++++++++----- skops/io/tests/test_external.py | 11 +--------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/skops/io/_scikeras.py b/skops/io/_scikeras.py index 7062d2a5..50c1a15a 100644 --- a/skops/io/_scikeras.py +++ b/skops/io/_scikeras.py @@ -1,18 +1,21 @@ +import io import os import tempfile -from typing import Type +from typing import Sequence, Type +import keras from scikeras.wrappers import KerasClassifier, KerasRegressor -from torch import Node -from ._utils import Any, SaveContext, get_module +from ._audit import Node +from ._protocol import PROTOCOL +from ._utils import Any, LoadContext, SaveContext, get_module def scikeras_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: res = { "__class__": obj.__class__.__name__, "__module__": get_module(type(obj)), - "__loader__": "ScikerasNode", + "__loader__": "SciKerasNode", } obj_id = save_context.memoize(obj) @@ -27,9 +30,32 @@ def scikeras_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: return res +class SciKerasNode(Node): + def __init__( + self, + state: dict[str, Any], + load_context: LoadContext, + trusted: bool | Sequence[str] = False, + ) -> None: + super().__init__(state, load_context, trusted) + self.trusted = self._get_trusted(trusted, [KerasClassifier, KerasRegressor]) + + self.children = {"content": io.BytesIO(load_context.src.read(state["file"]))} + + def _construct(self): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = os.path.join(temp_dir, "model.keras") + with open(file_path, "wb") as f: + f.write(self.children["content"].getbuffer()) + model = keras.models.load_model(file_path) + return model + + GET_STATE_DISPATCH_FUNCTIONS = [ (KerasClassifier, scikeras_get_state), (KerasRegressor, scikeras_get_state), ] -NODE_TYPE_MAPPING: dict[tuple[str, int], Type[Node]] = {} +NODE_TYPE_MAPPING: dict[tuple[str, int], Type[Node]] = { + ("SciKerasNode", PROTOCOL): SciKerasNode +} diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index da2cb3a2..ea325610 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -11,11 +11,7 @@ with a range of hyperparameters. """ -import io -import os -import tempfile from unittest.mock import Mock, patch -from zipfile import ZipFile import pytest from sklearn.datasets import make_classification, make_regression @@ -466,13 +462,8 @@ def test_dumping_model(self, tensorflow): predictions = clf.predict(X) dumped = dumps(clf) - # Using a tempfile here to avoid creating a file in the current directory. - # This is required because `.load_model()` expects a file path. - with tempfile.TemporaryDirectory() as temp_dir: - file_name = os.path.join(temp_dir, f"{id(clf)}.keras") - ZipFile(io.BytesIO(dumped)).extract(f"{id(clf)}.keras", path=temp_dir) - new_clf_model = tensorflow.keras.models.load_model(file_name, compile=False) + new_clf_model = loads(dumped) clf_new = KerasClassifier(new_clf_model) clf_new.initialize(X, y) From 0eca68a5835c9ebc16a7f664b30c561ddc239726 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Apr 2024 11:55:02 -0500 Subject: [PATCH 21/39] Update Keras import to TensorFlow and fix model loading --- skops/io/_scikeras.py | 4 ++-- skops/io/tests/test_external.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/skops/io/_scikeras.py b/skops/io/_scikeras.py index 50c1a15a..2b0bceff 100644 --- a/skops/io/_scikeras.py +++ b/skops/io/_scikeras.py @@ -3,7 +3,7 @@ import tempfile from typing import Sequence, Type -import keras +import tensorflow as tf from scikeras.wrappers import KerasClassifier, KerasRegressor from ._audit import Node @@ -47,7 +47,7 @@ def _construct(self): file_path = os.path.join(temp_dir, "model.keras") with open(file_path, "wb") as f: f.write(self.children["content"].getbuffer()) - model = keras.models.load_model(file_path) + model = tf.keras.models.load_model(file_path, compile=False) return model diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index ea325610..699533b9 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -463,6 +463,7 @@ def test_dumping_model(self, tensorflow): dumped = dumps(clf) + # Loads returns the Keras model so we need to initialize it as a SciKeras model new_clf_model = loads(dumped) clf_new = KerasClassifier(new_clf_model) From 30f8993ba50dce5179efbd8385a3de1d4aa9e835 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Apr 2024 15:50:40 -0500 Subject: [PATCH 22/39] Update dependencies for TensorFlow to be included in docs --- skops/_min_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index ef8166d0..1b89a893 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -38,7 +38,7 @@ "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), "scikeras": ("0.4.0", "tests", None), - "tensorflow": ("2.4.0", "tests", None), + "tensorflow": ("2.4.0", "docs, tests", None), } From 1b1cdffeb16aad2bf6a7a16e2ac6b6e251496e4b Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Apr 2024 15:56:11 -0500 Subject: [PATCH 23/39] Adds scikeras to docs dependencies --- skops/_min_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index 1b89a893..f524dc8b 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -37,7 +37,7 @@ "catboost": ("1.0", "tests", None), "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), - "scikeras": ("0.4.0", "tests", None), + "scikeras": ("0.4.0", "docs, tests", None), "tensorflow": ("2.4.0", "docs, tests", None), } From 503311236518ffdd7497534d57104a258ad8168c Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 16 Apr 2024 20:55:36 -0500 Subject: [PATCH 24/39] Updates scikeras to version 0.13 --- skops/_min_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index f524dc8b..f201373d 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -37,7 +37,7 @@ "catboost": ("1.0", "tests", None), "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), - "scikeras": ("0.4.0", "docs, tests", None), + "scikeras": ("0.13.0", "docs, tests", None), "tensorflow": ("2.4.0", "docs, tests", None), } From 6a8e8210ea80310d01a1888052914897f2868b7d Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 16 Apr 2024 21:03:08 -0500 Subject: [PATCH 25/39] Removes default trusted types --- skops/io/_scikeras.py | 2 +- skops/io/tests/test_external.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/skops/io/_scikeras.py b/skops/io/_scikeras.py index 2b0bceff..e8a6dfd7 100644 --- a/skops/io/_scikeras.py +++ b/skops/io/_scikeras.py @@ -38,7 +38,7 @@ def __init__( trusted: bool | Sequence[str] = False, ) -> None: super().__init__(state, load_context, trusted) - self.trusted = self._get_trusted(trusted, [KerasClassifier, KerasRegressor]) + self.trusted = self._get_trusted(trusted, default=[]) self.children = {"content": io.BytesIO(load_context.src.read(state["file"]))} diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index 699533b9..eeadd4cc 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -431,6 +431,10 @@ def test_quantile_forest(self, quantile_forest, regr_data, trusted, tree_method) class TestSciKeras: """Tests for SciKerasRegressor and SciKerasClassifier""" + @pytest.fixture + def trusted(self): + return ["scikeras.wrappers.KerasClassifier"] + @pytest.fixture(autouse=True) def capture_stdout(self): # Mock print and rich.print so that running these tests with pytest -s @@ -444,7 +448,7 @@ def tensorflow(self): tensorflow = pytest.importorskip("tensorflow") return tensorflow - def test_dumping_model(self, tensorflow): + def test_dumping_model(self, tensorflow, trusted): # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html n_features_in_ = 20 @@ -464,7 +468,7 @@ def test_dumping_model(self, tensorflow): dumped = dumps(clf) # Loads returns the Keras model so we need to initialize it as a SciKeras model - new_clf_model = loads(dumped) + new_clf_model = loads(dumped, trusted=trusted) clf_new = KerasClassifier(new_clf_model) clf_new.initialize(X, y) From ab530f30c64fcebd8318c3f6bfa49b61bf727da4 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 23 Apr 2024 20:08:43 -0500 Subject: [PATCH 26/39] Add importing __future__ annotations in _scikeras.py --- skops/io/_scikeras.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/skops/io/_scikeras.py b/skops/io/_scikeras.py index e8a6dfd7..ceed971f 100644 --- a/skops/io/_scikeras.py +++ b/skops/io/_scikeras.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import io import os import tempfile From 0d8efca33dfcd621b4c2bb536e22d069eae5236a Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 23 Apr 2024 20:30:29 -0500 Subject: [PATCH 27/39] Update TensorFlow version to 2.16.0 in _min_dependencies.py --- skops/_min_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index f201373d..b3e3ed29 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -38,7 +38,7 @@ "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), "scikeras": ("0.13.0", "docs, tests", None), - "tensorflow": ("2.4.0", "docs, tests", None), + "tensorflow": ("2.16.0", "docs, tests", None), } From 07e0d5ae4aa90f62efd41e1028ee5ef2ce21dabe Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 23 Apr 2024 20:32:10 -0500 Subject: [PATCH 28/39] Update scikeras version to 0.12.0 in _min_dependencies.py --- skops/_min_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index b3e3ed29..4e17427d 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -37,7 +37,7 @@ "catboost": ("1.0", "tests", None), "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), - "scikeras": ("0.13.0", "docs, tests", None), + "scikeras": ("0.12.0", "docs, tests", None), "tensorflow": ("2.16.0", "docs, tests", None), } From cc085307c382a97b029f60b7e319ac0a7d37fddf Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 23 Apr 2024 20:35:17 -0500 Subject: [PATCH 29/39] Update TensorFlow version to 2.13.0 in _min_dependencies.py --- skops/_min_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index 4e17427d..48257e74 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -38,7 +38,7 @@ "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), "scikeras": ("0.12.0", "docs, tests", None), - "tensorflow": ("2.16.0", "docs, tests", None), + "tensorflow": ("2.13.0", "docs, tests", None), } From cc2aead69c2a82a9d96a8f1757f7c5dc34f45adb Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Tue, 23 Apr 2024 20:39:46 -0500 Subject: [PATCH 30/39] Update TensorFlow version to 2.12.0 in _min_dependencies.py --- skops/_min_dependencies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/_min_dependencies.py b/skops/_min_dependencies.py index 48257e74..1e6603ac 100644 --- a/skops/_min_dependencies.py +++ b/skops/_min_dependencies.py @@ -38,7 +38,7 @@ "fairlearn": ("0.7.0", "docs, tests", None), "rich": ("12", "tests, rich", None), "scikeras": ("0.12.0", "docs, tests", None), - "tensorflow": ("2.13.0", "docs, tests", None), + "tensorflow": ("2.12.0", "docs, tests", None), } From 91983e88d7f3518d96e42b2c78ab4e9919f7d7b0 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Sat, 4 May 2024 20:48:30 -0500 Subject: [PATCH 31/39] Moves changes to the correct version --- docs/changes.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index fc1f0d39..49f40cda 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -11,7 +11,8 @@ skops Changelog v0.10 ---- - +- Fix dumping Scikeras model failing because of maximum recursion depth. :pr:`388` + by :user:`Thomas Lazarus `. v0.9 ---- @@ -21,8 +22,6 @@ v0.9 :pr:`386` by :user:`Reid Johnson `. - :func:`skops.hub_utils.get_model_output` and :func:`skops.hub_utils.push` are deprecated and will be removed in version 0.10. :pr:`396` by `Adrin Jalali`_. -- Fix dumping Scikeras model failing because of maximum recursion depth. :pr:`388` - by :user:`Thomas Lazarus `. v0.8 ---- From 3739f1fda11b49843fd8ee41958f07436c4d8a9c Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Sat, 25 May 2024 15:10:38 -0500 Subject: [PATCH 32/39] Ignores deprecation warning from protobuf --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index f697b327..ce9dde54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,9 @@ filterwarnings = [ "ignore:DataFrameGroupBy.apply operated on the grouping columns.:DeprecationWarning", # Ignore Pandas 2.2 warning on PyArrow. It might be reverted in a later release. "ignore:\\s*Pyarrow will become a required dependency of pandas.*:DeprecationWarning", + # Ignore Google protobuf deprecation warnings. This should be reverted once tensorflow supports protobuf 5.0+ + "ignore:Type google._upb._message.MessageMapContainer uses PyType_Spec with a metaclass that has custom tp_new. This is deprecated and will no longer be allowed in Python 3.14.:DeprecationWarning:importlib.*:", + "ignore:Type google._upb._message.ScalarMapContainer uses PyType_Spec with a metaclass that has custom tp_new. This is deprecated and will no longer be allowed in Python 3.14.:DeprecationWarning:importlib.*:", ] markers = [ "network: marks tests as requiring internet (deselect with '-m \"not network\"')", From ce11bf01e10d6ed4130e30b13d67319908304c79 Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Sat, 25 May 2024 15:24:45 -0500 Subject: [PATCH 33/39] Fixes deprecation warning from matplotlib --- skops/card/_model_card.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/card/_model_card.py b/skops/card/_model_card.py index 02cd8000..81324b1f 100644 --- a/skops/card/_model_card.py +++ b/skops/card/_model_card.py @@ -1227,7 +1227,7 @@ def add_permutation_importances( _, ax = plt.subplots() ax.boxplot( x=permutation_importances.importances[sorted_importances_idx].T, - labels=columns[sorted_importances_idx], + tick_labels=columns[sorted_importances_idx], vert=False, ) ax.set_title(plot_name) From d677476e0fb4cc4bc3fec828c7be6676ed7213ec Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Jul 2024 20:09:13 -0500 Subject: [PATCH 34/39] Fixes making scikears a hard dependency --- skops/io/_scikeras.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/skops/io/_scikeras.py b/skops/io/_scikeras.py index ceed971f..70a5a9fe 100644 --- a/skops/io/_scikeras.py +++ b/skops/io/_scikeras.py @@ -6,12 +6,16 @@ from typing import Sequence, Type import tensorflow as tf -from scikeras.wrappers import KerasClassifier, KerasRegressor from ._audit import Node from ._protocol import PROTOCOL from ._utils import Any, LoadContext, SaveContext, get_module +try: + from scikeras.wrappers import KerasClassifier, KerasRegressor +except ImportError: + SciKeras = None + def scikeras_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: res = { @@ -39,6 +43,11 @@ def __init__( load_context: LoadContext, trusted: bool | Sequence[str] = False, ) -> None: + if SciKeras is None: + raise ImportError( + "`scikeras` is missing and needs to be installed in order to load this" + " model" + ) super().__init__(state, load_context, trusted) self.trusted = self._get_trusted(trusted, default=[]) From 6d10f7128d29a38dabba3c8f5856e05ddd51606c Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Jul 2024 20:25:41 -0500 Subject: [PATCH 35/39] Adds test for error on untrusted types --- skops/io/_scikeras.py | 18 ++++++++++-------- skops/io/tests/test_external.py | 24 +++++++++++++++++++++++- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/skops/io/_scikeras.py b/skops/io/_scikeras.py index 70a5a9fe..e135d489 100644 --- a/skops/io/_scikeras.py +++ b/skops/io/_scikeras.py @@ -3,7 +3,7 @@ import io import os import tempfile -from typing import Sequence, Type +from typing import Optional, Sequence, Type import tensorflow as tf @@ -14,7 +14,8 @@ try: from scikeras.wrappers import KerasClassifier, KerasRegressor except ImportError: - SciKeras = None + KerasClassifier = None + KerasRegressor = None def scikeras_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: @@ -41,9 +42,9 @@ def __init__( self, state: dict[str, Any], load_context: LoadContext, - trusted: bool | Sequence[str] = False, + trusted: Optional[Sequence[str]] = None, ) -> None: - if SciKeras is None: + if KerasClassifier is None and KerasRegressor is None: raise ImportError( "`scikeras` is missing and needs to be installed in order to load this" " model" @@ -62,10 +63,11 @@ def _construct(self): return model -GET_STATE_DISPATCH_FUNCTIONS = [ - (KerasClassifier, scikeras_get_state), - (KerasRegressor, scikeras_get_state), -] +if KerasClassifier is not None and KerasRegressor is not None: + GET_STATE_DISPATCH_FUNCTIONS = [ + (KerasClassifier, scikeras_get_state), + (KerasRegressor, scikeras_get_state), + ] NODE_TYPE_MAPPING: dict[tuple[str, int], Type[Node]] = { ("SciKerasNode", PROTOCOL): SciKerasNode diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index eeadd4cc..e1de8ac5 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -17,6 +17,7 @@ from sklearn.datasets import make_classification, make_regression from skops.io import dumps, loads, visualize +from skops.io.exceptions import UntrustedTypesFoundException from skops.io.tests._utils import assert_method_outputs_equal, assert_params_equal # Default settings for generated data @@ -431,7 +432,7 @@ def test_quantile_forest(self, quantile_forest, regr_data, trusted, tree_method) class TestSciKeras: """Tests for SciKerasRegressor and SciKerasClassifier""" - @pytest.fixture + @pytest.fixture() def trusted(self): return ["scikeras.wrappers.KerasClassifier"] @@ -474,3 +475,24 @@ def test_dumping_model(self, tensorflow, trusted): clf_new.initialize(X, y) new_preidctions = clf_new.predict(X) assert all(new_preidctions == predictions) + + def test_dumping_untrusted_model(self, tensorflow): + # This simplifies the basic usage tutorial from https://adriangb.com/scikeras/stable/notebooks/Basic_Usage.html + + n_features_in_ = 20 + model = tensorflow.keras.models.Sequential() + model.add(tensorflow.keras.layers.Input(shape=(n_features_in_,))) + model.add(tensorflow.keras.layers.Dense(1, activation="sigmoid")) + + from scikeras.wrappers import KerasClassifier + + clf = KerasClassifier(model=model, loss="binary_crossentropy") + + X, y = make_classification(1000, 20, n_informative=10, random_state=0) + clf.fit(X, y) + + dumped = dumps(clf) + + # Tries to load the dumped model but returns an untrusted exception + with pytest.raises(UntrustedTypesFoundException): + loads(dumped) From e30756023b8e4d03c3f4e4d8478c60c1d8351fee Mon Sep 17 00:00:00 2001 From: Thomas Lazarus Date: Mon, 1 Jul 2024 20:27:53 -0500 Subject: [PATCH 36/39] Cleans up unneeded () --- skops/io/tests/test_external.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index e1de8ac5..43fa753e 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -432,7 +432,7 @@ def test_quantile_forest(self, quantile_forest, regr_data, trusted, tree_method) class TestSciKeras: """Tests for SciKerasRegressor and SciKerasClassifier""" - @pytest.fixture() + @pytest.fixture def trusted(self): return ["scikeras.wrappers.KerasClassifier"] From fa6b2085754d491784626ae5c8e31d8dabbec02f Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 28 Aug 2024 11:20:32 +0200 Subject: [PATCH 37/39] use TF directly --- skops/io/_persist.py | 2 +- skops/io/_scikeras.py | 74 --------------------------------- skops/io/tests/test_external.py | 15 ++++--- 3 files changed, 11 insertions(+), 80 deletions(-) delete mode 100644 skops/io/_scikeras.py diff --git a/skops/io/_persist.py b/skops/io/_persist.py index 367380bc..8e620f0e 100644 --- a/skops/io/_persist.py +++ b/skops/io/_persist.py @@ -18,7 +18,7 @@ modules = [ "._general", "._numpy", - "._scikeras", + "._keras", "._scipy", "._sklearn", "._quantile_forest", diff --git a/skops/io/_scikeras.py b/skops/io/_scikeras.py deleted file mode 100644 index e135d489..00000000 --- a/skops/io/_scikeras.py +++ /dev/null @@ -1,74 +0,0 @@ -from __future__ import annotations - -import io -import os -import tempfile -from typing import Optional, Sequence, Type - -import tensorflow as tf - -from ._audit import Node -from ._protocol import PROTOCOL -from ._utils import Any, LoadContext, SaveContext, get_module - -try: - from scikeras.wrappers import KerasClassifier, KerasRegressor -except ImportError: - KerasClassifier = None - KerasRegressor = None - - -def scikeras_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: - res = { - "__class__": obj.__class__.__name__, - "__module__": get_module(type(obj)), - "__loader__": "SciKerasNode", - } - - obj_id = save_context.memoize(obj) - f_name = f"{obj_id}.keras" - - with tempfile.TemporaryDirectory() as temp_dir: - file_name = os.path.join(temp_dir, "model.keras") - obj.model.save(file_name) - save_context.zip_file.write(file_name, f_name) - - res.update(type="scikeras", file=f_name) - return res - - -class SciKerasNode(Node): - def __init__( - self, - state: dict[str, Any], - load_context: LoadContext, - trusted: Optional[Sequence[str]] = None, - ) -> None: - if KerasClassifier is None and KerasRegressor is None: - raise ImportError( - "`scikeras` is missing and needs to be installed in order to load this" - " model" - ) - super().__init__(state, load_context, trusted) - self.trusted = self._get_trusted(trusted, default=[]) - - self.children = {"content": io.BytesIO(load_context.src.read(state["file"]))} - - def _construct(self): - with tempfile.TemporaryDirectory() as temp_dir: - file_path = os.path.join(temp_dir, "model.keras") - with open(file_path, "wb") as f: - f.write(self.children["content"].getbuffer()) - model = tf.keras.models.load_model(file_path, compile=False) - return model - - -if KerasClassifier is not None and KerasRegressor is not None: - GET_STATE_DISPATCH_FUNCTIONS = [ - (KerasClassifier, scikeras_get_state), - (KerasRegressor, scikeras_get_state), - ] - -NODE_TYPE_MAPPING: dict[tuple[str, int], Type[Node]] = { - ("SciKerasNode", PROTOCOL): SciKerasNode -} diff --git a/skops/io/tests/test_external.py b/skops/io/tests/test_external.py index 2b34dc3c..fe859cae 100644 --- a/skops/io/tests/test_external.py +++ b/skops/io/tests/test_external.py @@ -11,6 +11,7 @@ with a range of hyperparameters. """ + from unittest.mock import Mock, patch import pytest @@ -438,7 +439,14 @@ class TestSciKeras: @pytest.fixture def trusted(self): - return ["scikeras.wrappers.KerasClassifier"] + return [ + "collections.defaultdict", + "keras.src.models.sequential.Sequential", + "numpy.dtype", + "scikeras.utils.transformers.ClassifierLabelEncoder", + "scikeras.utils.transformers.TargetReshaper", + "scikeras.wrappers.KerasClassifier", + ] @pytest.fixture(autouse=True) def capture_stdout(self): @@ -473,10 +481,7 @@ def test_dumping_model(self, tensorflow, trusted): dumped = dumps(clf) # Loads returns the Keras model so we need to initialize it as a SciKeras model - new_clf_model = loads(dumped, trusted=trusted) - - clf_new = KerasClassifier(new_clf_model) - clf_new.initialize(X, y) + clf_new = loads(dumped, trusted=trusted) new_preidctions = clf_new.predict(X) assert all(new_preidctions == predictions) From e9b2dd0571170a3addd62293de061c650ed2b041 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 28 Aug 2024 11:21:05 +0200 Subject: [PATCH 38/39] move changelog --- docs/changes.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/changes.rst b/docs/changes.rst index 35d2015b..6bf5219e 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -13,6 +13,8 @@ v0.11 ----- - Correctly restore ``default_factory`` when saving and loading a ``defaultdict``. :pr:`433` by `Adrin Jalali`_. +- Fix dumping Scikeras model failing because of maximum recursion depth. :pr:`388` + by :user:`Thomas Lazarus `. v0.10 ----- @@ -24,8 +26,6 @@ v0.10 it. :func:`skops.io.get_untrusted_types` can be used to get the untrusted types present in the input. :pr:`422` by `Adrin Jalali`_. -- Fix dumping Scikeras model failing because of maximum recursion depth. :pr:`388` - by :user:`Thomas Lazarus `. - ``skops.io`` now supports `numpy>=2`. :pr:`429` by `Adrin Jalali`_. v0.9 From 2d92168631dee916438efe7c8bc2165a2c1f2af1 Mon Sep 17 00:00:00 2001 From: adrinjalali Date: Wed, 28 Aug 2024 11:21:46 +0200 Subject: [PATCH 39/39] add missing file --- skops/io/_keras.py | 78 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 skops/io/_keras.py diff --git a/skops/io/_keras.py b/skops/io/_keras.py new file mode 100644 index 00000000..01e0dd12 --- /dev/null +++ b/skops/io/_keras.py @@ -0,0 +1,78 @@ +from __future__ import annotations + +import io +import os +import tempfile +from typing import Sequence, Type + +from ._audit import Node +from ._protocol import PROTOCOL +from ._utils import Any, LoadContext, SaveContext, get_module + +try: + from tensorflow.keras.models import Model, Sequential, load_model, save_model + + tf_present = True +except ImportError: + tf_present = False + + +def keras_get_state(obj: Any, save_context: SaveContext) -> dict[str, Any]: + res = { + "__class__": obj.__class__.__name__, + "__module__": get_module(type(obj)), + "__loader__": "KerasNode", + } + + # Memoize the object and then check if it's file name (containing + # the object id) already exists. If it does, there is no need to + # save the object again. Memoizitation is necessary since for + # ephemeral objects, the same id might otherwise be reused. + obj_id = save_context.memoize(obj) + f_name = f"{obj_id}.keras" + if f_name in save_context.zip_file.namelist(): + return res + + with tempfile.TemporaryDirectory() as temp_dir: + file_name = os.path.join(temp_dir, "model.keras") + save_model(obj, file_name) + save_context.zip_file.write(file_name, f_name) + res.update(file=f_name) + return res + + +class KerasNode(Node): + def __init__( + self, + state: dict[str, Any], + load_context: LoadContext, + trusted: Sequence[str] | None = None, + ) -> None: + if not tf_present: + raise ImportError( + "`tf.keras` is missing and needs to be installed in order to load this" + " object." + ) + super().__init__(state, load_context, trusted) + self.trusted = self._get_trusted(trusted, default=[]) + + self.children = {"content": io.BytesIO(load_context.src.read(state["file"]))} + + def _construct(self): + with tempfile.TemporaryDirectory() as temp_dir: + file_path = os.path.join(temp_dir, "model.keras") + with open(file_path, "wb") as f: + f.write(self.children["content"].getbuffer()) + model = load_model(file_path, compile=False, safe_mode=True) + return model + + +if tf_present: + GET_STATE_DISPATCH_FUNCTIONS = [ + (Model, keras_get_state), + (Sequential, keras_get_state), + ] + +NODE_TYPE_MAPPING: dict[tuple[str, int], Type[Node]] = { + ("KerasNode", PROTOCOL): KerasNode +}