From e59632709ffedb4164d0e3f2fc690af37ef31dda Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Thu, 21 Mar 2024 17:20:01 +0100 Subject: [PATCH 01/17] use only PyPI dependencies --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 85789d94..fe529a30 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,10 +51,10 @@ dependencies = [ "xgboost", "boruta", "tqdm", - "ml2json @ git+https://github.com/HellevdM/ml2json.git", + "ml2json", "jsonpickle", - "papyrus_scripts @ git+https://github.com/OlivierBeq/Papyrus-scripts.git", - "gbmt-splits @ git+https://github.com/sohviluukkonen/gbmt-splits@0.0.4", + "papyrus_scripts", + "gbmtsplits", "mlchemad", "mols2grid" ] From 956fe4eee2db352cfe7bbee80f96c4c579f5900e Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 08:35:10 +0100 Subject: [PATCH 02/17] add test as extra index --- .gitlab-ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 902745e3..4b46b896 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,6 +19,8 @@ stages: - export MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/ - clustalo --version # For debugging clustalo version - mafft --version # For debugging mafft version + - pip config set global.index-url https://pypi.org/simple/ + - pip config set global.extra-index-url https://test.pypi.org/simple/ - pip install ".[full]" --no-cache-dir - python -c "import qsprpred; print(qsprpred.__version__)" # For debugging package version - pip install pytest From be561cd4e4c60e9db73a1cb1fffe1e6d90f71d36 Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 10:55:48 +0100 Subject: [PATCH 03/17] remove repo dependency --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index fe529a30..0679eb3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ dependencies = [ [project.optional-dependencies] extra = [ - "mold2-pywrapper @ git+https://github.com/OlivierBeq/Mold2_pywrapper.git@master", + "mold2-pywrapper", "padel-pywrapper >= 1.0.2.post1", "Mordred", "biopython", "prodec", "Signature-pywrapper", ] pyboost = ["py-boost"] From 98c7305e78442ee20c88f9dec36b2c42069bb602 Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 12:56:58 +0100 Subject: [PATCH 04/17] add gitlab PyPI-test publishing workflow --- .github/workflows/pypi-test.yml | 53 +++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 .github/workflows/pypi-test.yml diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml new file mode 100644 index 00000000..eb538b10 --- /dev/null +++ b/.github/workflows/pypi-test.yml @@ -0,0 +1,53 @@ +name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI + +on: + push: + tags: + - 'v[0-9]+.[0-9]+.[0-9]+.alpha[0-9]+' + - 'v[0-9]+.[0-9]+.[0-9]+.beta[0-9]+' + +jobs: + build: + name: Build distribution 📦 + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + - name: Install pypa/build + run: >- + python3 -m + pip install + build + --user + - name: Build a binary wheel and a source tarball + run: python3 -m build + - name: Store the distribution packages + uses: actions/upload-artifact@v3 + with: + name: python-package-distributions + path: dist/ + + publish-to-pypi: + name: >- + Publish Python 🐍 distribution 📦 to PyPI + if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: https://pypi.org/p/qsprpred # Replace with your PyPI project name + permissions: + id-token: write # IMPORTANT: mandatory for trusted publishing + steps: + - name: Download all the dists + uses: actions/download-artifact@v3 + with: + name: python-package-distributions + path: dist/ + - name: Publish distribution 📦 to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 From 919d794149aeeb8cd7e04328d9124aca2eb41e54 Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 15:53:56 +0100 Subject: [PATCH 05/17] remove index settings --- .gitlab-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4b46b896..93ca661a 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,8 +19,8 @@ stages: - export MAFFT_BINARIES=$PWD/mafft-linux64/mafftdir/libexec/ - clustalo --version # For debugging clustalo version - mafft --version # For debugging mafft version - - pip config set global.index-url https://pypi.org/simple/ - - pip config set global.extra-index-url https://test.pypi.org/simple/ + # - pip config set global.index-url https://pypi.org/simple/ + # - pip config set global.extra-index-url https://test.pypi.org/simple/ - pip install ".[full]" --no-cache-dir - python -c "import qsprpred; print(qsprpred.__version__)" # For debugging package version - pip install pytest From 4f3380ed671d626a36ca6d9505f2d394c143c729 Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 16:10:49 +0100 Subject: [PATCH 06/17] use proper urls and environment --- .github/workflows/pypi-test.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pypi-test.yml b/.github/workflows/pypi-test.yml index eb538b10..ac365fee 100644 --- a/.github/workflows/pypi-test.yml +++ b/.github/workflows/pypi-test.yml @@ -1,4 +1,4 @@ -name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI +name: Publish Python 🐍 distribution 📦 to TestPyPI on: push: @@ -31,16 +31,16 @@ jobs: name: python-package-distributions path: dist/ - publish-to-pypi: + publish-to-pypi-test: name: >- - Publish Python 🐍 distribution 📦 to PyPI + Publish Python 🐍 distribution 📦 to TestPyPI if: startsWith(github.ref, 'refs/tags/') # only publish to PyPI on tag pushes needs: - build runs-on: ubuntu-latest environment: - name: pypi - url: https://pypi.org/p/qsprpred # Replace with your PyPI project name + name: pypi-test # IMPORTANT: mandatory for trusted publishing + url: https://test.pypi.org/p/qsprpred # Replace with your PyPI project name permissions: id-token: write # IMPORTANT: mandatory for trusted publishing steps: @@ -49,5 +49,7 @@ jobs: with: name: python-package-distributions path: dist/ - - name: Publish distribution 📦 to PyPI + - name: Publish distribution 📦 to TestPyPI uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ From 5fec71f1c13bcd0955bb10a2d4bc3d1d1118667c Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 16:19:32 +0100 Subject: [PATCH 07/17] remove numpy restrictions --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0679eb3d..d271bd2c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ classifiers = [ dependencies = [ "parameterized", "pebble", - "numpy >= 1.19, <1.24.0", + "numpy", "scikit-learn >= 1.0.2", "pandas >= 1.2.2", "matplotlib >= 2.0", From d8c05925f6f490c4b2f0bc78cad12e14238c6801 Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 17:15:38 +0100 Subject: [PATCH 08/17] skip boruta if numpy not compatible --- qsprpred/data/processing/tests.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/qsprpred/data/processing/tests.py b/qsprpred/data/processing/tests.py index 28ed2196..7f225ded 100644 --- a/qsprpred/data/processing/tests.py +++ b/qsprpred/data/processing/tests.py @@ -1,5 +1,6 @@ import copy import itertools +from unittest import skipIf import numpy as np import pandas as pd @@ -224,6 +225,10 @@ def testHighCorrelationFilter(self, use_index_cols): (False,), ] ) + @skipIf( + int(np.__version__.split(".")[1]) >= 24, + "numpy 1.24.0 not compatible with boruta", + ) def testBorutaFilter(self, use_index_cols): """Test the Boruta filter, which removes the features which are statistically as relevant as random features.""" From dc2ab56712ea42bcda82ac6102c9be0c7700fa18 Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 17:54:32 +0100 Subject: [PATCH 09/17] simplify serialization --- qsprpred/models/early_stopping.py | 47 +++++++++---------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/qsprpred/models/early_stopping.py b/qsprpred/models/early_stopping.py index 8650d11d..9509d748 100644 --- a/qsprpred/models/early_stopping.py +++ b/qsprpred/models/early_stopping.py @@ -1,5 +1,4 @@ """Early stopping for training of models.""" -import json from enum import Enum from typing import Any, Callable @@ -8,6 +7,7 @@ from ..data.tables.qspr import QSPRDataset from ..logs import logger +from ..utils.serialization import JSONSerializable class EarlyStoppingMode(Enum): @@ -36,7 +36,7 @@ def __bool__(self) -> bool: return self in [EarlyStoppingMode.NOT_RECORDING, EarlyStoppingMode.RECORDING] -class EarlyStopping: +class EarlyStopping(JSONSerializable): """Early stopping tracker for training of QSPRpred models. An instance of this class is used to track the number of epochs trained in a model @@ -58,6 +58,7 @@ class EarlyStopping: trainedEpochs (list[int]): list of number of epochs trained in a model training with early stopping on RECORDING mode. """ + def __init__( self, mode: EarlyStoppingMode = EarlyStoppingMode.NOT_RECORDING, @@ -77,6 +78,15 @@ def __init__( self._trainedEpochs = [] self.aggregateFunc = aggregate_func + def __getstate__(self): + state = super().__getstate__() + state["aggregateFunc"] = self.aggregateFunc.__name__ + return state + + def __setstate__(self, state): + super().__setstate__(state) + self.aggregateFunc = getattr(np, self.aggregateFunc) + @property def optimalEpochs(self) -> int: """Return number of epochs to train in OPTIMAL mode.""" @@ -124,38 +134,6 @@ def __str__(self) -> str: """Return the name of the task.""" return self.mode.name - def toFile(self, path: str): - """Save early stopping object to file. - - Args: - path (str): path to file to save early stopping object to - """ - with open(path, "w") as f: - json.dump( - { - "mode": self.mode.name, - "num_epochs": self.numEpochs, - "trained_epochs": self.trainedEpochs, - "aggregate_func_name": self.aggregateFunc.__name__, - }, - f, - ) - - @classmethod - def fromFile(cls, path: str) -> "EarlyStopping": - """Load early stopping object from file. - - Args: - path (str): path to file containing early stopping object - """ - with open(path, "r") as f: - data = json.load(f) - mode = EarlyStoppingMode[data["mode"]] - aggregate_func = getattr(np, data["aggregate_func_name"]) - early_stopping = cls(mode, data["num_epochs"], aggregate_func) - early_stopping.trainedEpochs = data["trained_epochs"] - return early_stopping - def __bool__(self) -> bool: """Return whether early stopping is used.""" return self.mode.__bool__() @@ -171,6 +149,7 @@ def early_stopping(func: Callable) -> Callable: Returns: function: decorated fit method """ + def wrapper_fit( self, X: pd.DataFrame | np.ndarray | QSPRDataset, From 40e93a98786196e63d2692cab6850b5f3bf2d9be Mon Sep 17 00:00:00 2001 From: martin-sicho Date: Fri, 22 Mar 2024 17:56:55 +0100 Subject: [PATCH 10/17] update CHANGELOG.md and docs --- CHANGELOG.md | 8 +++++++- README.md | 4 ++-- qsprpred/data/processing/feature_filters.py | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e1f5164d..b898e82d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,12 @@ From v3.0.1 to v3.0.2 ## Changes -None. +- Restrictions on `numpy` versions were removed to allow for more flexibility in + package installations. However, the `BorutaFilter` feature selection method does not + function with `numpy` versions 1.24.0 and above. Therefore, this functionality now + requires a downgrade to `numpy` version 1.23.0 or lower. This was reflected in the + documentation and `numpy` itself outputs a reasonable error message if the version is + incompatible. ## New Features @@ -19,6 +24,7 @@ None. custom descriptor sets. - Added the `prepMols` method to `DescriptorSet` to allow separated customization of molecule preparation before descriptor calculation. +- The package can now be installed from the PyPI repository 🐍📦. ## Removed Features diff --git a/README.md b/README.md index aa332dd9..fb1ed80d 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ Quick Start QSPRpred can be installed with pip like so (with python >= 3.10): ```bash -pip install git+https://github.com/CDDLeiden/QSPRpred.git@main +pip install qsprpred ``` Note that this will install the basic dependencies, but not the optional dependencies. @@ -50,7 +50,7 @@ If you want to use the optional dependencies, you can install the package with a option: ```bash -pip install git+https://github.com/CDDLeiden/QSPRpred.git@main#egg=qsprpred[