✨ 🔀 Continuous perturbations
Add continuous feature perturbations.

---------

Co-authored-by: Ricardo Hernández Medina <[email protected]>
Co-authored-by: Henry Webel <[email protected]>
Co-authored-by: Marc Pielies Avelli <[email protected]>
4 people authored Aug 14, 2024
1 parent 92bced0 commit 49ee412
Showing 48 changed files with 27,935 additions and 313 deletions.
107 changes: 101 additions & 6 deletions .github/workflows/release.yaml
@@ -1,16 +1,111 @@
name: release on pypi
name: CI
on:
  push:
    branches:
      - main
  pull_request:
    # branches:
    # - main

jobs:
  format:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: psf/black@stable
  lint:
    name: Lint with flake8
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install flake8
        run: pip install flake8 flake8-bugbear
      - name: Lint with flake8
        run: flake8 src
  run-tutorial:
    name: Run tutorial - random_small
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: pip install .
      - name: Prepare tutorial data
        run: |
          cd tutorial
          move-dl data=random_small task=encode_data --cfg job
          move-dl data=random_small task=encode_data
      - name: Train model and analyze latent space
        run: |
          cd tutorial
          move-dl data=random_small task=random_small__latent --cfg job
          move-dl data=random_small task=random_small__latent
      - name: Identify associations - t-test
        run: |
          cd tutorial
          move-dl data=random_small task=random_small__id_assoc_ttest --cfg job
          move-dl data=random_small task=random_small__id_assoc_ttest task.training_loop.num_epochs=30 task.num_refits=4
      - name: Identify associations - bayes factors
        run: |
          cd tutorial
          move-dl data=random_small task=random_small__id_assoc_bayes --cfg job
          move-dl data=random_small task=random_small__id_assoc_bayes task.training_loop.num_epochs=30 task.num_refits=20
  run-tutorial-cont:
    name: Run tutorial - random_continuous
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install dependencies
        run: pip install .
      - name: Prepare tutorial data
        run: |
          cd tutorial
          move-dl data=random_continuous task=encode_data
      - name: Train model and analyze latent space
        run: |
          cd tutorial
          move-dl data=random_continuous task=random_continuous__latent --cfg job
          move-dl data=random_continuous task=random_continuous__latent
      - name: Identify associations - t-test
        run: |
          cd tutorial
          move-dl data=random_continuous task=random_continuous__id_assoc_ttest --cfg job
          move-dl data=random_continuous task=random_continuous__id_assoc_ttest task.training_loop.num_epochs=30 task.num_refits=4
      - name: Identify associations - bayes factors
        run: |
          cd tutorial
          move-dl data=random_continuous task=random_continuous__id_assoc_bayes --cfg job
          move-dl data=random_continuous task=random_continuous__id_assoc_bayes task.training_loop.num_epochs=30 task.num_refits=4
      - name: Identify associations - KS
        run: |
          cd tutorial
          move-dl data=random_continuous task=random_continuous__id_assoc_ks --cfg job
          move-dl data=random_continuous task=random_continuous__id_assoc_ks task.training_loop.num_epochs=30 task.num_refits=4
  publish:
    name: Publish package
    runs-on: ubuntu-latest
    if: startsWith(github.ref, 'refs/tags')
    needs:
      - format
      - lint
    steps:
      - uses: actions/checkout@v3
      - name: Publish package
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install twine and build
        run: python -m pip install --upgrade twine build
      - name: Build
        run: python -m build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
12 changes: 12 additions & 0 deletions .gitignore
@@ -40,6 +40,11 @@ tutorial/*
!tutorial/notebooks/*.ipynb
!tutorial/README.md

# Supplementary files
supplementary_files/*.png
supplementary_files/*.tsv
supplementary_files/*.txt

# Virtual environment
venv/
virtualvenv/
@@ -48,6 +53,12 @@ virtualvenv/
docs/build/
docs/source/_templates/

# VS Code settings
.vscode

# macOS
.DS_Store

# Root folder
/*.*
!/.gitignore
@@ -58,3 +69,4 @@ docs/source/_templates/
!/pyproject.toml
!/requirements.txt
!/setup.cfg
!/.github
8 changes: 4 additions & 4 deletions README.md
@@ -42,7 +42,7 @@ Medication data

## Installing MOVE package

MOVE is written in Python and can therefore be installed using `pip`:
MOVE is written in Python and can be installed using `pip`:

```bash
>>> pip install move-dl
@@ -78,11 +78,11 @@ MOVE has five-six steps:
## How to run MOVE

Please refer to our [**documentation**](https://move-dl.readthedocs.io/) for
examples and [tutorials](https://move-dl.readthedocs.io/tutorial/index.html)
examples and [tutorials](https://move-dl.readthedocs.io/tutorial/index.html)
on how to run MOVE.

Additionally, you can copy
[this notebook](https://colab.research.google.com/drive/1RFWNsuGymCmppPsElBvDuA9zRbGskKmi?usp=sharing)
Additionally, you can copy
[this notebook](https://colab.research.google.com/drive/1RFWNsuGymCmppPsElBvDuA9zRbGskKmi?usp=sharing)
and follow its instructions to get familiar with our pipeline.

# Data sets
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -38,7 +38,7 @@

html_theme = "sphinx_rtd_theme"
html_theme_options = {
"collapse_navigation" : False,
"collapse_navigation": False,
}
html_static_path = []

2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,5 +1,5 @@
hydra-core>=1.2.0
numpy>=1.21.5
numpy>=1.21.5,<2
pandas>=1.4.2
torch>=1.11.0
matplotlib>=3.5.2
11 changes: 9 additions & 2 deletions setup.cfg
@@ -1,6 +1,8 @@
[metadata]
name = move-dl
description = Multi-omics variational autoencoder
long_description = file: README.md
long_description_content_type = text/markdown
url = https://github.com/RasmussenLab/MOVE
classifiers =
    Intended Audience :: Healthcare Industry
@@ -15,13 +17,13 @@ version = attr: move.__version__
include_package_data = True
install_requires =
    hydra-core
    numpy
    numpy<2
    pandas
    torch
    matplotlib
    seaborn
    scikit-learn
    scipy
    scipy>=1.10.0

package_dir =
    = src
@@ -34,3 +36,8 @@ where = src
[options.entry_points]
console_scripts =
    move-dl=move.__main__:main

[flake8]
max-line-length = 88
aggressive = 2
extend-ignore = E203
14 changes: 7 additions & 7 deletions src/move/__init__.py
@@ -1,11 +1,11 @@
from __future__ import annotations

__license__ = "MIT"
__version__ = (1, 4, 10)
__all__ = ["conf", "data", "models", "training_loop", "VAE"]

HYDRA_VERSION_BASE = "1.2"

from move import conf, data, models
from move.models.vae import VAE
from move.training.training_loop import training_loop
from move import conf, data, models # noqa:E402
from move.models.vae import VAE # noqa:E402
from move.training.training_loop import training_loop # noqa:E402

__license__ = "MIT"
__version__ = (1, 5, 0)
__all__ = ["conf", "data", "models", "training_loop", "VAE"]
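
A quick sanity check of the re-exported names and the bumped version, assuming the `move-dl` package is installed:

```python
import move

# The version tuple is bumped from (1, 4, 10) to (1, 5, 0) in this commit.
print(move.__version__)  # (1, 5, 0)
print(move.__all__)      # ['conf', 'data', 'models', 'training_loop', 'VAE']
```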
18 changes: 18 additions & 0 deletions src/move/analysis/metrics.py
@@ -81,3 +81,21 @@ def norm(x: np.ma.MaskedArray, axis: int = 1) -> np.ma.MaskedArray:
        1D array with the specified axis removed.
    """
    return np.sqrt(np.sum(x**2, axis=axis))


def get_2nd_order_polynomial(
    x_array: FloatArray, y_array: FloatArray, n_points=100
) -> tuple[FloatArray, FloatArray, tuple[float, float, float]]:
    """
    Given a set of x and y values, find the 2nd-order polynomial that best fits
    the data.

    Returns:
        x_pol: x coordinates for the polynomial function evaluation.
        y_pol: y coordinates for the polynomial function evaluation.
        (a2, a1, a): fitted polynomial coefficients.
    """
    a2, a1, a = np.polyfit(x_array, y_array, deg=2)

    x_pol = np.linspace(np.min(x_array), np.max(x_array), n_points)
    y_pol = np.array([a2 * x * x + a1 * x + a for x in x_pol])

    return x_pol, y_pol, (a2, a1, a)
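
A minimal usage sketch of the new helper; the sample data below is made up for illustration, and only `numpy` plus an installed `move` package are assumed:

```python
import numpy as np

from move.analysis.metrics import get_2nd_order_polynomial

# Hypothetical noisy quadratic data, for illustration only.
rng = np.random.default_rng(seed=0)
x = np.linspace(-5.0, 5.0, 200)
y = 2.0 * x**2 - 3.0 * x + 1.0 + rng.normal(scale=0.5, size=x.size)

# Fit the quadratic and evaluate it on 100 evenly spaced points.
x_pol, y_pol, (a2, a1, a0) = get_2nd_order_polynomial(x, y, n_points=100)
print(f"fitted coefficients: a2={a2:.2f}, a1={a1:.2f}, a0={a0:.2f}")
```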
1 change: 1 addition & 0 deletions src/move/conf/main.yaml
@@ -20,6 +20,7 @@ hydra:
  job:
    config:
      override_dirname:
        item_sep: ";"
        exclude_keys:
          - experiment

28 changes: 28 additions & 0 deletions src/move/conf/schema.py
@@ -28,9 +28,11 @@ class InputConfig:
    name: str
    weight: int = 1


@dataclass
class ContinuousInputConfig(InputConfig):
    scale: bool = True
    log2: bool = False


@dataclass
@@ -185,6 +187,27 @@ class IdentifyAssociationsTTestConfig(IdentifyAssociationsConfig):
    num_latent: list[int] = MISSING


@dataclass
class IdentifyAssociationsKSConfig(IdentifyAssociationsConfig):
    """Configure the Kolmogorov-Smirnov approach to identify associations.
    Args:
        perturbed_feature_names: names of the perturbed features of interest.
        target_feature_names: names of the target features of interest.
    Description:
        For each perturbed feature - target feature pair, we will plot:
        - Input vs. reconstruction correlation plot: to assess reconstruction
          quality of both target and perturbed features.
        - Distribution of reconstruction values for the target feature before
          and after the perturbation of the perturbed feature.
    """

    perturbed_feature_names: list[str] = field(default_factory=list)
    target_feature_names: list[str] = field(default_factory=list)


@dataclass
class MOVEConfig:
    defaults: list[Any] = field(default_factory=lambda: [dict(data="base_data")])
@@ -237,6 +260,11 @@ def extract_names(configs: list[InputConfig]) -> list[str]:
    name="identify_associations_ttest_schema",
    node=IdentifyAssociationsTTestConfig,
)
cs.store(
    group="task",
    name="identify_associations_ks_schema",
    node=IdentifyAssociationsKSConfig,
)

# Register custom resolvers
OmegaConf.register_new_resolver("weights", extract_weights)
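
To illustrate how the new structured config is meant to be used, here is a small sketch built directly with OmegaConf; the feature names are hypothetical, and in practice the fields are filled from a task YAML such as `identify_associations_ks.yaml` below:

```python
from omegaconf import OmegaConf

from move.conf.schema import IdentifyAssociationsKSConfig

# Build a structured config from the new dataclass; parent fields that are not
# set here remain missing ("???") until Hydra fills them from the task YAML.
cfg = OmegaConf.structured(IdentifyAssociationsKSConfig)
cfg.perturbed_feature_names = ["drug_1"]       # hypothetical feature name
cfg.target_feature_names = ["metabolite_7"]    # hypothetical feature name
print(OmegaConf.to_yaml(cfg))
```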
2 changes: 2 additions & 0 deletions src/move/conf/task/identify_associations_bayes.yaml
@@ -32,3 +32,5 @@ training_loop:
    - 25
  early_stopping: false
  patience: 0


26 changes: 26 additions & 0 deletions src/move/conf/task/identify_associations_ks.yaml
@@ -0,0 +1,26 @@
defaults:
  - identify_associations_ks_schema

model:
  categorical_weights: ${weights:${data.categorical_inputs}}
  continuous_weights: ${weights:${data.continuous_inputs}}
  num_hidden:
    - 100
  num_latent: 50
  beta: 0.1
  dropout: 0.1
  cuda: false

training_loop:
  lr: 1e-4
  num_epochs: 200
  batch_dilation_steps:
    - 50
    - 100
    - 150
  kld_warmup_steps:
    - 15
    - 20
    - 25
  early_stopping: false
  patience: 0
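
For reference, this task file can be composed the same way the `move-dl` CLI does it; the sketch below is hypothetical, with the config module path and override name inferred from the files in this commit:

```python
from hydra import compose, initialize_config_module

# Hypothetical sketch: "move.conf" and the "task=identify_associations_ks"
# override are assumptions based on the config files touched in this commit,
# not a documented API.
with initialize_config_module(config_module="move.conf", version_base="1.2"):
    cfg = compose(config_name="main", overrides=["task=identify_associations_ks"])

print(cfg.task.model.num_latent)  # 50, from the YAML above
```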
2 changes: 2 additions & 0 deletions src/move/conf/task/identify_associations_ttest.yaml
@@ -35,3 +35,5 @@ training_loop:
    - 25
  early_stopping: false
  patience: 0

