From b0d9a3d8b7d14a1003f8c40cd720c725be3f941f Mon Sep 17 00:00:00 2001
From: chengmingbo <chengmingbo@gmail.com>
Date: Tue, 3 Oct 2023 11:21:53 +0200
Subject: [PATCH] a simple python version

---
 pymojitoo/.gitignore          |  13 ++++
 pymojitoo/mojitoo/__init__.py |   6 ++
 pymojitoo/mojitoo/core.py     | 117 ++++++++++++++++++++++++++++++++++
 pymojitoo/setup.py            |  34 ++++++++++
 4 files changed, 170 insertions(+)
 create mode 100644 pymojitoo/.gitignore
 create mode 100644 pymojitoo/mojitoo/__init__.py
 create mode 100644 pymojitoo/mojitoo/core.py
 create mode 100644 pymojitoo/setup.py

diff --git a/pymojitoo/.gitignore b/pymojitoo/.gitignore
new file mode 100644
index 0000000..65251cb
--- /dev/null
+++ b/pymojitoo/.gitignore
@@ -0,0 +1,13 @@
+# Compiled python modules.
+*.pyc
+*.dump
+
+# Setuptools distribution folder.
+/dist/
+/build/
+
+# Python egg metadata, regenerated from source files by setuptools.
+/*.egg-info
+
+.DS_Store
+
diff --git a/pymojitoo/mojitoo/__init__.py b/pymojitoo/mojitoo/__init__.py
new file mode 100644
index 0000000..1cfe60b
--- /dev/null
+++ b/pymojitoo/mojitoo/__init__.py
@@ -0,0 +1,6 @@
+__version__ = "0.1.0"
+__author__ = 'Mingbo Cheng'
+__credits__ = 'Institute for Computational Genomics'
+
+from .core import *
+
diff --git a/pymojitoo/mojitoo/core.py b/pymojitoo/mojitoo/core.py
new file mode 100644
index 0000000..59010cb
--- /dev/null
+++ b/pymojitoo/mojitoo/core.py
@@ -0,0 +1,117 @@
+from sklearn.cross_decomposition import CCA
+from sklearn.preprocessing import StandardScaler
+from scipy import stats
+from statsmodels.stats.multitest import multipletests
+from collections import Counter
+from anndata import AnnData
+
+def mojitoo(adata: AnnData,
+            reduction_list:list = [],
+            dims_list:list = [],
+            reduction_name:str = "X_mojitoo",
+            is_reduction_center:str = False,
+            is_reduction_scale:str = False,
+            fdr_method:str = "fdr_bh",
+            corr_pval:float = 0.05,
+            overwrite:bool = False,
+            iscopy:bool = False,
+            **kargs
+        ):
+    """
+    MOJITOO multimodal integration
+
+    Parameters
+    ----
+    adata: AnnData
+        anndata.Anndata
+    reduction_list: list
+        reductions in adata.obsm
+    dims_list : list
+        dims for each dimension reduction to use, e.g. range(1,30), empty list indicate all of the dimensions
+    reduction_name:str
+       save_name, default: X_mojitoo 
+    is_reduction_center: bool
+        if center before cca, default: False
+    is_reduction_scale: bool
+        if scale before cca, default: False
+    fdr_method: str
+        fdr methods to use, candidates: bonferroni, sidak, holm-sidak, holm, simes-hochberg, hommel, fdr_bh, fdr_by, fdr_tsbh, fdr_tsbky
+    corr_pval: float
+        corrected pval threshold, default: 0.05
+    overwrite: bool
+        if overwrite the cca name, defalt: False
+    iscopy: bool
+        if copy adata, default: False
+    """
+
+    if not overwrite and reduction_name in adata.obsm.keys():
+        raise ValueError("reduction name exists, please enable parameter overwrite and re-try")
+
+    if len(reduction_name) < 2:
+        raise ValueError("At least 2 dimension names in reduction_list")
+
+    if len(dims_list) !=0 and len(dims_list) !=len(reduction_name):
+        raise ValueError("dims_list should be consistent with reduction_list")
+
+    assert(fdr_method in ["bonferroni",
+                          "sidak",
+                          "holm-sidak",
+                          "holm",
+                          "simes-hochberg",
+                          "hommel",
+                          "fdr_bh",
+                          "fdr_by",
+                          "fdr_tsbh",
+                          "fdr_tsbky"])
+
+    assert(corr_pval >0 and corr_pval < 1)
+ 
+    for redu in reduction_list:
+        if redu not in adata.obsm.keys():
+            raise ValueError("%s not in adata.obsm" % redu)
+
+
+    adata = adata.copy() if iscopy else adata
+
+
+    if len(dims_list) == 0:
+        for name in reduction_list:
+            dims_list.append(range(adata.obsm[name].shape[1]))
+
+    a_redu = None
+    for i in range(len(reduction_list) -1):
+        if i == 0:
+            a_name = reduction_list[i]
+            a_redu = adata.obsm[a_name]
+            a_dims = list(dims_list[i])
+            a_redu = a_redu[:, a_dims]
+        b_name = reduction_list[i+1] 
+        b_redu = adata.obsm[b_name]
+        b_dims = list(dims_list[i+1])
+        b_redu = b_redu[:, b_dims]
+
+        ## center or scale the data
+        if is_reduction_center or is_reduction_scale:
+            scaler = StandardScaler(with_mean=is_reduction_center,
+                                    with_std=is_reduction_scale) 
+            a_redu = scaler.fit_transform(a_redu) #scale data
+            b_redu = scaler.fit_transform(b_redu)
+
+        ## CCA transformation
+        cca = CCA(n_components=min(len(a_dims), len(b_dims)), **kargs)
+        cca.fit(a_redu, b_redu)
+        a, b = cca.transform(a_redu, b_redu) 
+
+        ## correlation test
+        correlation_test = [stats.pearsonr(a[:, i], b[:, i])[1] for i in range(a.shape[1])]
+        fdr_bool = multipletests(correlation_test,
+                                alpha=corr_pval,
+                                method=fdr_method)[0]
+
+        print(f"{i+1} round cc", Counter(fdr_bool)[True])
+        cca_add = a[:, fdr_bool] + b[:, fdr_bool]
+
+        a_redu = cca_add
+    adata.obsm[reduction_name] = cca_add
+
+    return adata if iscopy else None
diff --git a/pymojitoo/setup.py b/pymojitoo/setup.py
new file mode 100644
index 0000000..e7f6e27
--- /dev/null
+++ b/pymojitoo/setup.py
@@ -0,0 +1,34 @@
+from setuptools import find_packages, setup
+
+setup(
+    name='mojitoo',
+    version='0.1.0',
+    description='single cell multimodal integration',
+    url='https://github.com/CostaLab/MOJITOO/pymojitoo',
+    author='Mingbo Cheng',
+    author_email='chengmingbo@gmail.com',
+    license='BSD 2-clause',
+    install_requires=['numpy',
+                      'sklearn',
+                      'scipy',
+                      'statsmodels',
+                      'scanpy',
+                      'anndata',
+                      ],
+    classifiers=[
+        'Development Status :: 1 - Planning',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: BSD License',
+        'Operating System :: POSIX :: Linux',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3.10',
+    ],
+    packages=find_packages()
+)
+