Skip to content

Commit

Permalink
batch correction #133
Browse files Browse the repository at this point in the history
  • Loading branch information
elena-krismer committed Apr 7, 2023
1 parent 75675f6 commit 633ee54
Show file tree
Hide file tree
Showing 7 changed files with 47 additions and 8 deletions.
1 change: 1 addition & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* ADD plot Sample Distribution Histogram
* ADD paired-ttest option
* ENH add option to remove samples in GUI
* ADD pyComBat batch correction. Reference: Behdenna A, Haziza J, Azencot CA and Nordor A. (2020) pyComBat, a Python tool for batch effects correction in high-throughput molecular data using empirical Bayes methods. bioRxiv. doi: 10.1101/2020.03.17.995431

# 0.4.5
* FIX loading of Data on Windows
Expand Down
23 changes: 17 additions & 6 deletions alphastats/DataSet_Preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from alphastats.utils import ignore_warning
from sklearn.experimental import enable_iterative_imputer
import itertools
from combat.pycombat import pycombat


class Preprocess:
Expand Down Expand Up @@ -197,17 +198,27 @@ def _log2_transform(self):
self.mat = np.log2(self.mat + 0.1)
self.preprocessing_info.update({"Log2-transformed": True})
print("Data has been log2-transformed.")

def batch_correction(self, batch: str):
    """Correct for technical bias/batch effects with pyComBat.

    The intensity matrix ``self.mat`` is transposed, handed to ``pycombat``
    together with one batch label per column of the transposed matrix, and
    the corrected result is transposed back and stored in ``self.mat``.

    Behdenna A, Haziza J, Azencot CA and Nordor A. (2020) pyComBat, a Python tool for batch effects correction in high-throughput molecular data using empirical Bayes methods. bioRxiv doi: 10.1101/2020.03.17.995431

    Args:
        batch (str): column name in the metadata describing the different batches

    Raises:
        KeyError: if ``batch`` is not a metadata column once the sample
            column has been moved to the index.
        ValueError: if at least one sample of ``self.mat`` has no batch
            label (sample missing from the metadata or empty batch cell).
    """
    data = self.mat.transpose()
    sample_ids = data.columns.to_list()

    metadata_by_sample = self.metadata.set_index(self.sample)
    if batch not in metadata_by_sample.columns:
        raise KeyError(
            f"'{batch}' is not a usable batch column in the metadata. "
            f"Available columns: {metadata_by_sample.columns.to_list()}"
        )

    # Align the batch labels with the column order of the transposed matrix.
    series_of_batches = metadata_by_sample.reindex(sample_ids)[batch]

    # reindex fills samples that are absent from the metadata with NaN; fail
    # loudly here instead of passing undefined batch labels to pycombat.
    unassigned = series_of_batches.index[series_of_batches.isna()].to_list()
    if unassigned:
        raise ValueError(
            f"No batch assigned in metadata column '{batch}' "
            f"for sample(s): {unassigned}"
        )

    self.mat = pycombat(data=data, batch=series_of_batches).transpose()

@ignore_warning(RuntimeWarning)
def preprocess(
self,
log2_transform=True,
remove_contaminations=False,
subset=False,
normalization=None,
imputation=None,
remove_samples=None,
log2_transform: bool=True,
remove_contaminations: bool=False,
subset: bool=False,
normalization: str=None,
imputation: str=None,
remove_samples: list=None,
):
"""Preprocess Protein data
Expand Down
21 changes: 20 additions & 1 deletion alphastats/gui/pages/03_Preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def preprocessing():

st.markdown(
"Before analyzing your data, consider normalizing and imputing your data as well as the removal of contaminants. "
+ "A more detailed description about the preprocessing methods can be found in the AlphaPeptStats"
+ "A more detailed description about the preprocessing methods can be found in the AlphaPeptStats "
+ "[documentation](https://alphapeptstats.readthedocs.io/en/main/data_preprocessing.html)."
)

Expand Down Expand Up @@ -70,6 +70,25 @@ def preprocessing():
pd.DataFrame.from_dict(preprocessing, orient="index").astype(str),
use_container_width=True,
)

st.markdown("#### Batch correction: correct for technical bias")

with st.form("Batch correction: correct for technical bias"):
batch = st.selectbox(
"Batch",
options= st.session_state.dataset.metadata.columns.to_list()
)
submit_batch_correction = st.form_submit_button("Submit")

if submit_batch_correction:
st.session_state.dataset.batch_correction(
batch=batch
)
st.info(
"Data has been processed. "
+ datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S")
)


with c2:

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ numba==0.56.4
numba-stats==0.5.0
swifter==1.2.0
click==8.0.1
kaleido==0.2.1
kaleido==0.2.1
combat==0.3.3
Binary file modified testfiles/maxquant/metadata.xlsx
Binary file not shown.
Binary file added testfiles/maxquant/~$metadata.xlsx
Binary file not shown.
7 changes: 7 additions & 0 deletions tests/test_DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,13 @@ def test_plot_intensity_sign_001(self):
def test_plot_samplehistograms(self):
    """The sample histogram figure holds 312 entries in its Plotly JSON ``data`` list."""
    figure_json = self.obj.plot_samplehistograms().to_plotly_json()
    data_entries = figure_json["data"]
    self.assertEqual(312, len(data_entries))

def test_batch_correction(self):
    """After preprocessing and batch correction, the first matrix value is about 0.0111."""
    preprocessing_options = {
        "subset": True,
        "imputation": "knn",
        "normalization": "quantile",
    }
    self.obj.preprocess(**preprocessing_options)
    self.obj.batch_correction(batch="batch_artifical_added")
    corrected_values = self.obj.mat.values
    # Only the top-left cell is checked, to two decimal places.
    self.assertAlmostEqual(0.0111, corrected_values[0, 0], places=2)




Expand Down

0 comments on commit 633ee54

Please sign in to comment.