diff --git a/CHANGELOG.md b/CHANGELOG.md index 6beffa6a..8330e6e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,6 @@ # calour changelog +## Version 2024.6.1 +* Add command_history file to Experiment read and save. When saving, a NAME_history.txt file is created with the command history used to create the Experiment. When loading, you can supply a call_history_file="XXX" parameter to load the call history file into the loaded Experiment. ## Version 2024.5.30 add mRNAExperiment class for handling rna-seq data. interactive heatmap gene information is via the rna_calour module using Harmonizome server (https://maayanlab.cloud/Harmonizome) diff --git a/calour/__init__.py b/calour/__init__.py index 5379192a..845559cd 100644 --- a/calour/__init__.py +++ b/calour/__init__.py @@ -19,7 +19,7 @@ __credits__ = "https://github.com/biocore/calour/graphs/contributors" -__version__ = "2024.5.30" +__version__ = "2024.6.1" __all__ = ['read', 'read_amplicon', 'read_ms', 'read_qiime2', 'Experiment', 'AmpliconExperiment', 'MS1Experiment','mRNAExperiment', diff --git a/calour/io.py b/calour/io.py index 9604a6c5..d27237f5 100644 --- a/calour/io.py +++ b/calour/io.py @@ -345,6 +345,7 @@ def _read_metadata(ids, f, kwargs): @ds.get_sectionsf('io.read') def read(data_file, sample_metadata_file=None, feature_metadata_file=None, + call_history_file=None, description='', sparse=True, data_file_type='biom', data_file_sep=',', sample_in_row=False, sample_id_proc=None, feature_id_proc=None, @@ -366,6 +367,8 @@ def read(data_file, sample_metadata_file=None, feature_metadata_file=None, metadata). feature_metadata_file : str, default=None File path to feature metadata. 
def save_call_history(exp: Experiment, f):
    '''Save experiment call history to file

    Save the command history used to generate the experiment to a file.
    History file is a UTF-8 text file with one line per call to a calour
    function.

    Parameters
    ----------
    exp : Experiment
        the experiment whose ``_call_history`` entries are written
    f : str
        the file name to save to
    '''
    # use a separate name for the handle so the path argument ``f`` is not
    # rebound; a fixed encoding keeps the file portable across platforms
    with open(f, 'w', encoding='utf-8') as fl:
        for cline in exp._call_history:
            # an entry can span several lines (e.g. the repr of a multi-line
            # parameter value); fold it so each call stays on a single line
            pieces = (part.strip() for part in str(cline).splitlines())
            fl.write('%s\n' % ' '.join(pieces))


def read_call_history(f):
    '''Load the call history from a file

    The file is expected to hold one call per line, as written by
    save_call_history(). Surrounding whitespace is removed from each line
    and blank lines are skipped.

    Parameters
    ----------
    f : str
        the file name to load from

    Returns
    -------
    list of str
        the call history
    '''
    with open(f, 'r', encoding='utf-8') as fl:
        stripped = (line.strip() for line in fl)
        # blank lines carry no call, so they are not history entries
        return [line for line in stripped if line]
- - Parameters - ---------- - data : numpy.ndarray or scipy.sparse.csr_matrix - The expression table for genes. Samples - are in row and features in column - sample_metadata : pandas.DataFrame - The metadata on the samples - feature_metadata : pandas.DataFrame - The metadata on the features - description : str - name of experiment - sparse : bool - store the data array in :class:`scipy.sparse.csr_matrix` - or :class:`numpy.ndarray` - databases: iterable of str, optional - database interface names to show by default in heatmap() function - by default use 'dbbact' - - Attributes - ---------- - data : numpy.ndarray or scipy.sparse.csr_matrix - The expression table for genes. Samples - are in row and features in column - sample_metadata : pandas.DataFrame - The metadata on the samples - feature_metadata : pandas.DataFrame - The metadata on the features - shape : tuple of (int, int) - the dimension of data - sparse : bool - store the data as sparse matrix (scipy.sparse.csr_matrix) or dense numpy array. - info : dict - information about the experiment (data md5, filenames, etc.) - description : str - name of the experiment - databases : dict - keys are the database names (i.e. 'dbbact' / 'gnps') - values are the database specific data for the experiment (i.e. annotations for dbbact) - - See Also - -------- - Experiment - ''' - def __init__(self, *args, databases=(), **kwargs): - super().__init__(*args, databases=('mrna',), **kwargs) - - def heatmap(self, *args, **kwargs): - '''Plot a heatmap for the mrna experiment. - - This method accepts exactly the same parameters as input with - its parent class method and does exactly the sample plotting. - - The only difference is that by default, its color scale is **in - log** as its `norm` parameter is set to - `matplotlib.colors.LogNorm()`. It makes more sense to show the - gene expression abundances in color of log scale since they cover a wide range of magnitudes. 
- You can always set it to other scale as - explained in :meth:`.Experiment.heatmap`. - - Parameters - ---------- - - Keyword Arguments - ----------------- - %(experiment.heatmap.parameters)s - - See Also - -------- - Experiment.heatmap - ''' - # set this default value inside the function instead of on the - # function API (like the __init__) because we don't wanna to - # define mpl.colors.LogNorm() on the API; otherwise, vmin and - # vmax are set the same once for all mRNAExperiment - # objects (which we don't want) because python initializes - # the function arguments when it reads in its definition. - - # by default use the log normalization - if 'norm' not in kwargs: - kwargs['norm'] = mpl.colors.LogNorm() - super().heatmap(*args, **kwargs) - - @staticmethod - def read(**kwargs): - '''Load an mRNA transcriptomics experiment. calls calour.io.read() providing the correct class parameter (cls=mRNAExperiment). - by default, the mRNAExperiment table is expected to be tab separated (can modify by the setting data_file_sep parameter), - and samples are in columns (can modify by setting sample_in_row parameter). - By default, the data is not normalized. To normalize the per-sample reads to sum X, set normalize=X. - For more details, see - - Parameters - ---------- - - Keyword Arguments - ----------------- - %(io.read.parameters)s - - Returns - ------- - ca.MRNAExperiment - - See Also - -------- - calour.io.read - ''' - if 'data_file_sep' not in kwargs: - kwargs['data_file_sep'] = '\t' - if 'sparse' not in kwargs: - kwargs['sparse'] = False - if 'sample_in_row' not in kwargs: - kwargs['sample_in_row'] = False - if 'normalize' not in kwargs: - kwargs['normalize'] = None - - dat = read(**kwargs, cls=mRNAExperiment) - return dat