Skip to content

Commit

Permalink
Merge branch 'development' into zero-slice-bug
Browse files Browse the repository at this point in the history
  • Loading branch information
GeorgWa committed Nov 14, 2024
2 parents 59434aa + 27edc96 commit c5522f2
Show file tree
Hide file tree
Showing 25 changed files with 179 additions and 77 deletions.
18 changes: 14 additions & 4 deletions alphadia/calibration/property.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def __init__(
float(transform_deviation) if transform_deviation is not None else None
)
self.is_fitted = False
self.metrics = None

def __repr__(self) -> str:
return f"<Calibration {self.name}, is_fitted: {self.is_fitted}>"
Expand Down Expand Up @@ -172,10 +173,12 @@ def fit(self, dataframe: pd.DataFrame, plot: bool = False, **kwargs):
self.function.fit(input_values, target_value)
self.is_fitted = True
except Exception as e:
logging.error(f"Could not fit estimator {self.name}: {e}")
logging.exception(f"Could not fit estimator {self.name}: {e}")
return

if plot is True:
self._save_metrics(dataframe)

if plot:
self.plot(dataframe, **kwargs)

def predict(self, dataframe, inplace=True):
Expand All @@ -200,13 +203,13 @@ def predict(self, dataframe, inplace=True):
logging.warning(
f"{self.name} prediction was skipped as it has not been fitted yet"
)
return
return None

if not set(self.input_columns).issubset(dataframe.columns):
logging.warning(
f"{self.name} calibration was skipped as input column {self.input_columns} not found in dataframe"
)
return
return None

input_values = dataframe[self.input_columns].values

Expand Down Expand Up @@ -297,6 +300,13 @@ def deviation(self, dataframe: pd.DataFrame):
axis=1,
)

def _save_metrics(self, dataframe):
    """Compute summary metrics of the calibration deviation and store them.

    The array returned by ``self.deviation(dataframe)`` is reduced to two
    scalars: the median of the absolute values of its column 1 and of its
    column 2. They are written to ``self.metrics`` under the keys
    ``median_accuracy`` and ``median_precision`` respectively, replacing any
    previously stored metrics.

    Parameters
    ----------
    dataframe : pd.DataFrame
        Passed through unchanged to ``self.deviation``.

    Notes
    -----
    NOTE(review): what columns 1 and 2 represent is defined by
    ``deviation``, which is not fully visible here -- presumably the
    deviation before and after calibration; confirm against that method.
    """
    residuals = self.deviation(dataframe)

    # (metric key, column index in the deviation array)
    metric_columns = (("median_accuracy", 1), ("median_precision", 2))

    self.metrics = {
        key: np.median(np.abs(residuals[:, column]))
        for key, column in metric_columns
    }

def ci(self, dataframe, ci: float = 0.95):
"""Calculate the residual deviation at the given confidence interval.
Expand Down
2 changes: 0 additions & 2 deletions alphadia/data/alpharaw.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,6 @@ def filter_spectra(self, **kwargs):
This function is implemented in the sub-class.
"""

pass

def jitclass(self):
return AlphaRawJIT(
self.cycle,
Expand Down
9 changes: 1 addition & 8 deletions alphadia/fdrexperimental.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,8 @@
# third party imports
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import model_selection
from torch import nn, optim
from torchmetrics.classification import BinaryAUROC
from tqdm import tqdm

Expand All @@ -30,7 +29,6 @@ class Classifier(ABC):
@abstractmethod
def fitted(self):
"""Return whether the classifier has been fitted."""
pass

@abstractmethod
def fit(self, x: np.array, y: np.array):
Expand All @@ -46,7 +44,6 @@ def fit(self, x: np.array, y: np.array):
Target values of shape (n_samples,) or (n_samples, n_classes).
"""
pass

@abstractmethod
def predict(self, x: np.array):
Expand All @@ -65,7 +62,6 @@ def predict(self, x: np.array):
Predicted class of shape (n_samples,).
"""
pass

@abstractmethod
def predict_proba(self, x: np.array):
Expand All @@ -84,7 +80,6 @@ def predict_proba(self, x: np.array):
Predicted class probabilities of shape (n_samples, n_classes).
"""
pass

@abstractmethod
def to_state_dict(self):
Expand All @@ -97,7 +92,6 @@ def to_state_dict(self):
state_dict : dict
State dict of the classifier.
"""
pass

@abstractmethod
def from_state_dict(self, state_dict: dict):
Expand All @@ -111,7 +105,6 @@ def from_state_dict(self, state_dict: dict):
State dict of the classifier.
"""
pass


class BinaryClassifier(Classifier):
Expand Down
1 change: 0 additions & 1 deletion alphadia/libtransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class ProcessingStep:
def __init__(self) -> None:
"""Base class for processing steps. Each implementation must implement the `validate` and `forward` method.
Processing steps can be chained together in a ProcessingPipeline."""
pass

def __call__(self, *args: typing.Any) -> typing.Any:
"""Run the processing step on the input object."""
Expand Down
20 changes: 10 additions & 10 deletions alphadia/numba/fft.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ def rfft2(x: np.array, s: None | tuple = None) -> np.array:
@overload(rfft2, fastmath=True)
def _(x, s=None):
if not isinstance(x, nb.types.Array):
return
return None

if x.ndim != 2:
return
return None

if x.dtype != nb.types.float32:
return
return None

def funcx_impl(x, s=None):
s, axes = ndshape_and_axes(x, s, (-2, -1))
Expand Down Expand Up @@ -98,13 +98,13 @@ def irfft2(x: np.array, s: None | tuple = None) -> np.array:
@overload(irfft2, fastmath=True)
def _(x, s=None):
if not isinstance(x, nb.types.Array):
return
return None

if x.ndim != 2:
return
return None

if x.dtype != nb.types.complex64:
return
return None

def funcx_impl(x, s=None):
s, axes = ndshape_and_axes(x, s, (-2, -1))
Expand Down Expand Up @@ -161,16 +161,16 @@ def convolve_fourier(dense, kernel):
@overload(convolve_fourier, fastmath=True)
def _(dense, kernel):
if not isinstance(dense, nb.types.Array):
return
return None

if not isinstance(kernel, nb.types.Array):
return
return None

if kernel.ndim != 2:
return
return None

if dense.ndim < 2:
return
return None

if dense.ndim == 2:

Expand Down
4 changes: 3 additions & 1 deletion alphadia/numba/fragments.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,9 @@ def get_ion_group_mapping(

score_group_intensity = np.zeros((len(ion_mz)), dtype=np.float32)

for precursor, mz, intensity in zip(ion_precursor, ion_mz, ion_intensity): # noqa: B905 ('strict' not supported by numba yet
for precursor, mz, intensity in zip(
ion_precursor, ion_mz, ion_intensity
): # ('strict' not supported by numba yet)
# score_group_idx = precursor_group[precursor]

if len(grouped_mz) == 0 or np.abs(grouped_mz[-1] - mz) > EPSILON:
Expand Down
70 changes: 56 additions & 14 deletions alphadia/outputtransform.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def accumulate_frag_df(
raw_name, df = next(df_iterable, (None, None))
if df is None:
logger.warning(f"no frag file found for {raw_name}")
return
return None

df = prepare_df(df, self.psm_df, column=self.column)

Expand Down Expand Up @@ -886,7 +886,7 @@ def build_library(

if len(psm_df) == 0:
logger.warning("No precursors found, skipping library building")
return
return None

libbuilder = libtransform.MbrLibraryBuilder(
fdr=0.01,
Expand Down Expand Up @@ -942,6 +942,10 @@ def _build_run_stat_df(
folder, peptidecentric.PeptideCentricWorkflow.OPTIMIZATION_MANAGER_PATH
)

calibration_manager_path = os.path.join(
folder, peptidecentric.PeptideCentricWorkflow.CALIBRATION_MANAGER_PATH
)

if channels is None:
channels = [0]
out_df = []
Expand All @@ -956,31 +960,69 @@ def _build_run_stat_df(
"proteins": channel_df["pg"].nunique(),
}

if "weighted_mass_error" in channel_df.columns:
base_dict["ms1_accuracy"] = np.mean(channel_df["weighted_mass_error"])

if "cycle_fwhm" in channel_df.columns:
base_dict["fwhm_rt"] = np.mean(channel_df["cycle_fwhm"])

if "mobility_fwhm" in channel_df.columns:
base_dict["fwhm_mobility"] = np.mean(channel_df["mobility_fwhm"])

# collect optimization stats
base_dict["optimization.ms2_error"] = np.nan
base_dict["optimization.ms1_error"] = np.nan
base_dict["optimization.rt_error"] = np.nan
base_dict["optimization.mobility_error"] = np.nan

if os.path.exists(optimization_manager_path):
optimization_manager = manager.OptimizationManager(
path=optimization_manager_path
)

base_dict["ms2_error"] = optimization_manager.ms2_error
base_dict["ms1_error"] = optimization_manager.ms1_error
base_dict["rt_error"] = optimization_manager.rt_error
base_dict["mobility_error"] = optimization_manager.mobility_error
base_dict["optimization.ms2_error"] = optimization_manager.ms2_error
base_dict["optimization.ms1_error"] = optimization_manager.ms1_error
base_dict["optimization.rt_error"] = optimization_manager.rt_error
base_dict["optimization.mobility_error"] = (
optimization_manager.mobility_error
)

else:
logger.warning(f"Error reading optimization manager for {raw_name}")
base_dict["ms2_error"] = np.nan
base_dict["ms1_error"] = np.nan
base_dict["rt_error"] = np.nan
base_dict["mobility_error"] = np.nan

# collect calibration stats
base_dict["calibration.ms2_median_accuracy"] = np.nan
base_dict["calibration.ms2_median_precision"] = np.nan
base_dict["calibration.ms1_median_accuracy"] = np.nan
base_dict["calibration.ms1_median_precision"] = np.nan

if os.path.exists(calibration_manager_path):
calibration_manager = manager.CalibrationManager(
path=calibration_manager_path
)

if (
fragment_mz_estimator := calibration_manager.get_estimator(
"fragment", "mz"
)
) and (fragment_mz_metrics := fragment_mz_estimator.metrics):
base_dict["calibration.ms2_median_accuracy"] = fragment_mz_metrics[
"median_accuracy"
]
base_dict["calibration.ms2_median_precision"] = fragment_mz_metrics[
"median_precision"
]

if (
precursor_mz_estimator := calibration_manager.get_estimator(
"precursor", "mz"
)
) and (precursor_mz_metrics := precursor_mz_estimator.metrics):
base_dict["calibration.ms1_median_accuracy"] = precursor_mz_metrics[
"median_accuracy"
]
base_dict["calibration.ms1_median_precision"] = precursor_mz_metrics[
"median_precision"
]

else:
logger.warning(f"Error reading calibration manager for {raw_name}")

out_df.append(base_dict)

Expand Down
2 changes: 0 additions & 2 deletions alphadia/peakgroup/kernel.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ def get_dense_matrix(self, verbose: bool = True):
mobility_resolution = np.mean(np.diff(self.dia_data.mobility_values[::-1]))

if verbose:
pass
logger.info(
f"Duty cycle consists of {rt_datapoints} frames, {rt_resolution:.2f} seconds cycle time"
)
Expand All @@ -189,7 +188,6 @@ def get_dense_matrix(self, verbose: bool = True):
mobility_sigma = self.determine_mobility_sigma(mobility_resolution)

if verbose:
pass
logger.info(
f"FWHM in RT is {self.fwhm_rt:.2f} seconds, sigma is {rt_sigma:.2f}"
)
Expand Down
4 changes: 2 additions & 2 deletions alphadia/peakgroup/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,7 +690,7 @@ def build_candidates(
cycle_limits_list = np.zeros((peak_cycle_list.shape[0], 2), dtype="int32")

for candidate_rank, (scan_relative, cycle_relative) in enumerate(
zip(peak_scan_list, peak_cycle_list) # noqa: B905 ('strict' not supported by numba yet)
zip(peak_scan_list, peak_cycle_list) # ('strict' not supported by numba yet)
):
scan_limits_relative, cycle_limits_relative = numeric.symetric_limits_2d(
score,
Expand Down Expand Up @@ -740,7 +740,7 @@ def build_candidates(
peak_score_list,
scan_limits_list,
cycle_limits_list,
): # noqa: B905 ('strict' not supported by numba yet)
): # ('strict' not supported by numba yet)
# does not work anymore

scan_limits_absolute = numeric.wrap1(
Expand Down
8 changes: 4 additions & 4 deletions alphadia/peakgroup/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,16 @@ def assemble_isotope_mz(mono_mz, charge, isotope_intensity):
@overload(assemble_isotope_mz)
def _(mono_mz, charge, isotope_intensity):
if not isinstance(mono_mz, nb.types.Float):
return
return None

if not isinstance(charge, nb.types.Integer):
return
return None

if not isinstance(isotope_intensity, nb.types.Array):
return
return None

if isotope_intensity.ndim != 1:
return
return None

def funcx_impl(mono_mz, charge, isotope_intensity):
offset = np.arange(len(isotope_intensity)) * 1.0033548350700006 / charge
Expand Down
6 changes: 3 additions & 3 deletions alphadia/planning.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ def __init__(
reporting.init_logging(self.output_folder)

logger.progress(" _ _ ___ ___ _ ")
logger.progress(" __ _| |_ __| |_ __ _| \_ _| /_\ ")
logger.progress(" / _` | | '_ \ ' \\/ _` | |) | | / _ \ ")
logger.progress(" \__,_|_| .__/_||_\__,_|___/___/_/ \_\\")
logger.progress(r" __ _| |_ __| |_ __ _| \_ _| /_\ ")
logger.progress(" / _` | | '_ \\ ' \\/ _` | |) | | / _ \\ ")
logger.progress(" \\__,_|_| .__/_||_\\__,_|___/___/_/ \\_\\")
logger.progress(" |_| ")
logger.progress("")

Expand Down
2 changes: 1 addition & 1 deletion alphadia/transferlearning/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,7 +994,7 @@ def finetune_ccs(self, psm_df: pd.DataFrame) -> pd.DataFrame:
logger.error(
"Failed to finetune CCS model. PSM dataframe does not contain mobility or ccs columns."
)
return
return None
if "ccs" not in psm_df.columns:
psm_df["ccs"] = mobility_to_ccs_for_df(psm_df, "mobility")
elif "mobility" not in psm_df.columns:
Expand Down
2 changes: 1 addition & 1 deletion alphadia/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@
# alpha family imports
import alphatims.bruker
import alphatims.utils
import matplotlib.patches as patches
import numba as nb
import numpy as np

# third party imports
import pandas as pd
import torch
from matplotlib import patches

logger = logging.getLogger()

Expand Down
Loading

0 comments on commit c5522f2

Please sign in to comment.