linting and tests

MannLabs · Nov 14, 2024 · 59434aa · 59434aa
1 parent 4f5afa3
commit 59434aa
Show file tree

Hide file tree

Showing 5 changed files with 266 additions and 80 deletions.
diff --git a/alphadia/data/alpharaw.py b/alphadia/data/alpharaw.py
@@ -1,6 +1,5 @@
 # native imports
 import logging
-import math
 import os
 
 import numba as nb
@@ -460,7 +459,9 @@ def __init__(
         self.scan_max_index = scan_max_index
         self.frame_max_index = frame_max_index
 
-    def get_frame_indices(self, rt_values: np.array, optimize_size: int = 16):
+    def get_frame_indices(
+        self, rt_values: np.array, optimize_size: int = 16, min_size: int = 32
+    ):
         """
 
         Convert an interval of two rt values to a frame index interval.
@@ -474,54 +475,28 @@ def get_frame_indices(self, rt_values: np.array, optimize_size: int = 16):
         optimize_size : int, default = 16
             To optimize for fft efficiency, we want to extend the precursor cycle to a multiple of 16
 
+        min_size : int, default = 32
+            The minimum number of dia cycles to include
+
         Returns
         -------
         np.ndarray, shape = (2,), dtype = int64
             array of frame indices
 
         """
 
-        if rt_values.shape != (2,):
-            raise ValueError("rt_values must be a numpy array of shape (2,)")
-
-        frame_index = np.zeros(len(rt_values), dtype=np.int64)
-        for i, rt in enumerate(rt_values):
-            frame_index[i] = search_sorted_left(self.rt_values, rt)
-
-        precursor_cycle_limits = (frame_index + self.zeroth_frame) // self.cycle.shape[
-            1
-        ]
-        precursor_cycle_len = precursor_cycle_limits[1] - precursor_cycle_limits[0]
-
-        # round up to the next multiple of 16
-        optimal_len = int(
-            optimize_size * math.ceil(precursor_cycle_len / optimize_size)
-        )
-
-        # by default, we extend the precursor cycle to the right
-        optimal_cycle_limits = np.array(
-            [precursor_cycle_limits[0], precursor_cycle_limits[0] + optimal_len],
-            dtype=np.int64,
+        return utils.get_frame_indices(
+            rt_values=rt_values,
+            rt_values_array=self.rt_values,
+            zeroth_frame=self.zeroth_frame,
+            cycle_len=self.cycle.shape[1],
+            precursor_cycle_max_index=self.precursor_cycle_max_index,
+            optimize_size=optimize_size,
+            min_size=min_size,
         )
 
-        # if the cycle is too long, we extend it to the left
-        if optimal_cycle_limits[1] > self.precursor_cycle_max_index:
-            optimal_cycle_limits[1] = self.precursor_cycle_max_index
-            optimal_cycle_limits[0] = self.precursor_cycle_max_index - optimal_len
-
-            if optimal_cycle_limits[0] < 0:
-                optimal_cycle_limits[0] = (
-                    0 if self.precursor_cycle_max_index % 2 == 0 else 1
-                )
-
-        # second element is the index of the first whole cycle which should not be used
-        # precursor_cycle_limits[1] += 1
-        # convert back to frame indices
-        frame_limits = optimal_cycle_limits * self.cycle.shape[1] + self.zeroth_frame
-        return utils.make_slice_1d(frame_limits)
-
     def get_frame_indices_tolerance(
-        self, rt: float, tolerance: float, optimize_size: int = 16
+        self, rt: float, tolerance: float, optimize_size: int = 16, min_size: int = 32
     ):
         """
         Determine the frame indices for a given retention time and tolerance.
@@ -538,6 +513,9 @@ def get_frame_indices_tolerance(
         optimize_size : int, default = 16
             To optimize for fft efficiency, we want to extend the precursor cycle to a multiple of 16
 
+        min_size : int, default = 32
+            The minimum number of dia cycles to include
+
         Returns
         -------
         np.ndarray, shape = (1, 3,), dtype = int64
@@ -547,7 +525,9 @@ def get_frame_indices_tolerance(
 
         rt_limits = np.array([rt - tolerance, rt + tolerance], dtype=np.float32)
 
-        return self.get_frame_indices(rt_limits, optimize_size=optimize_size)
+        return self.get_frame_indices(
+            rt_limits, optimize_size=optimize_size, min_size=min_size
+        )
 
     def get_scan_indices(self, mobility_values: np.array, optimize_size: int = 16):
         return np.array([[0, 2, 1]], dtype=np.int64)

diff --git a/alphadia/data/bruker.py b/alphadia/data/bruker.py
@@ -288,7 +288,9 @@ def __init__(
 
         self.has_mobility = True
 
-    def get_frame_indices(self, rt_values: np.array, optimize_size: int = 16):
+    def get_frame_indices(
+        self, rt_values: np.array, optimize_size: int = 16, min_size: int = 32
+    ):
         """
 
         Convert an interval of two rt values to a frame index interval.
@@ -302,52 +304,28 @@ def get_frame_indices(self, rt_values: np.array, optimize_size: int = 16):
         optimize_size : int, default = 16
             To optimize for fft efficiency, we want to extend the precursor cycle to a multiple of 16
 
+        min_size : int, default = 32
+            The minimum number of dia cycles to include
+
         Returns
         -------
         np.ndarray, shape = (2,), dtype = int64
             array of frame indices
 
         """
 
-        if rt_values.shape != (2,):
-            raise ValueError("rt_values must be a numpy array of shape (2,)")
-
-        frame_index = np.searchsorted(self.rt_values, rt_values, "left")
-
-        precursor_cycle_limits = (frame_index + self.zeroth_frame) // self.cycle.shape[
-            1
-        ]
-        precursor_cycle_len = precursor_cycle_limits[1] - precursor_cycle_limits[0]
-
-        # round up to the next multiple of 16
-        optimal_len = int(
-            optimize_size * math.ceil(precursor_cycle_len / optimize_size)
-        )
-
-        # by default, we extend the precursor cycle to the right
-        optimal_cycle_limits = np.array(
-            [precursor_cycle_limits[0], precursor_cycle_limits[0] + optimal_len],
-            dtype=np.int64,
+        return utils.get_frame_indices(
+            rt_values=rt_values,
+            rt_values_array=self.rt_values,
+            zeroth_frame=self.zeroth_frame,
+            cycle_len=self.cycle.shape[1],
+            precursor_cycle_max_index=self.precursor_cycle_max_index,
+            optimize_size=optimize_size,
+            min_size=min_size,
         )
 
-        # if the cycle is too long, we extend it to the left
-        if optimal_cycle_limits[1] > self.precursor_cycle_max_index:
-            optimal_cycle_limits[1] = self.precursor_cycle_max_index
-            optimal_cycle_limits[0] = self.precursor_cycle_max_index - optimal_len
-
-            if optimal_cycle_limits[0] < 0:
-                optimal_cycle_limits[0] = (
-                    0 if self.precursor_cycle_max_index % 2 == 0 else 1
-                )
-
-        # second element is the index of the first whole cycle which should not be used
-        # precursor_cycle_limits[1] += 1
-        # convert back to frame indices
-        frame_limits = optimal_cycle_limits * self.cycle.shape[1] + self.zeroth_frame
-        return utils.make_slice_1d(frame_limits)
-
     def get_frame_indices_tolerance(
-        self, rt: float, tolerance: float, optimize_size: int = 16
+        self, rt: float, tolerance: float, optimize_size: int = 16, min_size: int = 32
     ):
         """
         Determine the frame indices for a given retention time and tolerance.
@@ -364,6 +342,9 @@ def get_frame_indices_tolerance(
         optimize_size : int, default = 16
             To optimize for fft efficiency, we want to extend the precursor cycle to a multiple of 16
 
+        min_size : int, default = 32
+            The minimum number of dia cycles to include
+
         Returns
         -------
         np.ndarray, shape = (1, 3,), dtype = int64
@@ -373,7 +354,9 @@ def get_frame_indices_tolerance(
 
         rt_limits = np.array([rt - tolerance, rt + tolerance], dtype=np.float32)
 
-        return self.get_frame_indices(rt_limits, optimize_size=optimize_size)
+        return self.get_frame_indices(
+            rt_limits, optimize_size=optimize_size, min_size=min_size
+        )
 
     def get_scan_indices(self, mobility_values: np.array, optimize_size: int = 16):
         """convert array of mobility values into scan indices, njit compatible.

diff --git a/alphadia/peakgroup/search.py b/alphadia/peakgroup/search.py
@@ -351,7 +351,9 @@ def select_candidates(
     rt = precursor_container.rt[i]
     mobility = precursor_container.mobility[i]
 
-    frame_limits = jit_data.get_frame_indices_tolerance(rt, config.rt_tolerance)
+    frame_limits = jit_data.get_frame_indices_tolerance(
+        rt, config.rt_tolerance, min_size=config.kernel_size
+    )
     scan_limits = jit_data.get_scan_indices_tolerance(
         mobility, config.mobility_tolerance
     )

diff --git a/alphadia/utils.py b/alphadia/utils.py
@@ -1,5 +1,6 @@
 # native imports
 import logging
+import math
 import platform
 import re
 from ctypes import Structure, c_double
@@ -681,3 +682,71 @@ def merge_missing_columns(
 
     # merge
     return left_df.merge(right_df[on + missing_from_left], on=on, how=how)
+
+
+@nb.njit(inline="always")
+def get_frame_indices(
+    rt_values: np.ndarray,
+    rt_values_array: np.ndarray,
+    zeroth_frame: int,
+    cycle_len: int,
+    precursor_cycle_max_index: int,
+    optimize_size: int = 16,
+    min_size: int = 32,
+) -> np.ndarray:
+    """
+    Convert an interval of two rt values to a frame index interval.
+    The length of the interval is rounded up so that a multiple of `optimize_size` cycles are included.
+
+    Parameters
+    ----------
+    rt_values : np.ndarray, shape = (2,), dtype = float32
+        Array of rt values.
+    rt_values_array : np.ndarray
+        Array containing all rt values for searching.
+    zeroth_frame : int
+        Indicator if the first frame is zero.
+    cycle_len : int
+        The size of the cycle dimension.
+    precursor_cycle_max_index : int
+        Maximum index for precursor cycles.
+    optimize_size : int, default = 16
+        Optimize for FFT efficiency by using multiples of this size.
+    min_size : int, default = 32
+        Minimum number of DIA cycles to include.
+
+    Returns
+    -------
+    np.ndarray, shape = (2,), dtype = int64
+        Array of frame indices.
+    """
+    if rt_values.shape != (2,):
+        raise ValueError("rt_values must be a numpy array of shape (2,)")
+
+    frame_index = np.searchsorted(rt_values_array, rt_values, "left")
+
+    precursor_cycle_limits = (frame_index + zeroth_frame) // cycle_len
+    precursor_cycle_len = precursor_cycle_limits[1] - precursor_cycle_limits[0]
+
+    # Apply minimum size
+    optimal_len = max(precursor_cycle_len, min_size)
+    # Round up to the next multiple of `optimize_size`
+    optimal_len = int(optimize_size * math.ceil(optimal_len / optimize_size))
+
+    # By default, extend the precursor cycle to the right
+    optimal_cycle_limits = np.array(
+        [precursor_cycle_limits[0], precursor_cycle_limits[0] + optimal_len],
+        dtype=np.int64,
+    )
+
+    # If the cycle is too long, extend it to the left
+    if optimal_cycle_limits[1] > precursor_cycle_max_index:
+        optimal_cycle_limits[1] = precursor_cycle_max_index
+        optimal_cycle_limits[0] = precursor_cycle_max_index - optimal_len
+
+        if optimal_cycle_limits[0] < 0:
+            optimal_cycle_limits[0] = 0
+
+    # Convert back to frame indices
+    frame_limits = optimal_cycle_limits * cycle_len + zeroth_frame
+    return make_slice_1d(frame_limits)