Skip to content

Commit

Permalink
linting and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
GeorgWa committed Nov 14, 2024
1 parent 4f5afa3 commit 59434aa
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 80 deletions.
62 changes: 21 additions & 41 deletions alphadia/data/alpharaw.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# native imports
import logging
import math
import os

import numba as nb
Expand Down Expand Up @@ -460,7 +459,9 @@ def __init__(
self.scan_max_index = scan_max_index
self.frame_max_index = frame_max_index

def get_frame_indices(self, rt_values: np.array, optimize_size: int = 16):
def get_frame_indices(
self, rt_values: np.array, optimize_size: int = 16, min_size: int = 32
):
"""
Convert an interval of two rt values to a frame index interval.
Expand All @@ -474,54 +475,28 @@ def get_frame_indices(self, rt_values: np.array, optimize_size: int = 16):
optimize_size : int, default = 16
To optimize for fft efficiency, we want to extend the precursor cycle to a multiple of 16
min_size : int, default = 32
The minimum number of dia cycles to include
Returns
-------
np.ndarray, shape = (2,), dtype = int64
array of frame indices
"""

if rt_values.shape != (2,):
raise ValueError("rt_values must be a numpy array of shape (2,)")

frame_index = np.zeros(len(rt_values), dtype=np.int64)
for i, rt in enumerate(rt_values):
frame_index[i] = search_sorted_left(self.rt_values, rt)

precursor_cycle_limits = (frame_index + self.zeroth_frame) // self.cycle.shape[
1
]
precursor_cycle_len = precursor_cycle_limits[1] - precursor_cycle_limits[0]

# round up to the next multiple of 16
optimal_len = int(
optimize_size * math.ceil(precursor_cycle_len / optimize_size)
)

# by default, we extend the precursor cycle to the right
optimal_cycle_limits = np.array(
[precursor_cycle_limits[0], precursor_cycle_limits[0] + optimal_len],
dtype=np.int64,
return utils.get_frame_indices(
rt_values=rt_values,
rt_values_array=self.rt_values,
zeroth_frame=self.zeroth_frame,
cycle_len=self.cycle.shape[1],
precursor_cycle_max_index=self.precursor_cycle_max_index,
optimize_size=optimize_size,
min_size=min_size,
)

# if the cycle is too long, we extend it to the left
if optimal_cycle_limits[1] > self.precursor_cycle_max_index:
optimal_cycle_limits[1] = self.precursor_cycle_max_index
optimal_cycle_limits[0] = self.precursor_cycle_max_index - optimal_len

if optimal_cycle_limits[0] < 0:
optimal_cycle_limits[0] = (
0 if self.precursor_cycle_max_index % 2 == 0 else 1
)

# second element is the index of the first whole cycle which should not be used
# precursor_cycle_limits[1] += 1
# convert back to frame indices
frame_limits = optimal_cycle_limits * self.cycle.shape[1] + self.zeroth_frame
return utils.make_slice_1d(frame_limits)

def get_frame_indices_tolerance(
self, rt: float, tolerance: float, optimize_size: int = 16
self, rt: float, tolerance: float, optimize_size: int = 16, min_size: int = 32
):
"""
Determine the frame indices for a given retention time and tolerance.
Expand All @@ -538,6 +513,9 @@ def get_frame_indices_tolerance(
optimize_size : int, default = 16
To optimize for fft efficiency, we want to extend the precursor cycle to a multiple of 16
min_size : int, default = 32
The minimum number of dia cycles to include
Returns
-------
np.ndarray, shape = (1, 3,), dtype = int64
Expand All @@ -547,7 +525,9 @@ def get_frame_indices_tolerance(

rt_limits = np.array([rt - tolerance, rt + tolerance], dtype=np.float32)

return self.get_frame_indices(rt_limits, optimize_size=optimize_size)
return self.get_frame_indices(
rt_limits, optimize_size=optimize_size, min_size=min_size
)

def get_scan_indices(self, mobility_values: np.array, optimize_size: int = 16):
return np.array([[0, 2, 1]], dtype=np.int64)
Expand Down
59 changes: 21 additions & 38 deletions alphadia/data/bruker.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,9 @@ def __init__(

self.has_mobility = True

def get_frame_indices(self, rt_values: np.array, optimize_size: int = 16):
def get_frame_indices(
self, rt_values: np.array, optimize_size: int = 16, min_size: int = 32
):
"""
Convert an interval of two rt values to a frame index interval.
Expand All @@ -302,52 +304,28 @@ def get_frame_indices(self, rt_values: np.array, optimize_size: int = 16):
optimize_size : int, default = 16
To optimize for fft efficiency, we want to extend the precursor cycle to a multiple of 16
min_size : int, default = 32
The minimum number of dia cycles to include
Returns
-------
np.ndarray, shape = (2,), dtype = int64
array of frame indices
"""

if rt_values.shape != (2,):
raise ValueError("rt_values must be a numpy array of shape (2,)")

frame_index = np.searchsorted(self.rt_values, rt_values, "left")

precursor_cycle_limits = (frame_index + self.zeroth_frame) // self.cycle.shape[
1
]
precursor_cycle_len = precursor_cycle_limits[1] - precursor_cycle_limits[0]

# round up to the next multiple of 16
optimal_len = int(
optimize_size * math.ceil(precursor_cycle_len / optimize_size)
)

# by default, we extend the precursor cycle to the right
optimal_cycle_limits = np.array(
[precursor_cycle_limits[0], precursor_cycle_limits[0] + optimal_len],
dtype=np.int64,
return utils.get_frame_indices(
rt_values=rt_values,
rt_values_array=self.rt_values,
zeroth_frame=self.zeroth_frame,
cycle_len=self.cycle.shape[1],
precursor_cycle_max_index=self.precursor_cycle_max_index,
optimize_size=optimize_size,
min_size=min_size,
)

# if the cycle is too long, we extend it to the left
if optimal_cycle_limits[1] > self.precursor_cycle_max_index:
optimal_cycle_limits[1] = self.precursor_cycle_max_index
optimal_cycle_limits[0] = self.precursor_cycle_max_index - optimal_len

if optimal_cycle_limits[0] < 0:
optimal_cycle_limits[0] = (
0 if self.precursor_cycle_max_index % 2 == 0 else 1
)

# second element is the index of the first whole cycle which should not be used
# precursor_cycle_limits[1] += 1
# convert back to frame indices
frame_limits = optimal_cycle_limits * self.cycle.shape[1] + self.zeroth_frame
return utils.make_slice_1d(frame_limits)

def get_frame_indices_tolerance(
self, rt: float, tolerance: float, optimize_size: int = 16
self, rt: float, tolerance: float, optimize_size: int = 16, min_size: int = 32
):
"""
Determine the frame indices for a given retention time and tolerance.
Expand All @@ -364,6 +342,9 @@ def get_frame_indices_tolerance(
optimize_size : int, default = 16
To optimize for fft efficiency, we want to extend the precursor cycle to a multiple of 16
min_size : int, default = 32
The minimum number of dia cycles to include
Returns
-------
np.ndarray, shape = (1, 3,), dtype = int64
Expand All @@ -373,7 +354,9 @@ def get_frame_indices_tolerance(

rt_limits = np.array([rt - tolerance, rt + tolerance], dtype=np.float32)

return self.get_frame_indices(rt_limits, optimize_size=optimize_size)
return self.get_frame_indices(
rt_limits, optimize_size=optimize_size, min_size=min_size
)

def get_scan_indices(self, mobility_values: np.array, optimize_size: int = 16):
"""convert array of mobility values into scan indices, njit compatible.
Expand Down
4 changes: 3 additions & 1 deletion alphadia/peakgroup/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,7 +351,9 @@ def select_candidates(
rt = precursor_container.rt[i]
mobility = precursor_container.mobility[i]

frame_limits = jit_data.get_frame_indices_tolerance(rt, config.rt_tolerance)
frame_limits = jit_data.get_frame_indices_tolerance(
rt, config.rt_tolerance, min_size=config.kernel_size
)
scan_limits = jit_data.get_scan_indices_tolerance(
mobility, config.mobility_tolerance
)
Expand Down
69 changes: 69 additions & 0 deletions alphadia/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# native imports
import logging
import math
import platform
import re
from ctypes import Structure, c_double
Expand Down Expand Up @@ -681,3 +682,71 @@ def merge_missing_columns(

# merge
return left_df.merge(right_df[on + missing_from_left], on=on, how=how)


@nb.njit(inline="always")
def get_frame_indices(
rt_values: np.ndarray,
rt_values_array: np.ndarray,
zeroth_frame: int,
cycle_len: int,
precursor_cycle_max_index: int,
optimize_size: int = 16,
min_size: int = 32,
) -> np.ndarray:
"""
Convert an interval of two rt values to a frame index interval.
The length of the interval is rounded up so that a multiple of `optimize_size` cycles are included.
Parameters
----------
rt_values : np.ndarray, shape = (2,), dtype = float32
Array of rt values.
rt_values_array : np.ndarray
Array containing all rt values for searching.
zeroth_frame : int
Indicator if the first frame is zero.
cycle_len : int
The size of the cycle dimension.
precursor_cycle_max_index : int
Maximum index for precursor cycles.
optimize_size : int, default = 16
Optimize for FFT efficiency by using multiples of this size.
min_size : int, default = 32
Minimum number of DIA cycles to include.
Returns
-------
np.ndarray, shape = (2,), dtype = int64
Array of frame indices.
"""
if rt_values.shape != (2,):
raise ValueError("rt_values must be a numpy array of shape (2,)")

frame_index = np.searchsorted(rt_values_array, rt_values, "left")

precursor_cycle_limits = (frame_index + zeroth_frame) // cycle_len
precursor_cycle_len = precursor_cycle_limits[1] - precursor_cycle_limits[0]

# Apply minimum size
optimal_len = max(precursor_cycle_len, min_size)
# Round up to the next multiple of `optimize_size`
optimal_len = int(optimize_size * math.ceil(optimal_len / optimize_size))

# By default, extend the precursor cycle to the right
optimal_cycle_limits = np.array(
[precursor_cycle_limits[0], precursor_cycle_limits[0] + optimal_len],
dtype=np.int64,
)

# If the cycle is too long, extend it to the left
if optimal_cycle_limits[1] > precursor_cycle_max_index:
optimal_cycle_limits[1] = precursor_cycle_max_index
optimal_cycle_limits[0] = precursor_cycle_max_index - optimal_len

if optimal_cycle_limits[0] < 0:
optimal_cycle_limits[0] = 0

# Convert back to frame indices
frame_limits = optimal_cycle_limits * cycle_len + zeroth_frame
return make_slice_1d(frame_limits)
Loading

0 comments on commit 59434aa

Please sign in to comment.