From 3c0fac4a5fe1a5621bfabe624e27f7c6bc3e2a96 Mon Sep 17 00:00:00 2001
From: CCranney <11773171+CCranney@users.noreply.github.com>
Date: Tue, 19 Mar 2024 11:49:17 -0700
Subject: [PATCH 1/2] fixing minor warnings that appeared while running pytest

---
 src/zodiaq/scoring/quantificationFunctions.py               | 3 +--
 .../scoring/testFileContentCreators/MaccScoresBreakdown.py  | 6 +++---
 .../ProteinCosineEvalScoresBreakdown.py                     | 4 ++--
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/zodiaq/scoring/quantificationFunctions.py b/src/zodiaq/scoring/quantificationFunctions.py
index b93215e..fe8ba85 100644
--- a/src/zodiaq/scoring/quantificationFunctions.py
+++ b/src/zodiaq/scoring/quantificationFunctions.py
@@ -2,14 +2,13 @@
 import numpy as np
 from zodiaq.utils import format_protein_string_to_list
 from .idpickerFunctions import initialize__format_peptide_protein_connections
-from scipy import mean
 from collections import defaultdict
 from itertools import chain
 import scipy.linalg as linalg
 
 
 def calculate_ion_count_from_peptides_of_protein(ionCountList):
-    return mean(ionCountList)
+    return np.mean(ionCountList)
 
 
 def calculate_ion_count_for_each_protein_in_protein_fdr_df(proteinDf):
diff --git a/tests/system/system_tests/scoring/testFileContentCreators/MaccScoresBreakdown.py b/tests/system/system_tests/scoring/testFileContentCreators/MaccScoresBreakdown.py
index 15ff944..4cb8f95 100644
--- a/tests/system/system_tests/scoring/testFileContentCreators/MaccScoresBreakdown.py
+++ b/tests/system/system_tests/scoring/testFileContentCreators/MaccScoresBreakdown.py
@@ -22,11 +22,11 @@ def _create_input_template_for_scoring_module(self):
         )
         duplicatePeptideInputDf["zLIB"] = [2] * len(duplicatePeptideInputDf.index)
         allDecoyDf = self._create_input_for_all_decoys()
-        firstDecoyLayerInputDf = allDecoyDf.iloc[:2]
+        firstDecoyLayerInputDf = allDecoyDf.iloc[:2].copy()
         firstDecoyLayerInputDf["rank"] = [3] * len(firstDecoyLayerInputDf.index)
-        secondDecoyLayerInputDf = allDecoyDf.iloc[2:4]
+        secondDecoyLayerInputDf = allDecoyDf.iloc[2:4].copy()
         secondDecoyLayerInputDf["rank"] = [4] * len(secondDecoyLayerInputDf.index)
-        thirdDecoyLayerInputDf = allDecoyDf.iloc[4:]
+        thirdDecoyLayerInputDf = allDecoyDf.iloc[4:].copy()
         thirdDecoyLayerInputDf["rank"] = [5] * len(thirdDecoyLayerInputDf.index)
         extraSpectralInputDf = self._create_input_for_generic_peptides(
             rank=6, startingRow=200
diff --git a/tests/system/system_tests/scoring/testFileContentCreators/ProteinCosineEvalScoresBreakdown.py b/tests/system/system_tests/scoring/testFileContentCreators/ProteinCosineEvalScoresBreakdown.py
index 811918b..9301366 100644
--- a/tests/system/system_tests/scoring/testFileContentCreators/ProteinCosineEvalScoresBreakdown.py
+++ b/tests/system/system_tests/scoring/testFileContentCreators/ProteinCosineEvalScoresBreakdown.py
@@ -21,9 +21,9 @@ def _create_input_template_for_scoring_module(self):
         )
         peptideInputDf = self._create_input_for_generic_peptides(rank=2, startingRow=2)
         allDecoyDf = self._create_input_for_all_decoys(numRows=3)
-        firstDecoyLayerInputDf = allDecoyDf.iloc[:2]
+        firstDecoyLayerInputDf = allDecoyDf.iloc[:2].copy()
         firstDecoyLayerInputDf["rank"] = [3] * len(firstDecoyLayerInputDf.index)
-        secondDecoyLayerInputDf = allDecoyDf.iloc[2:]
+        secondDecoyLayerInputDf = allDecoyDf.iloc[2:].copy()
         secondDecoyLayerInputDf["rank"] = [4] * len(secondDecoyLayerInputDf.index)
         self.inputDf = pd.concat(
             [

From 65e020880a1d247317966b5a836ca159d8d30e2c Mon Sep 17 00:00:00 2001
From: CCranney <11773171+CCranney@users.noreply.github.com>
Date: Tue, 19 Mar 2024 16:43:51 -0700
Subject: [PATCH 2/2] handle empty match/FDR inputs, add retentionTime to output, other minor fixes

---
 src/zodiaq/identification/identifier.py          | 16 ++++++----------
 src/zodiaq/identification/matchingFunctions.py   |  5 ++++-
 .../identification/outputFormattingFunctions.py  |  2 ++
 .../loaders/library/decoyGenerationFunctions.py  |  2 ++
 .../loaders/query/queryLoaderStrategyMzxml.py    |  1 +
 src/zodiaq/scoring/fdrCalculationFunctions.py    |  6 +++++-
 src/zodiaq/scoring/scoringFunctions.py           |  2 ++
 .../test_outputFormattingFunctions.py            |  3 +++
 8 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/src/zodiaq/identification/identifier.py b/src/zodiaq/identification/identifier.py
index 9386285..76e4617 100644
--- a/src/zodiaq/identification/identifier.py
+++ b/src/zodiaq/identification/identifier.py
@@ -9,10 +9,7 @@
     calculate_ppm_offset_tolerance,
     create_ppm_histogram,
 )
-from zodiaq.scoring import (
-    score_library_to_query_matches,
-    determine_index_of_fdr_cutoff,
-)
+from zodiaq.scoring import score_library_to_query_matches
 from zodiaq.identification.outputFormattingFunctions import (
     extract_metadata_from_match_and_score_dataframes,
     format_output_line,
@@ -168,6 +165,11 @@ def _apply_correction_to_match_dataframe(self, matchDf):
         offset, tolerance = calculate_ppm_offset_tolerance(
             matchDf["ppmDifference"], self._commandLineArgs["correctionDegree"]
         )
+        toleranceMinimumCutoff = 5
+        if not self._commandLineArgs["correctionDegree"] and tolerance < toleranceMinimumCutoff:
+            _, tolerance = calculate_ppm_offset_tolerance(
+                matchDf["ppmDifference"], 0.5
+            )
         queryFile = self._queryContext.filePath.split("/")[-1]
         outFile = os.path.splitext(queryFile)[0] + "_correctionHistogram.png"
         if self._commandLineArgs["histogram"]:
@@ -180,12 +182,6 @@ def _apply_correction_to_match_dataframe(self, matchDf):
         matchDf = filter_matches_by_ppm_offset_and_tolerance(matchDf, offset, tolerance)
         return eliminate_low_count_matches(matchDf)
 
-    def _apply_correction_to_score_dataframe(self, matchDf, scoreDf):
-        scoreDf = score_library_to_query_matches(matchDf)
-        isDecoyArray = identify_all_decoys(self._decoySet, scoreDf)
-        scoreDfCutoffIdx = determine_index_of_fdr_cutoff(isDecoyArray)
-        return scoreDf.iloc[:scoreDfCutoffIdx, :]
-
     def _format_identifications_as_dataframe(self, matchDf, scoreDf):
         """
         The final match/score identifications are consolidated into a dataframe.
diff --git a/src/zodiaq/identification/matchingFunctions.py b/src/zodiaq/identification/matchingFunctions.py
index b613e29..a0132b9 100644
--- a/src/zodiaq/identification/matchingFunctions.py
+++ b/src/zodiaq/identification/matchingFunctions.py
@@ -154,7 +154,10 @@ def match_library_to_query_pooled_spectra(libraryPeaks, queryPeaks, ppmTolerance
             libraryArray, queryArray, ppmTolerance, baselineLibraryIdx, baselineQueryIdx
         )
         dataArrays.append(dataArray)
-    data = np.concatenate(dataArrays, axis=0)
+    if len(dataArrays) == 0:
+        data = []
+    else:
+        data = np.concatenate(dataArrays, axis=0)
     matchDf = pd.DataFrame(
         data,
         columns=[
diff --git a/src/zodiaq/identification/outputFormattingFunctions.py b/src/zodiaq/identification/outputFormattingFunctions.py
index 1e10c81..9c81422 100644
--- a/src/zodiaq/identification/outputFormattingFunctions.py
+++ b/src/zodiaq/identification/outputFormattingFunctions.py
@@ -20,6 +20,7 @@ def format_output_line(libMetadata, queMetadata, matchMetadata):
         queMetadata["CV"],
         queMetadata["windowWidth"],
         matchMetadata["exclude_num"],
+        queMetadata["retentionTime"],
     ]
 
 
@@ -69,6 +70,7 @@ def format_output_as_pandas_dataframe(inputFileName, outputData):
         "CompensationVoltage",
         "totalWindowWidth",
         "exclude_num",
+        "retentionTime",
     ]
     outputDf = pd.DataFrame(outputData, columns=columns)
     outputDf.insert(0, "fileName", [inputFileName] * len(outputDf.index))
diff --git a/src/zodiaq/loaders/library/decoyGenerationFunctions.py b/src/zodiaq/loaders/library/decoyGenerationFunctions.py
index 7b0a865..1b62255 100644
--- a/src/zodiaq/loaders/library/decoyGenerationFunctions.py
+++ b/src/zodiaq/loaders/library/decoyGenerationFunctions.py
@@ -36,6 +36,8 @@ def shuffle_peptide_sequence_with_preserved_cleavage_points(peptide):
         if char in cleavageAminoAcids
     ]
     otherAAs = [char for char in originalPeptide if char not in cleavageAminoAcids]
+    if len(otherAAs) < 2:
+        return peptide
     for i in range(100):
         shuffledPeptide = shuffle_non_cleavage_amino_acids(otherAAs, i)
         shuffledPeptide = insert_cleavage_amino_acids_into_shuffled_peptide(
diff --git a/src/zodiaq/loaders/query/queryLoaderStrategyMzxml.py b/src/zodiaq/loaders/query/queryLoaderStrategyMzxml.py
index 63e0c21..6231071 100644
--- a/src/zodiaq/loaders/query/queryLoaderStrategyMzxml.py
+++ b/src/zodiaq/loaders/query/queryLoaderStrategyMzxml.py
@@ -42,6 +42,7 @@ def extract_metadata_from_query_scans(self) -> dict:
                 metadataDict["precursorMz"] = spec["precursorMz"][0]["precursorMz"]
                 metadataDict["windowWidth"] = spec["precursorMz"][0]["windowWideness"]
                 metadataDict["peaksCount"] = spec["peaksCount"]
+                metadataDict["retentionTime"] = spec["retentionTime"]
                 if "nameValue" in spec:
                     for key, value in spec["nameValue"].items():
                         spec[key] = value
diff --git a/src/zodiaq/scoring/fdrCalculationFunctions.py b/src/zodiaq/scoring/fdrCalculationFunctions.py
index a82c8ec..ab202d1 100644
--- a/src/zodiaq/scoring/fdrCalculationFunctions.py
+++ b/src/zodiaq/scoring/fdrCalculationFunctions.py
@@ -11,9 +11,11 @@ def create_spectral_fdr_output_from_full_output_sorted_by_desired_score(
     fullDf, fdrCutoff=0.01
 ):
     fdrs = calculate_fdr_rates_of_decoy_array(fullDf["isDecoy"])
-    scoreDfCutoffIdx = np.argmax(fdrs > fdrCutoff)
     spectralDf = fullDf.copy()
     spectralDf["spectralFDR"] = fdrs
+    if len(fdrs) == 0:
+        return spectralDf
+    scoreDfCutoffIdx = np.argmax(fdrs > fdrCutoff)
     return spectralDf.iloc[:scoreDfCutoffIdx, :]
 
 
@@ -23,6 +25,8 @@ def create_peptide_fdr_output_from_full_output_sorted_by_desired_score(
     peptideDf = drop_duplicate_values_from_df_in_given_column(fullDf, "peptide")
     fdrs = calculate_fdr_rates_of_decoy_array(peptideDf["isDecoy"])
     peptideDf["peptideFDR"] = fdrs
+    if len(fdrs) == 0:
+        return peptideDf
     scoreDfCutoffIdx = np.argmax(fdrs > fdrCutoff)
     if not scoreDfCutoffIdx:
         return peptideDf
diff --git a/src/zodiaq/scoring/scoringFunctions.py b/src/zodiaq/scoring/scoringFunctions.py
index 4790a8b..b7b4f63 100644
--- a/src/zodiaq/scoring/scoringFunctions.py
+++ b/src/zodiaq/scoring/scoringFunctions.py
@@ -32,6 +32,8 @@ def determine_index_of_fdr_cutoff(isDecoyArray, fdrCutoff=1e-2):
     if 1 not in isDecoyArray:
         return len(isDecoyArray)
     fdrs = calculate_fdr_rates_of_decoy_array(isDecoyArray)
+    if len(fdrs) == 0:
+        return 0
     lastDecoyIdx = np.argmax(fdrs > fdrCutoff)
     return lastDecoyIdx
 
diff --git a/tests/unit/identification/test_outputFormattingFunctions.py b/tests/unit/identification/test_outputFormattingFunctions.py
index 23096b2..ccd4253 100644
--- a/tests/unit/identification/test_outputFormattingFunctions.py
+++ b/tests/unit/identification/test_outputFormattingFunctions.py
@@ -28,6 +28,7 @@ def identifierOutputData():
         6,
         7,
         11,
+        12,
     ]
 
 
@@ -47,6 +48,7 @@ def test__output_formatting_functions__format_output_line(identifierOutputData):
         "peaksCount": 5,
         "CV": 6,
         "windowWidth": 7,
+        "retentionTime":12,
     }
     matchDict = {
         "cosineSimilarityScore": 8,
@@ -172,6 +174,7 @@ def test__output_formatting_functions__format_output_as_pandas_dataframe(
         "CompensationVoltage",
         "totalWindowWidth",
         "exclude_num",
+        "retentionTime",
     ]
     inputFileName = "dummyFile"
     expectedOutputDf = pd.DataFrame(