Merge pull request #87 from WangHong007/main

Enable Sage search engine scores
bigbio · Sep 30, 2023 · fda6118
2 parents 008355e + cde2dac
commit fda6118
Show file tree

Hide file tree

Showing 2 changed files with 53 additions and 9 deletions.
diff --git a/pmultiqc/modules/quantms/quantms.py b/pmultiqc/modules/quantms/quantms.py
@@ -110,6 +110,7 @@ def __init__(self):
         self.mzml_table = OrderedDict()
         self.search_engine = OrderedDict()
         self.XCORR_HIST_RANGE = {'start': 0, 'end': 5, 'step': 0.1}
+        self.HYPER_HIST_RANGE = {'start': 0, 'end': 5, 'step': 0.1}
         self.SPECEVALUE_HIST_RANGE = {'start': 0, 'end': 20, 'step': 0.4}
         self.PEP_HIST_RANGE = {'start': 0, 'end': 1, 'step': 0.02}
         self.total_ms2_spectra = 0
@@ -584,6 +585,11 @@ def draw_mzml_ms(self):
                 'description': 'Number of spectra identified by Comet search engine',
                 'color': "#ffffff"
             }
+        if any(['Sage' in v for k,v in self.mzml_table.items()]):
+            headers['Sage'] = {
+                'description': 'Number of spectra identified by Sage search engine',
+                'color': "#ffffff"
+            }
         headers['num_quant_psms'] = {
             'title': '#PSMs from quant. peptides',
             'description': 'Number of reliable PSMs from peptides IDs used in quantification',
@@ -610,6 +616,7 @@ def draw_mzml_ms(self):
                     * MS2_Num: The number of MS2 spectra extracted from mzMLs
                     * MSGF: The Number of spectra identified by MSGF search engine
                     * Comet: The Number of spectra identified by Comet search engine
+                    * Sage: The Number of spectra identified by Sage search engine
                     * PSMs from quant. peptides: extracted from PSM table in mzTab file
                     * Peptides quantified: extracted from PSM table in mzTab file
                     ''',
@@ -800,7 +807,7 @@ def draw_search_engine(self):
                     '''
         )
         # Create scores summary plot
-        [MSGF_labels, Comet_labels] = self.search_engine['data_label']['score_label']
+        [MSGF_labels, Comet_labels, Sage_labels] = self.search_engine['data_label']['score_label']
         SpecE_pconfig = {
             'id': 'search_scores_summary',  # ID used for the table
             'cpswitch': False,
@@ -825,24 +832,39 @@ def draw_search_engine(self):
             'data_labels': Comet_labels,
         }
 
+        hyper_pconfig = {
+            'id': 'search_scores_summary',  # ID used for the table
+            'cpswitch': False,
+            'title': 'Summary of Hyperscore',
+            'xlab': 'Sage hyperscore ranges',
+            'stacking': True,
+            'height': 550,
+            'tt_percentages': True, 
+            'tt_decimals': 0,
+            'data_labels': Sage_labels,
+        }        
+
         bar_cats = OrderedDict()
         bar_cats['target'] = {'name': 'target', 'color': '#2b908f'}
         bar_cats['decoy'] = {'name': 'decoy', 'color': '#90ed7d'}
         bar_cats['target+decoy'] = {'name': 'target+decoy', 'color': '#434348'}
 
         SpecE_cats = [bar_cats] * len(self.search_engine['SpecE'])
         xcorr_cats = [bar_cats] * len(self.search_engine['xcorr'])
+        hyper_cats = [bar_cats] * len(self.search_engine['hyper'])
         PEP_cats = [bar_cats] * len(self.search_engine['PEPs'])
 
         xcorr_bar_html = bargraph.plot(list(self.search_engine['xcorr'].values()), xcorr_cats, xcorr_pconfig) if self.Comet_label else ''
         SpecE_bar_html = bargraph.plot(list(self.search_engine['SpecE'].values()), SpecE_cats, SpecE_pconfig) if self.MSGF_label else ''
-
+        hyper_bar_html = bargraph.plot(list(self.search_engine['hyper'].values()), hyper_cats, hyper_pconfig) if self.Sage_label else ''
+
         self.add_section(
             description='''#### Summary of Search Scores
             * SpecEvalue : Spectral E-values, the search score of MSGF. The value used for plotting is -lg(SpecEvalue).
             * xcorr : cross-correlation scores, the search score of Comet. The value used for plotting is xcorr.
+            * hyperscore : Hyperscore, the search score of Sage. The value used for plotting is hyperscore.
             ''',
-            plot=xcorr_bar_html + SpecE_bar_html
+            plot=xcorr_bar_html + SpecE_bar_html + hyper_bar_html
         )
         # Create PEPs summary plot
         PEP_pconfig = {
@@ -895,7 +917,7 @@ def draw_search_engine(self):
         else:
             self.add_section(
                 description='''#### Summary of consensus PSMs
-                No Consensus PSMs data because of single search engine！
+                No Consensus PSMs data because of single search engine!
                 '''
             )
 
@@ -1181,10 +1203,10 @@ def parse_idxml(self, mzml_table):
                 consensus_paths.append(raw_id)
                 self.idx_paths.remove(raw_id)
 
-        self.MSGF_label, self.Comet_label = False, False
-        self.search_engine = {'SpecE': OrderedDict(), 'xcorr': OrderedDict(), 'PEPs': OrderedDict(),
+        self.MSGF_label, self.Comet_label, self.Sage_label = False, False, False
+        self.search_engine = {'SpecE': OrderedDict(), 'xcorr': OrderedDict(), 'hyper': OrderedDict(), 'PEPs': OrderedDict(),
                               'consensus_support': OrderedDict(), 'data_label': OrderedDict()}
-        SpecE_label, xcorr_label, PEPs_label, consensus_label = [], [], [], []
+        SpecE_label, xcorr_label, hyper_label, PEPs_label, consensus_label = [], [], [], [], []
 
         for raw_id in self.idx_paths:
             log.info("Parsing search result file {}...".format(raw_id))
@@ -1205,10 +1227,14 @@ def parse_idxml(self, mzml_table):
 
             self.search_engine['SpecE'][raw_id] = OrderedDict()
             self.search_engine['xcorr'][raw_id] = OrderedDict()
+            self.search_engine['hyper'][raw_id] = OrderedDict()
             self.search_engine['PEPs'][raw_id] = OrderedDict()
 
             xcorr_breaks = list(np.arange(
                 self.XCORR_HIST_RANGE['start'], self.XCORR_HIST_RANGE['end'] + self.XCORR_HIST_RANGE['step'], self.XCORR_HIST_RANGE['step']).round(2))
+
+            hyper_breaks = list(np.arange(
+                self.HYPER_HIST_RANGE['start'], self.HYPER_HIST_RANGE['end'] + self.HYPER_HIST_RANGE['step'], self.HYPER_HIST_RANGE['step']).round(2))
 
             SpecE_breaks = list(np.arange(
                 self.SPECEVALUE_HIST_RANGE['start'], self.SPECEVALUE_HIST_RANGE['end'] + self.SPECEVALUE_HIST_RANGE['step'], self.SPECEVALUE_HIST_RANGE['step']).round(2))
@@ -1220,6 +1246,7 @@ def parse_idxml(self, mzml_table):
 
             bar_stacks = ['target', 'decoy', 'target+decoy']
             Xcorr = Histogram('Comet cross-correlation score', plot_category = 'range', stacks = bar_stacks, breaks = xcorr_breaks)
+            Hyper = Histogram('Sage hyperscore', plot_category = 'range', stacks = bar_stacks, breaks = hyper_breaks)
             SpecE = Histogram('MSGF spectral E-value', plot_category = 'range', stacks = bar_stacks, breaks = SpecE_breaks)
             PEP = Histogram('Posterior error probability', plot_category = 'range', stacks = bar_stacks, breaks = PEP_breaks)
             Consensus_support = Histogram('Consensus PSM number', plot_category = 'frequency', stacks = bar_stacks)
@@ -1259,6 +1286,23 @@ def parse_idxml(self, mzml_table):
                 self.search_engine['xcorr'][raw_id] = Xcorr.dict['data']
                 self.search_engine['PEPs'][raw_id] = PEP.dict['data']
 
+            elif search_engine == "Sage" or "sage" in raw_id:
+                self.Sage_label = True
+                mzml_table[ms_name]['Sage'] = identified_num
+                hyper_label.append({'name': raw_id, 'ylab': 'Counts'})
+                PEPs_label.append({'name': raw_id, 'ylab': 'Counts'})
+                for peptide_id in peptide_ids:
+                    for hit in peptide_id.getHits():
+                        hyper = hit.getMetaValue("hyperscore")
+                        pep = hit.getMetaValue("MS:1001493") if hit.getMetaValue("MS:1001493") else hit.getScore()
+                        Hyper.addValue(hyper, stack = hit.getMetaValue("target_decoy"))
+                        PEP.addValue(pep, stack = hit.getMetaValue("target_decoy"))
+
+                Hyper.to_dict()
+                PEP.to_dict()
+                self.search_engine['hyper'][raw_id] = Hyper.dict['data']
+                self.search_engine['PEPs'][raw_id] = PEP.dict['data']
+
             else:
                 mzml_table[ms_name][search_engine] = identified_num
 
@@ -1283,7 +1327,7 @@ def parse_idxml(self, mzml_table):
             for i in Consensus_support.dict['data'].keys():
                 self.search_engine['consensus_support'][raw_id][i] = Consensus_support.dict['data'][i]
 
-        self.search_engine['data_label'] = {'score_label': [SpecE_label, xcorr_label], 'PEPs_label': PEPs_label, 'consensus_label': consensus_label}
+        self.search_engine['data_label'] = {'score_label': [SpecE_label, xcorr_label, hyper_label], 'PEPs_label': PEPs_label, 'consensus_label': consensus_label}
 
         # mass spectrum files sorted based on experimental file
         for spectrum_name in self.exp_design_table.keys():

diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
 
 from setuptools import setup, find_packages
 
-version = '0.0.21'
+version = '0.0.22'
 
 
 def readme():