Skip to content

Commit

Permalink
Only read mzbin files on first and last epochs when doing iterative l…
Browse files Browse the repository at this point in the history
…ocalization

This commit results in a 43% speedup to IterativeLocalizer.calculateLocalizationProbabilities
  • Loading branch information
danielgeiszler committed Apr 11, 2024
1 parent a910af3 commit 1018e72
Showing 1 changed file with 15 additions and 41 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ private void calculateLocalizationProbabilities() throws Exception {
for (String cf : runToLine.keySet()) {
// Load current run
mr = new MXMLReader(mzMap.get(ds).get(cf), this.nThreads);
mr.readFully();
if (epoch == 1 || finalPass) //todo logic too complicated, need to create a state machine
mr.readFully();

// Calculate PSM-level localization probabilities
for (int j : runToLine.get(cf)) {
Expand Down Expand Up @@ -271,10 +272,14 @@ private void calculateLocalizationProbabilities() throws Exception {
if (!finalPass && this.priorProbs[cBin].getIsConverged()) // Safe because left is first
continue;

Spectrum spec = mr.getSpectrum(specName);
if (spec == null) {
linesWithoutSpectra.add(specName);
continue;
// todo this logic is getting way too complex, need to handle execution states in a static context
Spectrum spec = null;
if (epoch == 1 || finalPass) { // or out of memory dataset
spec = mr.getSpectrum(specName);
if (spec == null) {
linesWithoutSpectra.add(specName);
continue; // todo handle this error
}
}

//if (specName.equals("02330a_GC1_3990_03_PTM_TrainKit_Rmod_Dimethyl_asymm_200fmol_3xHCD_R1.15210.15210")) {
Expand Down Expand Up @@ -669,7 +674,8 @@ boolean isDecoyAA(char aa) {
* P(Spec_i|Pep_{ij}) -> Likelihood
* Sum_{k=0}^{{L_i}+1} P(Pep_{ik})*P(Spec_i|Pep_{ik}) -> Marginal probability
*
* @param spec Spectrum class opject containing pre-process mass spectrum
* @param psm PSMFile.PSM object containing PSM information //todo most of the other values don't need to be preparsed if this is passed
* @param spec Spectrum class object containing pre-processed mass spectrum
* @param pep pep sequence
* @param mods array containing masses to be added on to pep sequence at mods[i] position
* @param dMass delta mass of PSM
Expand All @@ -689,41 +695,6 @@ private double[] localizePsm (PSMFile.PSM psm, Spectrum spec, String pep, float[
else
sitePriorProbs = this.priorProbs[cBin].computePriorProbs(pep, allowedPoses);

// Iterate through sites to compute likelihood for each site P(Spec_i|Pep_{ij})
// There are no ions that can differentiate termini and terminal AAs, so the likelihood for each terminus
// is equal to the proximal AA

// First calculate the set of shifted and unshifted ions
ArrayList<Float> pepFrags = Peptide.calculatePeptideFragments(pep, mods, this.ionTypes, 1);
ArrayList<Float> shiftedPepFrags = new ArrayList<Float>(pepFrags.size());
for (Float frag : pepFrags)
shiftedPepFrags.add(frag + dMass);
pepFrags.addAll(shiftedPepFrags);

if (debugFlag)
System.out.println(pepFrags.stream().map(Object::toString)
.collect(Collectors.joining(", ")));

// Filter peakMzs and peakInts to only those that match at least one ion
float[] peakMzs = spec.getPeakMZ();
float[] peakInts = spec.getPeakInt();
float[] matchedIons = findMatchedIons(pepFrags, peakMzs, peakInts)[0]; // Returns -1 if unmatched, intensity otherwise // [0] is intensities, [1] is mass errors TODO rewrite
int matchedCount = 0;
for (int i = 0; i < matchedIons.length; i++) {
if (matchedIons[i] > 0.0)
matchedCount++;
}
float[] reducedMzs = new float[matchedCount];
float[] reducedInts = new float[matchedCount];
int j = 0;
for (int i = 0; i < matchedIons.length; i++) {
if (matchedIons[i] > 0.0) {
reducedMzs[j] = peakMzs[i];
reducedInts[j] = peakInts[i];
j++;
}
}

// Iterate through sites to compute likelihood for each site P(Spec_i|Pep_{ij})
// There are no ions that can differentiate termini and terminal AAs, so the likelihood for each terminus
// is equal to the proximal AA
Expand All @@ -745,6 +716,9 @@ private double[] localizePsm (PSMFile.PSM psm, Spectrum spec, String pep, float[
}
**/

// Iterate through sites to compute likelihood for each site P(Spec_i|Pep_{ij})
// There are no ions that can differentiate termini and terminal AAs, so the likelihood for each terminus
// is equal to the proximal AA
// Check to see if the likelihood has already been computed. If it has, grab it. If not, compute it.
if (this.localizationLikelihoodMap.containsKey(psm.getSpec())) {
siteLikelihoods = this.localizationLikelihoodMap.get(psm.getSpec()).getMod().getSiteLikelihoods();
Expand Down

0 comments on commit 1018e72

Please sign in to comment.