diff --git a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/BinPriorProbabilities.java b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/BinPriorProbabilities.java index 46975ec..8de2b99 100644 --- a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/BinPriorProbabilities.java +++ b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/BinPriorProbabilities.java @@ -3,7 +3,7 @@ import java.util.ArrayList; import java.util.Arrays; -class BinPriorProbabilities { +public class BinPriorProbabilities { //Holds the values to compute prior probability Pr(Pep_{ij}) //Note: the original manuscript has 104 values, we use 62 because we do not consider protein n and c termini //Note: the length of these is 26 to make character indexing possible, but only the 20 AAs will be valid @@ -32,7 +32,7 @@ class BinPriorProbabilities { final char[] AAs = {'A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'}; - BinPriorProbabilities() { + public BinPriorProbabilities() { this.probs = new double[26]; this.nProbs = new double[26]; this.cProbs = new double[26]; @@ -61,39 +61,30 @@ class BinPriorProbabilities { } - double[] computePriorProbs(String pep, boolean[] allowedPoses) { - double[] priorProbs = new double[pep.length()+2]; + public double[] computePriorProbs(String pep, boolean[] allowedPoses) { + double[] priorProbs = new double[pep.length()]; double sumProbs = 0.0; // Build peptide prior probability distribution // The prior probability function includes two extra values for N- and C-term modifications for (int i = 0; i < pep.length(); i++) { // Iterate over middle n-2 indices char cAA = pep.charAt(i); - if (allowedPoses[i+1] == true) { + if (allowedPoses[i] == true) { if (i == 0) { // Pick max probability for N-terminal options - double curProb = Math.max(this.nProbs[cAA-'A'], this.probs[cAA-'A']); - priorProbs[i+1] = curProb; + double curProb = Math.max(this.n, Math.max(this.nProbs[cAA-'A'], this.probs[cAA-'A'])); + priorProbs[i] = curProb; sumProbs += curProb; } else if (i == pep.length()-1) { // Pick max probability for C-terminal options - double curProb = Math.max(this.cProbs[cAA-'A'], this.probs[cAA-'A']); - priorProbs[i+1] = curProb; + double curProb = Math.max(this.c, Math.max(this.cProbs[cAA-'A'], this.probs[cAA-'A'])); + priorProbs[i] = curProb; sumProbs += curProb; } else { double curProb = this.probs[cAA-'A']; - priorProbs[i+1] = curProb; + priorProbs[i] = curProb; sumProbs += curProb; } } } - // Add termini //TODO this will not handle the case where termini are allowed but n* is not - if (allowedPoses[0]) { - priorProbs[0] = this.n; - sumProbs += this.n; - } - if (allowedPoses[allowedPoses.length-1]) { - priorProbs[priorProbs.length - 1] = this.c; - sumProbs += this.c; - } // Normalize prior probability distribution for (int i = 0; i < priorProbs.length; i++) @@ -102,12 +93,12 @@ class BinPriorProbabilities { return priorProbs; } - static double[] computeUniformPriorProbs(String pep, boolean[] allowedPoses) { - double[] priorProbs = new double[pep.length()+2]; + public static double[] computeUniformPriorProbs(String pep, boolean[] allowedPoses) { + double[] priorProbs = new double[pep.length()]; double sumProbs = 0.0; // Build peptide prior probability distribution - for (int i = 0; i < pep.length()+2; i++) { + for (int i = 0; i < pep.length(); i++) { if (allowedPoses[i] == true) { priorProbs[i] = 1.0; sumProbs += 1.0; @@ -128,7 +119,7 @@ public void update(String pep, double[] siteProbs, boolean [] allowedPoses) { this.nT1Count++; } if (allowedPoses[allowedPoses.length-1]) { - this.cT1 += siteProbs[siteProbs.length - 1]; + this.cT1 += siteProbs[siteProbs.length-1]; this.cT1Count++; } @@ -136,19 +127,18 @@ public void update(String pep, double[] siteProbs, boolean [] allowedPoses) { for (int i = 0; i < pep.length(); i++) { char cAA = pep.charAt(i); - if (i == 0 && allowedPoses[1]) { // Update N-terminal AA probabilities - this.nProbsT1[cAA-'A'] += siteProbs[1]; + if (i == 0 && allowedPoses[0]) { // Update N-terminal AA probabilities + this.nProbsT1[cAA-'A'] += siteProbs[0]; this.nProbsT1Count[cAA-'A']++; - - } else if (i == pep.length()-1 && allowedPoses[pep.length()]) { // Update C-termini AA probabilities - this.cProbsT1[cAA-'A'] += siteProbs[pep.length()]; + } else if (i == pep.length()-1 && allowedPoses[pep.length()-1]) { // Update C-termini AA probabilities + this.cProbsT1[cAA-'A'] += siteProbs[pep.length()-1]; this.cProbsT1Count[cAA-'A']++; } // Update base AA probabilities - if (allowedPoses[i+1]) { - this.probsT1[cAA - 'A'] += siteProbs[i + 1]; - this.probsT1Count[cAA - 'A']++; + if (allowedPoses[i]) { + this.probsT1[cAA-'A'] += siteProbs[i]; + this.probsT1Count[cAA-'A']++; } } this.nPsms++; diff --git a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java index 9ee33d3..138e06b 100644 --- a/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java +++ b/src/edu/umich/andykong/ptmshepherd/iterativelocalization/IterativeLocalizer.java @@ -418,7 +418,7 @@ private void calculateLocalizationProbabilities() throws Exception { //todo mods should be parsed here if we don't want to localize on top of var mods public static boolean[] parseAllowedPositions(String seq, String allowedAAs, float[] mods) { - boolean[] allowedPoses = new boolean[seq.length()+2]; + boolean[] allowedPoses = new boolean[seq.length()]; if (allowedAAs.equals("all") || allowedAAs.equals("")) Arrays.fill(allowedPoses, true); else { @@ -428,24 +428,20 @@ public static boolean[] parseAllowedPositions(String seq, String allowedAAs, flo i++; if ((allowedAAs.charAt(i) == '*') || (allowedAAs.charAt(i) == '^')) { allowedPoses[0] = true; - allowedPoses[1] = true; } else if (allowedAAs.charAt(i) == seq.charAt(0)) { allowedPoses[0] = true; - allowedPoses[1] = true; } } else if (allowedAAs.charAt(i) == 'c') { i++; if ((allowedAAs.charAt(i) == '*') || (allowedAAs.charAt(i) == '^')) { - allowedPoses[seq.length()] = true; - allowedPoses[seq.length() + 1] = true; + allowedPoses[seq.length() - 1] = true; } else if (allowedAAs.charAt(i) == seq.charAt(seq.length()-1)) { - allowedPoses[seq.length()] = true; - allowedPoses[seq.length() + 1] = true; + allowedPoses[seq.length() - 1] = true; } } else { for (int j = 0; j < seq.length(); j++) { if (seq.charAt(j) == allowedAAs.charAt(i)) - allowedPoses[j+1] = true; + allowedPoses[j] = true; } } } @@ -791,9 +787,9 @@ boolean isDecoyAA(char aa) { */ private double[] localizePsm (PSMFile.PSM psm, Spectrum spec, String pep, float[] mods, float dMass, int cBin, boolean[] allowedPoses) { double[] sitePriorProbs; - double[] siteLikelihoods = new double[pep.length()+2]; + double[] siteLikelihoods = new double[pep.length()]; double marginalProb = 0.0; - double[] sitePosteriorProbs = new double[pep.length()+2]; + double[] sitePosteriorProbs = new double[pep.length()]; // Compute prior probability or use uniform prior if PSM does not belong to mass shift bin if (cBin == -1) @@ -833,13 +829,6 @@ private double[] localizePsm (PSMFile.PSM psm, Spectrum spec, String pep, float[ this.localizationLikelihoodMap.put(psm.getSpec(), new LocalizationLikelihood(dMass, siteLikelihoods)); } - - // Propagate terminal AA likelihoods to each terminus //TODO this isn't going to handle cases when termini are allowed but the first residue isnt - if (allowedPoses[0]) - siteLikelihoods[0] = siteLikelihoods[1]; // N-term - if (allowedPoses[allowedPoses.length-1]) - siteLikelihoods[siteLikelihoods.length-1] = siteLikelihoods[siteLikelihoods.length-2]; // C-term - // Compute marginal probability for peptide Sum_{k=0}^{{L_i}+1} P(Pep_{ik})*P(Spec_i|Pep_{ik}) // Sum (site prior * site likelihoods) for (int i = 0; i < siteLikelihoods.length; i++) @@ -914,7 +903,7 @@ private double[] computePoissonBinomialLikelihood(String pep, float[] mods, floa // Set up structures to hold site matched ion probabilities int nAllowedPoses = 0; - for (int i = 1; i < allowedPoses.length-1; i++) { // Ignore C- and N-term, will be propogated at the end + for (int i = 0; i < allowedPoses.length; i++) { // Ignore C- and N-term, will be propogated at the end if (allowedPoses[i]) nAllowedPoses++; } @@ -923,7 +912,7 @@ private double[] computePoissonBinomialLikelihood(String pep, float[] mods, floa // Loop through sites and calculate the matched ions int cAllowedPos = 0; for(int i = 0; i < pep.length(); i++) { - if (!allowedPoses[i + 1]) + if (!allowedPoses[i]) continue; mods[i] += dMass; ArrayList sitePepFrags = Peptide.calculatePeptideFragments(pep, mods, this.ionTypes, 1); @@ -947,12 +936,12 @@ private double[] computePoissonBinomialLikelihood(String pep, float[] mods, floa mods[i] -= dMass; } - // Compute site likelihoods, add N- and C-termini to likelihood output + // Compute site likelihoods double[] pepAALikelihoods = PoissonBinomialLikelihood.calculateProbXMax(ionProbs); - double[] siteLikelihoods = IntStream.rangeClosed(0, pepAALikelihoods.length + 1).mapToDouble( - i -> (i == 0 || i == pepAALikelihoods.length + 1) ? 0 : pepAALikelihoods[i - 1]).toArray(); + //double[] siteLikelihoods = IntStream.rangeClosed(0, pepAALikelihoods.length + 1).mapToDouble( + // i -> (i == 0 || i == pepAALikelihoods.length + 1) ? 0 : pepAALikelihoods[i - 1]).toArray(); - return siteLikelihoods; + return pepAALikelihoods; //TODO this won't be tolerant to site restriction } /** @@ -1168,13 +1157,7 @@ private String findMaxLocalizationProbabilitySite(double[] locProbs, String pep) } } - String site; - if (maxI == 0) - site = "N-term"; - else if (maxI == locProbs.length-1) - site = "C-term"; - else - site = pep.charAt(maxI-1) + Integer.toString(maxI); + String site = pep.charAt(maxI) + Integer.toString(maxI+1); return site; } @@ -1217,15 +1200,11 @@ private String probabilitiesToPepString(String pep, float dMass, double[] probs, sb.append(new DecimalFormat("0.0000").format(dMass)); sb.append("@"); - if (allowedPoses[0]) - sb.append("("+new DecimalFormat("0.0000").format(probs[0])+")"); for (int i = 0; i < pep.length(); i++) { sb.append(pep.charAt(i)); - if (allowedPoses[i + 1]) - sb.append("("+new DecimalFormat("0.0000").format(probs[i+1])+")"); + if (allowedPoses[i]) + sb.append("("+new DecimalFormat("0.0000").format(probs[i])+")"); } - if (allowedPoses[allowedPoses.length - 1]) - sb.append("("+new DecimalFormat("0.0000").format(probs[0])+")"); return sb.toString(); } diff --git a/test/iterativelocalization/BinPriorProbabilitiesTest.java b/test/iterativelocalization/BinPriorProbabilitiesTest.java new file mode 100644 index 0000000..fc70643 --- /dev/null +++ b/test/iterativelocalization/BinPriorProbabilitiesTest.java @@ -0,0 +1,34 @@ +package iterativelocalization; + +import edu.umich.andykong.ptmshepherd.iterativelocalization.BinPriorProbabilities; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.*; + +public class BinPriorProbabilitiesTest { + BinPriorProbabilities bpp; + + @BeforeEach + public void setUp() { + // Initialize test data + this.bpp = new BinPriorProbabilities(); + } + + @Test + public void computePriorProbs() { + String seq = "PEPTI"; + boolean[] allowedPoses = new boolean[]{true, true, true, true, true}; + + double[] expectedPriorProbs = new double[]{0.2, 0.2, 0.2, 0.2, 0.2}; + assertArrayEquals(bpp.computePriorProbs(seq, allowedPoses), expectedPriorProbs, 0.0001); + } + + @Test + public void computeUniformPriorProbs() { + String seq = "PEPTI"; + boolean[] allowedPoses = new boolean[]{true, true, true, true, true}; + + double[] expectedPriorProbs = new double[]{0.2, 0.2, 0.2, 0.2, 0.2}; + assertArrayEquals(bpp.computeUniformPriorProbs(seq, allowedPoses), expectedPriorProbs, 0.0001); + } +} diff --git a/test/iterativelocalization/IterativeLocalizerTest.java b/test/iterativelocalization/IterativeLocalizerTest.java index 889eb47..eb320c9 100644 --- a/test/iterativelocalization/IterativeLocalizerTest.java +++ b/test/iterativelocalization/IterativeLocalizerTest.java @@ -14,41 +14,41 @@ void parseAllowedPositions() { String seq = "PEPTIDE"; float[] mods = new float[]{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; String allowedAAs = ""; - boolean[] expectedPoses = new boolean[]{true, true, true, true, true, true, true, true, true}; + boolean[] expectedPoses = new boolean[]{true, true, true, true, true, true, true}; assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); allowedAAs = "all"; - expectedPoses = new boolean[]{true, true, true, true, true, true, true, true, true}; + expectedPoses = new boolean[]{true, true, true, true, true, true, true}; IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods); assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); allowedAAs = "Q"; - expectedPoses = new boolean[]{true, true, true, true, true, true, true, true, true}; + expectedPoses = new boolean[]{true, true, true, true, true, true, true}; assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); allowedAAs = "E"; - expectedPoses = new boolean[]{false, false, true, false, false, false, false, true, false}; + expectedPoses = new boolean[]{false, true, false, false, false, false, true}; System.out.println(Arrays.toString(IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods))); assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); allowedAAs = "P"; - expectedPoses = new boolean[]{false, true, false, true, false, false, false, false, false}; + expectedPoses = new boolean[]{true, false, true, false, false, false, false}; assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); allowedAAs = "nP"; - expectedPoses = new boolean[]{true, true, false, false, false, false, false, false, false}; + expectedPoses = new boolean[]{true, false, false, false, false, false, false}; assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); allowedAAs = "n*"; - expectedPoses = new boolean[]{true, true, false, false, false, false, false, false, false}; + expectedPoses = new boolean[]{true, false, false, false, false, false, false}; assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); allowedAAs = "n^"; - expectedPoses = new boolean[]{true, true, false, false, false, false, false, false, false}; + expectedPoses = new boolean[]{true, false, false, false, false, false, false}; assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); allowedAAs = "c^"; - expectedPoses = new boolean[]{false, false, false, false, false, false, false, true, true}; + expectedPoses = new boolean[]{false, false, false, false, false, false, true}; assertArrayEquals(expectedPoses, IterativeLocalizer.parseAllowedPositions(seq, allowedAAs, mods)); }