Skip to content

Commit

Permalink
Make raw file mapping in PSMFile more robust and easier to maintain
Browse files Browse the repository at this point in the history
  • Loading branch information
danielgeiszler committed May 10, 2024
1 parent 3b9d8a7 commit b83c101
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 37 deletions.
87 changes: 55 additions & 32 deletions src/edu/umich/andykong/ptmshepherd/PSMFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.util.concurrent.Callable;
import java.util.zip.CRC32;

import static edu.umich.andykong.ptmshepherd.PTMShepherd.concatIonTypes;
import static edu.umich.andykong.ptmshepherd.PTMShepherd.reNormName;

public class PSMFile {
Expand Down Expand Up @@ -251,6 +252,15 @@ public int getColumn(String head) {
}
return res;
}

/**
 * Strips the calibration marker from a file base name.
 *
 * The markers are tried in a fixed order ("_calibrated" first, then
 * "_uncalibrated"); only the first marker found is removed, and every
 * occurrence of that marker is dropped from the name.
 *
 * @param fileBaseName file name without its extension
 * @return the name with the marker removed, or the same string when no marker is present
 */
private static String removeCalTag(String fileBaseName) {
    for (String calTag : new String[] {"_calibrated", "_uncalibrated"}) {
        if (fileBaseName.contains(calTag)) {
            // Stop at the first marker that is present; later markers are left untouched
            return fileBaseName.replace(calTag, "");
        }
    }
    return fileBaseName;
}

public static String getCRC32(File f) throws Exception {
CRC32 crc = new CRC32();
Expand Down Expand Up @@ -385,47 +395,60 @@ public int getPrecursorCol() {
}

/**
 * Walks {@code path} recursively and stores in {@code mappings} one data file per run name,
 * preferring the candidate whose file-name suffix has the highest priority.
 *
 * NOTE(review): this span is a rendered diff without +/- markers. Lines that appear to belong to
 * the previous implementation (the datTypes table and fileScore arithmetic) are interleaved with
 * the lines that appear to replace them (the priorities table and getMatchingExtension lookup),
 * so the text is not compilable as shown.
 *
 * @param path file or directory to inspect; every entry of a directory is visited recursively
 * @param mappings run name to selected file; the priorities-based lines only fill in names that
 *                 are already present as keys
 * @param runNames run names; checked by the datTypes-based lines, and only forwarded to the
 *                 recursive call by the priorities-based lines
 */
public static void getMappings(File path, HashMap<String,File> mappings, HashSet<String> runNames) {
// NOTE(review): the datTypes table, fileScore and the isDirectory() test just below look like the removed side of the diff
HashMap<String, Integer> datTypes = new HashMap<>();
datTypes.put("mgf", 4);
datTypes.put("mzBIN_cache", 7);
datTypes.put("mzBIN", 3);
datTypes.put("mzML", 2);
datTypes.put("mzXML", 2);
datTypes.put("raw", 1);

int fileScore = 0;
if(path.isDirectory()) {
// File priority list, this will return the first one matched so insertion order must be consistent
// Keys are matched against the end of the file name, so the longer "_calibrated.mzML"-style
// suffixes have to be inserted before the plain ".mzML"/".mgf" suffixes they end with.
// A larger value wins when two files resolve to the same run name (see the comparison further down).
LinkedHashMap<String, Integer> priorities = new LinkedHashMap<>();
priorities.put(".mzBIN_cache", 20);
priorities.put("_calibrated.mzML", 19);
priorities.put("_uncalibrated.mzML", 18);
priorities.put("_calibrated.mgf", 17);
priorities.put("_uncalibrated.mgf", 16);
priorities.put(".mzML", 15);
priorities.put(".mzXML", 14);
priorities.put(".mzBIN", 13);
priorities.put(".mgf", 4);
priorities.put(".raw", 1);
// NOTE(review): "None" is suffix-matched like every other key, so a file name ending in "None" would match it — confirm intended
priorities.put("None", 0);

// Recursively search all directories
if(path.isDirectory()) {
File [] ls = path.listFiles();
//get mapping for each file
// get mapping for each file
for(int i = 0; i < ls.length; i++) {
getMappings(ls[i],mappings, runNames);
}
// NOTE(review): from this "else" down to the next "} else {" looks like the removed implementation
} else {
String [] ns = splitName(path.getName());
// Calibration tag adds a bonus to the score and is cut off the run name (everything from the tag onward is dropped)
if (ns[0].contains("_calibrated")) {
fileScore += 6;
ns[0] = ns[0].substring(0, ns[0].indexOf("_calibrated"));
}
else if (ns[0].contains("_uncalibrated")) {
fileScore += 5;
ns[0] = ns[0].substring(0, ns[0].indexOf("_uncalibrated"));
}
if (!runNames.contains(ns[0]))
return;
// NOTE(review): the trailing || ns[1].equals("mzBIN_cache") sits outside the parenthesised group, so && binds first
// and an mzBIN_cache file passes this test even when mappings has no key for ns[0]
if (mappings.containsKey(ns[0]) && (ns[1].equals("mzXML") || ns[1].equals("mzML") || ns[1].equals("raw") || ns[1].equals("mzBIN") || ns[1].equals("mgf")) || ns[1].equals("mzBIN_cache")) {
if (mappings.get(ns[0]) == null)
mappings.put(ns[0], path);
else {
File storedPath = mappings.get(ns[0]);
String[] storedNs = splitName(storedPath.getName());
fileScore += datTypes.get(ns[1]);
// The new file's score (extension value plus any calibration bonus) is compared with the stored file's extension value only
if (fileScore > datTypes.get(storedNs[1]))
mappings.put(ns[0], path);
} else { // see if valid file ext
String matchedKey = getMatchingExtension(path, priorities);
if (matchedKey != null) { // end of name exists in priorities map
String rawFileName = removeCalTag(splitName(path.getName())[0]);
// If raw file not part of this analysis, continue
if (!mappings.containsKey(rawFileName))
return;
// New raw file info
int newRawFilePriority = priorities.get(matchedKey);
// Existing raw file info
// The key is known to exist at this point, so null means no file has been assigned to it yet
File previousRawFile = mappings.getOrDefault(rawFileName, null);
if (previousRawFile == null) // no file mapped yet
mappings.put(rawFileName, path);
else { // Compare and replace if appropriate
// NOTE(review): if the stored file's name matches no key, getMatchingExtension returns null and unboxing the lookup result throws NullPointerException
int previousRawFilePriority = priorities.get(getMatchingExtension(previousRawFile, priorities));
// Strictly greater: on equal priority the file that was mapped first is kept
if (newRawFilePriority > previousRawFilePriority) {
mappings.put(rawFileName, path);
}
}
}
}
}

/**
 * Finds the first key of {@code priority} that the file's name ends with.
 *
 * Keys are tested in the map's own iteration order, so when several keys
 * match, the one the map yields first is returned.
 *
 * @param file file whose name is tested
 * @param priority map whose keys are the candidate file-name suffixes
 * @return the first matching key, or {@code null} when no key matches
 */
private static String getMatchingExtension(File file, HashMap<String, Integer> priority) {
    return priority.keySet().stream()
            // The name is read per key, exactly as the loop form did
            .filter(suffix -> file.getName().endsWith(suffix))
            .findFirst()
            .orElse(null); // No matching extension found
}


public TreeMap<String, Integer> getMS2Counts() {
TreeMap<String, Integer> cnts = new TreeMap<>();
int col = getColumn("Spectrum");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -360,9 +360,9 @@ private void calculateLocalizationProbabilities() throws Exception {
}
}

if (specName.equals("02330a_GH3_3991_13_PTM_TrainKit_Pmod_Hydroxyproline_200fmol_3xHCD_R1.07708.07708")) {
this.debugFlag = true;
}
//if (specName.equals("02330a_GH3_3991_13_PTM_TrainKit_Pmod_Hydroxyproline_200fmol_3xHCD_R1.7708.7708")) {
// this.debugFlag = true;
//}

// Calculate site-specific localization probabilities
float[] mods = psm.getModsAsArray();
Expand Down
4 changes: 2 additions & 2 deletions test/utils/PeptideTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ void calculatePeptideFragments() {
assertEquals(expectedFrags.get(i), sitePepFrags.get(i), 0.0001, "Mismatch at index " + i);
}

seq = "PASGAGAGAGAGKR";
mods = new float[]{114.0448f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f};
seq = "GDRGEIGPPGPR";
mods = new float[]{0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 16.0004f, 0.0f, 0.0f, 0.0f, 0.0f};
pep = new Peptide(seq, mods);

sitePepFrags = pep.calculatePeptideFragments("b", 1);
Expand Down

0 comments on commit b83c101

Please sign in to comment.