diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/alignment/samtools/SamtoolsFlagstats.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/alignment/samtools/SamtoolsFlagstats.java
index 674f7f85d..fe4823e98 100644
--- a/biodata-formats/src/main/java/org/opencb/biodata/formats/alignment/samtools/SamtoolsFlagstats.java
+++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/alignment/samtools/SamtoolsFlagstats.java
@@ -22,6 +22,10 @@ public class SamtoolsFlagstats {
      */
     private int secondaryAlignments;
 
+    /**
+     * Total number of reads which are supplementary (0x800 bit set)
+     */
+    private int supplementary;
     /**
      * Total number of reads which are duplicates (0x400 bit set)
      */
@@ -32,6 +36,17 @@ public class SamtoolsFlagstats {
      */
     private int pairedInSequencing;
 
+    /**
+     * Total number of reads with both 0x1 and 0x40 bits set
+     */
+    private int read1;
+
+
+    /**
+     * Total number of reads with both 0x1 and 0x80 bits set
+     */
+    private int read2;
+
     /**
      * Total number of reads which are properly paired (both 0x1 and 0x2 bits set and 0x4 bit not set)
      */
@@ -70,8 +85,11 @@ public String toString() {
         sb.append(", totalQcPassed=").append(totalQcPassed);
         sb.append(", mapped=").append(mapped);
         sb.append(", secondaryAlignments=").append(secondaryAlignments);
+        sb.append(", supplementary=").append(supplementary);
         sb.append(", duplicates=").append(duplicates);
         sb.append(", pairedInSequencing=").append(pairedInSequencing);
+        sb.append(", read1=").append(read1);
+        sb.append(", read2=").append(read2);
         sb.append(", properlyPaired=").append(properlyPaired);
         sb.append(", selfAndMateMapped=").append(selfAndMateMapped);
         sb.append(", singletons=").append(singletons);
@@ -117,6 +135,15 @@ public SamtoolsFlagstats setSecondaryAlignments(int secondaryAlignments) {
         return this;
     }
 
+    public int getSupplementary() {
+        return supplementary;
+    }
+
+    public SamtoolsFlagstats setSupplementary(int supplementary) {
+        this.supplementary = supplementary;
+        return this;
+    }
+
     public int getDuplicates() {
         return duplicates;
     }
@@ -135,6 +162,24 @@ public SamtoolsFlagstats setPairedInSequencing(int pairedInSequencing) {
         return this;
     }
 
+    public int getRead1() {
+        return read1;
+    }
+
+    public SamtoolsFlagstats setRead1(int read1) {
+        this.read1 = read1;
+        return this;
+    }
+
+    public int getRead2() {
+        return read2;
+    }
+
+    public SamtoolsFlagstats setRead2(int read2) {
+        this.read2 = read2;
+        return this;
+    }
+
     public int getProperlyPaired() {
         return properlyPaired;
     }
diff --git a/biodata-formats/src/main/java/org/opencb/biodata/formats/alignment/samtools/io/SamtoolsFlagstatsParser.java b/biodata-formats/src/main/java/org/opencb/biodata/formats/alignment/samtools/io/SamtoolsFlagstatsParser.java
new file mode 100644
index 000000000..30eaf03c9
--- /dev/null
+++ b/biodata-formats/src/main/java/org/opencb/biodata/formats/alignment/samtools/io/SamtoolsFlagstatsParser.java
@@ -0,0 +1,93 @@
+package org.opencb.biodata.formats.alignment.samtools.io;
+
+import org.opencb.biodata.formats.alignment.samtools.SamtoolsFlagstats;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Parser for the plain-text report written by {@code samtools flagstat}.
+ */
+public class SamtoolsFlagstatsParser {
+
+    /**
+     * Counter line of the report: QC-passed count, a plus sign, QC-failed count, then the description.
+     */
+    private static final Pattern COUNTER_LINE = Pattern.compile("(\\d+)\\s*\\+\\s*(\\d+)\\s+(.*)");
+
+    /**
+     * Parses a {@code samtools flagstat} report into a {@link SamtoolsFlagstats}.
+     *
+     * For every category only the QC-passed count is stored; the total number of reads is the sum of the
+     * QC-passed and QC-failed counts of the "in total" line. Lines that are not counter lines (e.g. blank
+     * lines) are ignored.
+     *
+     * @param path File containing the flagstat report
+     * @return Flagstats filled in with the counters found in the file
+     * @throws IOException If the file cannot be opened or read
+     * @throws NumberFormatException If a counter does not fit in an {@code int}
+     */
+    public static SamtoolsFlagstats parse(Path path) throws IOException {
+        SamtoolsFlagstats flagstats = new SamtoolsFlagstats();
+
+        // try-with-resources: the reader is closed even when a line fails to parse
+        try (BufferedReader br = Files.newBufferedReader(path)) {
+            String line;
+            while ((line = br.readLine()) != null) {
+                Matcher matcher = COUNTER_LINE.matcher(line.trim());
+                if (matcher.matches()) {
+                    int passed = Integer.parseInt(matcher.group(1));
+                    int failed = Integer.parseInt(matcher.group(2));
+                    update(flagstats, matcher.group(3), passed, failed);
+                }
+            }
+        }
+
+        return flagstats;
+    }
+
+    /**
+     * Stores the counters of one report line in the field selected by the description of that line.
+     */
+    private static void update(SamtoolsFlagstats flagstats, String description, int passed, int failed) {
+        if (description.startsWith("primary")) {
+            // "primary", "primary duplicates" and "primary mapped" lines have no field in SamtoolsFlagstats;
+            // skip them so they cannot overwrite the "duplicates" and "mapped" counters
+            return;
+        }
+
+        if (description.contains("QC-passed")) {
+            flagstats.setTotalQcPassed(passed);
+            flagstats.setTotalReads(passed + failed);
+        } else if (description.contains("secondary")) {
+            flagstats.setSecondaryAlignments(passed);
+        } else if (description.contains("supplementary")) {
+            flagstats.setSupplementary(passed);
+        } else if (description.contains("duplicates")) {
+            flagstats.setDuplicates(passed);
+        } else if (description.contains("paired in sequencing")) {
+            flagstats.setPairedInSequencing(passed);
+        } else if (description.contains("read1")) {
+            flagstats.setRead1(passed);
+        } else if (description.contains("read2")) {
+            flagstats.setRead2(passed);
+        } else if (description.contains("properly paired")) {
+            flagstats.setProperlyPaired(passed);
+        } else if (description.contains("with itself and mate mapped")) {
+            flagstats.setSelfAndMateMapped(passed);
+        } else if (description.contains("singletons")) {
+            flagstats.setSingletons(passed);
+        } else if (description.contains("mapQ>=5")) {
+            flagstats.setDiffChrMapQ5(passed);
+        } else if (description.contains("with mate mapped")) {
+            flagstats.setMateMappedToDiffChr(passed);
+        } else if (description.contains("mapped")) {
+            // Must be tested last: every "... mate mapped ..." description also contains "mapped"
+            flagstats.setMapped(passed);
+        }
+    }
+}