From 7b91eb67fd19017a255579f5cdfdbdc7f0c208c4 Mon Sep 17 00:00:00 2001 From: John McGuigan Date: Fri, 5 Jan 2024 13:58:47 -0500 Subject: [PATCH 1/3] Correct check for insertion to avoid counting soft-clipped bases --- .../com/fulcrumgenomics/bam/pileup/PileupBuilder.scala | 4 ++-- .../fulcrumgenomics/bam/pileup/PileupBuilderTest.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala b/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala index 89963cf96..1f0bba4b8 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala @@ -215,8 +215,8 @@ trait PileupBuilder extends PileupParameters { testAndAdd(DeletionEntry(rec, deletionPosition - 1)) } else { // This site must be a matched site within the read. testAndAdd(BaseEntry(rec, offset - 1)) - // Also check to see if the subsequent base represents an insertion. - if (offset < rec.length - 1 && rec.refPosAtReadPos(offset + 1) == 0) testAndAdd(InsertionEntry(rec, offset)) + // Also check to see if any subsequent base represents an insertion. + if (rec.end > pos && rec.refPosAtReadPos(offset + 1) == 0) testAndAdd(InsertionEntry(rec, offset)) } } } diff --git a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala index 967ad689b..1ef6aad63 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala @@ -127,6 +127,7 @@ class PileupBuilderTest extends UnitSpec { builder.addFrag(name = "q3", start = 101, cigar = "31M9I10M").foreach(_.bases = "G" * ReadLength) builder.addFrag(name = "q4", start = 101, cigar = "30M9D20M").foreach(_.bases = "T" * ReadLength) builder.addFrag(name = "q5", start = 141, cigar = "10I40M" ).foreach(_.bases = "N" * ReadLength) + builder.addFrag(name = "q6", start = 201, cigar = "47M3S" ).foreach(_.bases = "N" * ReadLength) val source = builder.toSource val piler = PileupBuilder(source, accessPattern = accessPattern, mappedPairsOnly = false) @@ -170,6 +171,13 @@ class PileupBuilderTest extends UnitSpec { p4.iterator.collect{ case x: InsertionEntry => x }.map(_.rec.name).next() shouldBe "q5" p4.baseIterator.toSeq should contain theSameElementsAs p4.withoutIndels.iterator.toSeq + // Locus with the end of a read that is soft-clipped + val p5 = piler.pileup(Chr1, 247) + p5.depth shouldBe 1 + p5.iterator.size shouldBe 1 // should not report an insertion due to the remaining bases + p5.baseIterator.size shouldBe 1 + p5.baseIterator.toSeq should contain theSameElementsAs p5.withoutIndels.iterator.toSeq + source.safelyClose() piler.safelyClose() } From a17217d5b61fd0f940ea61c2d8d973418a27e158 Mon Sep 17 00:00:00 2001 From: John McGuigan Date: Wed, 17 Jan 2024 15:55:17 -0500 Subject: [PATCH 2/3] add unit tests for soft-clipped positions Co-authored-by: Clint Valentine --- .../fulcrumgenomics/bam/pileup/PileupBuilderTest.scala | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala index 1ef6aad63..7d1da24ba 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala @@ -171,13 +171,19 @@ class PileupBuilderTest extends UnitSpec { p4.iterator.collect{ case x: InsertionEntry => x }.map(_.rec.name).next() shouldBe "q5" p4.baseIterator.toSeq should contain theSameElementsAs p4.withoutIndels.iterator.toSeq - // Locus with the end of a read that is soft-clipped + // Locus with the remainder of a read that is soft-clipped val p5 = piler.pileup(Chr1, 247) p5.depth shouldBe 1 - p5.iterator.size shouldBe 1 // should not report an insertion due to the remaining bases + p5.iterator.size shouldBe 1 // should not report an insertion due to the remaining soft-clipped bases p5.baseIterator.size shouldBe 1 p5.baseIterator.toSeq should contain theSameElementsAs p5.withoutIndels.iterator.toSeq + // Locus at the site of a single read that is soft-clipped + val p6 = piler.pileup(Chr1, 248) + p6.depth shouldBe 0 + p6.iterator.size shouldBe 0 + p6.baseIterator.size shouldBe 0 + p6.baseIterator.toSeq should contain theSameElementsAs p5.withoutIndels.iterator.toSeq source.safelyClose() piler.safelyClose() } From bc462fbad5a926c6293b54ea28a95c48cd8c8e96 Mon Sep 17 00:00:00 2001 From: Clint Valentine Date: Wed, 17 Jan 2024 13:09:38 -0800 Subject: [PATCH 3/3] Update src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala --- .../com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala index 7d1da24ba..13ed46b2d 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/pileup/PileupBuilderTest.scala @@ -183,7 +183,7 @@ class PileupBuilderTest extends UnitSpec { p6.depth shouldBe 0 p6.iterator.size shouldBe 0 p6.baseIterator.size shouldBe 0 - p6.baseIterator.toSeq should contain theSameElementsAs p5.withoutIndels.iterator.toSeq + p6.baseIterator.toSeq should contain theSameElementsAs p6.withoutIndels.iterator.toSeq source.safelyClose() piler.safelyClose() }