Skip to content

Commit

Permalink
Add scala implementations for some SamRecord methods
Browse files Browse the repository at this point in the history
* refPosAtReadPos
* readPosAtRefPos
  • Loading branch information
nh13 committed Mar 26, 2022
1 parent bb36b51 commit 65d71ee
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 20 deletions.
17 changes: 11 additions & 6 deletions src/main/scala/com/fulcrumgenomics/bam/SamRecordClipper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,15 @@ class SamRecordClipper(val mode: ClippingMode, val autoClipAttributes: Boolean)
// mid point, or if the mid point is in a deletion, the base prior to the deletion. The mate ends at the mid
// point, or if the mid point is in a deletion, the base after the deletion.
val midPoint = (rec.start + mate.end) / 2
val readEnd = rec.readPosAtRefPos(pos=midPoint, returnLastBaseIfDeleted=true)
val mateStart = { // NB: need to be careful if the midpoint falls in a deletion
val retval = mate.readPosAtRefPos(pos=midPoint + 1, returnLastBaseIfDeleted=false)
if (retval != 0) retval else mate.readPosAtRefPos(pos=midPoint + 1, returnLastBaseIfDeleted=true) + 1
val readEnd = rec.readPosAtRefPos(pos=midPoint, returnLastBaseIfDeleted=true).getOrElse(
unreachable("Expected to find reference position in read")
)
val mateStart: Int = { // NB: need to be careful if the midpoint falls in a deletion
mate.readPosAtRefPos(pos=midPoint + 1, returnLastBaseIfDeleted=false).getOrElse(
1 + mate.readPosAtRefPos(pos=midPoint + 1, returnLastBaseIfDeleted=true).getOrElse(
unreachable("Expected to find reference position in a deletion in mate")
)
)
}
val numOverlappingBasesRead = this.clip3PrimeEndOfRead(rec, rec.cigar.trailingClippedBases + rec.length - readEnd)
val numOverlappingBasesMate = this.clip3PrimeEndOfRead(mate, mate.cigar.leadingClippedBases + mateStart - 1)
Expand Down Expand Up @@ -342,11 +347,11 @@ class SamRecordClipper(val mode: ClippingMode, val autoClipAttributes: Boolean)
else {
if (rec.positiveStrand && rec.end >= mateEnd) {
// clip from where last read base of where the mate ends
Math.max(0, rec.length - rec.readPosAtRefPos(pos=mateEnd, returnLastBaseIfDeleted=false))
Math.max(0, rec.length - rec.readPosAtRefPos(pos=mateEnd, returnLastBaseIfDeleted=false).getOrElse(0))
}
else if (rec.negativeStrand && rec.start <= rec.mateStart) {
// clip up to and including one base before where the mate starts
Math.max(0, rec.readPosAtRefPos(pos=rec.mateStart, returnLastBaseIfDeleted=false) - 1)
Math.max(0, rec.readPosAtRefPos(pos=rec.mateStart, returnLastBaseIfDeleted=false).getOrElse(0) - 1)
} else {
// no bases extend past
0
Expand Down
80 changes: 77 additions & 3 deletions src/main/scala/com/fulcrumgenomics/bam/api/SamRecord.scala
Original file line number Diff line number Diff line change
Expand Up @@ -236,9 +236,83 @@ trait SamRecord {
// transient attributes
@inline final val transientAttrs: TransientAttrs = new TransientAttrs(this)

// TODO long-term: replace these two methods with methods on [[Cigar]] to save creating alignment blocks in memory
@inline final def refPosAtReadPos(pos: Int) = getReferencePositionAtReadPosition(pos)
@inline final def readPosAtRefPos(pos: Int, returnLastBaseIfDeleted: Boolean) = getReadPositionAtReferencePosition(pos, returnLastBaseIfDeleted)
/** Returns the 1-based reference position for the 1-based position in the read, or [[None]] if the reference does
* not map at the read position (e.g. insertion).
*
* @param pos the 1-based read position to query
* @param returnLastBaseIfInserted if the reference is an insertion, true to returned the previous reference base,
* false to return None
* */
@inline final def refPosAtReadPos(pos: Int, returnLastBaseIfInserted: Boolean = false): Option[Int] = if (pos <= 0) None else {
var readPos = 1
var refPos = this.start
var elementIndex = 0
val elems = this.cigar.elems

def continue(): Boolean = if (elementIndex >= elems.length) false else {
val elem = elems(elementIndex)
if (pos > CoordMath.getEnd(readPos, elem.lengthOnQuery)) true // current cigar element is before the desired position
else {
if (pos < readPos) readPos = 0 // past the point in the read
else if (elem.operator == CigarOperator.INSERTION) {
if (!returnLastBaseIfInserted) refPos = 0
else refPos -= 1
} // in an insertion, no reference position
else refPos += pos - readPos // get the offset
false
}
}

while (continue()) {
val elem = elems(elementIndex)
refPos += elem.lengthOnTarget
readPos += elem.lengthOnQuery
elementIndex += 1
}

if (this.start <= refPos && readPos <= this.end) Some(refPos) else None
}


/** Returns the 1-based position into the read's bases where the position is mapped either as a match or mismatch,
* [[None]] if the read does not map the position.
*
* @param pos the 1-based reference position to query
* @param returnLastBaseIfDeleted if the reference is a deletion, true to returned the previous read base, false to
* return None
* */
@inline final def readPosAtRefPos(pos: Int, returnLastBaseIfDeleted: Boolean = false): Option[Int] = {
if (this.unmapped || pos < this.start || this.end < pos) None
else { // overlaps
var readPos = 0
var refPos = this.start
var elementIndex = 0
val elems = this.cigar.elems

def continue(): Boolean = if (elementIndex >= elems.length) false else {
val elem = elems(elementIndex)
if (pos > CoordMath.getEnd(refPos, elem.lengthOnTarget)) true // current cigar element is before the desired position
else { // overlaps!
if (elem.operator == CigarOperator.DELETION) { // deletion
if (!returnLastBaseIfDeleted) readPos = 0 // don't return a read position, so zero out the current read position
} else {
readPos += pos - refPos + 1 // get the offset
}
false
}
}

while (continue()) {
val elem = elems(elementIndex)
refPos += elem.lengthOnTarget
readPos += elem.lengthOnQuery
elementIndex += 1
}

if (0 < readPos && readPos <= this.length) Some(readPos) else None
}
}


////////////////////////////////////////////////////////////////////////////
// Non-wrapper methods.
Expand Down
18 changes: 10 additions & 8 deletions src/main/scala/com/fulcrumgenomics/bam/pileup/PileupBuilder.scala
Original file line number Diff line number Diff line change
Expand Up @@ -209,14 +209,16 @@ trait PileupBuilder extends PileupParameters {
if (rec.start == pos + 1) { // This site must be an insertion in the read that is at the start of the read.
testAndAdd(InsertionEntry(rec, 0))
} else {
val offset = rec.readPosAtRefPos(pos, returnLastBaseIfDeleted = false)
if (offset == 0) { // This site must be deleted within the read.
val deletionPosition = rec.readPosAtRefPos(pos, returnLastBaseIfDeleted = true)
testAndAdd(DeletionEntry(rec, deletionPosition - 1))
} else { // This site must be a matched site within the read.
testAndAdd(BaseEntry(rec, offset - 1))
// Also check to see if the subsequent base represents an insertion.
if (offset < rec.length - 1 && rec.refPosAtReadPos(offset + 1) == 0) testAndAdd(InsertionEntry(rec, offset))
rec.readPosAtRefPos(pos, returnLastBaseIfDeleted = false) match {
case None => // This site must be deleted within the read.
val deletionPosition = rec.readPosAtRefPos(pos, returnLastBaseIfDeleted=true).getOrElse(
unreachable("Bug: expected to get the read position within a deletion")
)
testAndAdd(DeletionEntry(rec, deletionPosition - 1))
case Some(offset) => // This site must be a matched site within the read.
testAndAdd(BaseEntry(rec, offset - 1))
// Also check to see if the subsequent base represents an insertion.
if (offset < rec.length - 1 && rec.refPosAtReadPos(offset + 1).isEmpty) testAndAdd(InsertionEntry(rec, offset))
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -338,9 +338,7 @@ class ReviewConsensusVariants
private def nonReferenceAtAnyVariant(rec: SamRecord, variantsByChromAndPos: Map[String, Seq[Variant]]): Boolean = {
rec.mapped && variantsByChromAndPos(rec.refName).exists { v =>
if (v.start >= rec.start && v.start <= rec.end) {
val readPos = rec.readPosAtRefPos(v.start, false)
if (readPos == 0) true
else {
rec.readPosAtRefPos(v.start, false).forall { readPos =>
val base = rec.bases(readPos - 1)
!SequenceUtil.basesEqual(base, v.refBase.toByte) && (!this.ignoreNsInConsensusReads || !SequenceUtil.isNoCall(base))
}
Expand Down
44 changes: 44 additions & 0 deletions src/test/scala/com/fulcrumgenomics/bam/api/SamRecordTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -289,4 +289,48 @@ class SamRecordTest extends UnitSpec with OptionValues {
rec1.matesOverlap shouldBe None // Mate's start is not enclosed by rec, and mate's end cannot be determined
rec2.matesOverlap.value shouldBe true // Mate's start is enclosed by rec, regardless of where mate end is
}

"SamRecord.readPosAtRefPos" should "return the correct value" in {
case class ReadPosAtRefPos(cigar: String, refPos: Int, readPos: Option[Int], returnLastBaseIfDeleted: Boolean)
val testCases = Seq(
ReadPosAtRefPos("3S9M", 7, Some(10), false),
ReadPosAtRefPos("3S9M", 0, None, false),
ReadPosAtRefPos("3S9M", -1, None, false),
ReadPosAtRefPos("3S9M", 13, None, false),
ReadPosAtRefPos("4M1D6M", 4, Some(4), false),
ReadPosAtRefPos("4M1D6M", 4, Some(4), true),
ReadPosAtRefPos("4M1D6M", 5, None, false),
ReadPosAtRefPos("4M1D6M", 5, Some(4), true),
ReadPosAtRefPos("4M1I6M", 5, Some(6), false),
ReadPosAtRefPos("4M1I6M", 11, None, false)
)
testCases.foreach { testCase =>
val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value
val readPos = frag.readPosAtRefPos(pos=testCase.refPos, returnLastBaseIfDeleted=testCase.returnLastBaseIfDeleted)
readPos shouldBe testCase.readPos
readPos.getOrElse(0) shouldBe frag.asSam.getReadPositionAtReferencePosition(testCase.refPos, testCase.returnLastBaseIfDeleted)
}
}

"SamRecord.refPosAtReadPos" should "return the correct value" in {
case class RefPosAtReadPos(cigar: String, refPos: Option[Int], readPos: Int, returnLastBaseIfInserted: Boolean)
val testCases = Seq(
RefPosAtReadPos("3S9M", Some(7), 10, false),
RefPosAtReadPos("3S9M", None, 0, false),
RefPosAtReadPos("3S9M", None, 13, false),
RefPosAtReadPos("4M1D6M", Some(4), 4, false),
RefPosAtReadPos("4M1D6M", Some(6), 5, false),
RefPosAtReadPos("4M1I6M", None, 5, false),
RefPosAtReadPos("4M1I6M", Some(4), 5, true),
RefPosAtReadPos("4M1I6M", Some(5), 6, false),
)
testCases.foreach { testCase =>
val frag = new SamBuilder(readLength=Cigar(testCase.cigar).lengthOnQuery).addFrag(start=1, cigar=testCase.cigar).value
val readPos = frag.refPosAtReadPos(pos=testCase.readPos, returnLastBaseIfInserted=testCase.returnLastBaseIfInserted)
readPos shouldBe testCase.refPos
if (!testCase.returnLastBaseIfInserted) {
readPos.getOrElse(0) shouldBe frag.asSam.getReferencePositionAtReadPosition(testCase.readPos)
}
}
}
}

0 comments on commit 65d71ee

Please sign in to comment.