From 6d53b16bcbb567f1e42e9a985589b9883e85cc37 Mon Sep 17 00:00:00 2001 From: tfenne Date: Thu, 11 Nov 2021 09:15:44 -0700 Subject: [PATCH 1/3] Command line tool to produce and print pairwise alignments. --- .../fulcrumgenomics/alignment/Alignment.scala | 15 +++++ .../personal/tfenne/PairwiseAlign.scala | 57 +++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala diff --git a/src/main/scala/com/fulcrumgenomics/alignment/Alignment.scala b/src/main/scala/com/fulcrumgenomics/alignment/Alignment.scala index 840b73625..b09d63f32 100644 --- a/src/main/scala/com/fulcrumgenomics/alignment/Alignment.scala +++ b/src/main/scala/com/fulcrumgenomics/alignment/Alignment.scala @@ -25,6 +25,7 @@ package com.fulcrumgenomics.alignment import com.fulcrumgenomics.FgBioDef._ +import com.fulcrumgenomics.util.Sequences import htsjdk.samtools.{CigarElement, CigarOperator, SAMRecord, Cigar => HtsJdkCigar} import scala.collection.mutable.ArrayBuffer @@ -484,4 +485,18 @@ case class Alignment(query: Array[Byte], copy(queryStart=qStart, targetStart=tStart, cigar=Cigar(elems.result()), score=0) } + + /** Returns a version of the alignment where both sequences have been reverse complemented + * and the alignment has been reversed to match. + */ + def revcomped: Alignment = { + val query = this.query.clone() + val target = this.target.clone() + Sequences.revcomp(query) + Sequences.revcomp(target) + val cigar = this.cigar.reverse + val qs = query.length - (this.queryStart + this.cigar.lengthOnQuery - 1) + 1 + val ts = target.length - (this.targetStart + this.cigar.lengthOnTarget - 1) + 1 + Alignment(query, target, qs, ts, cigar, this.score) + } } diff --git a/src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala b/src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala new file mode 100644 index 000000000..5baeee5bc --- /dev/null +++ b/src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala @@ -0,0 +1,57 @@ +/* + * The MIT License + * + * Copyright (c) 2018 Fulcrum Genomics LLC + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package com.fulcrumgenomics.personal.tfenne + +import com.fulcrumgenomics.FgBioDef._ +import com.fulcrumgenomics.alignment.{Aligner, Mode} +import com.fulcrumgenomics.cmdline.{ClpGroups, FgBioTool} +import com.fulcrumgenomics.fastq.{FastqSource, FastqWriter} +import com.fulcrumgenomics.sopt.{arg, clp} +import com.fulcrumgenomics.util.{Io, Sequences} + +@clp(group=ClpGroups.Personal, description= + """ + |Align one query sequence to one target sequence. + """) +class PairwiseAlign +( @arg(flag='q', doc="Query sequence.") val query: String, + @arg(flag='t', doc="Target sequence.") val target: String, + @arg(flag='m', doc="Alignment mode.") val mode: Mode = Mode.Global, +) extends FgBioTool { + + override def execute(): Unit = { + val aligner = Aligner(1, -4, -6, -1, mode) + + val left = aligner.align(query, target) + val right = aligner.align(Sequences.revcomp(query), Sequences.revcomp(target)).revcomped + + println("Left Aligned:") + left.paddedString().foreach(println) + + println() + println("Right Aligned:") + right.paddedString().foreach(println) + } +} From f01e71e37f22935c303f7bfe8e4d7ce052178707 Mon Sep 17 00:00:00 2001 From: tfenne Date: Thu, 11 Nov 2021 14:28:39 -0700 Subject: [PATCH 2/3] Add test for Alignment.revcomped --- .../alignment/AlignmentTest.scala | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/test/scala/com/fulcrumgenomics/alignment/AlignmentTest.scala b/src/test/scala/com/fulcrumgenomics/alignment/AlignmentTest.scala index 66b287ec7..9c888f824 100644 --- a/src/test/scala/com/fulcrumgenomics/alignment/AlignmentTest.scala +++ b/src/test/scala/com/fulcrumgenomics/alignment/AlignmentTest.scala @@ -280,6 +280,28 @@ class AlignmentTest extends UnitSpec { alignment.subByTarget(6, 10).cigar.toString() shouldBe "5D" alignment.subByTarget(6, 11).cigar.toString() shouldBe "5D1=" } + + "Alignment.revcomped" should "reverse complement a simple alignment" in { + val in = Alignment("TTTTT", "TTTTT", 1, 1, Cigar("5M"), 5) + val rc = in.revcomped + rc.query.map(_.toChar).mkString shouldBe "AAAAA" + rc.target.map(_.toChar).mkString shouldBe "AAAAA" + rc.queryStart shouldBe 1 + rc.targetStart shouldBe 1 + rc.cigar.toString shouldBe "5M" + rc.score shouldBe 5 + } + + it should "correctly reverse a non-global alignment" in { + val in = Alignment("..GTAACCGGTT".filter(_ != '.'), "ACGTAA.CGGTTCCCCC".filter(_ != '.'), 1, 3, Cigar("5M1I4M"), 10) + val rc = in.revcomped + rc.query.map(_.toChar).mkString shouldBe "AACCGGTTAC" + rc.target.map(_.toChar).mkString shouldBe "GGGGGAACCGTTACGT" + rc.queryStart shouldBe 1 + rc.targetStart shouldBe 6 + rc.cigar.toString shouldBe "4M1I5M" + rc.score shouldBe 10 + } } class CigarStatsTest extends UnitSpec { From 2be41716a46e14cd8f16be8d89dfd7c7ce22cf93 Mon Sep 17 00:00:00 2001 From: Tim Fennell Date: Tue, 21 Dec 2021 06:05:26 -0700 Subject: [PATCH 3/3] Update src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala Co-authored-by: Nils Homer --- .../com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala b/src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala index 5baeee5bc..853211bad 100644 --- a/src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala +++ b/src/main/scala/com/fulcrumgenomics/personal/tfenne/PairwiseAlign.scala @@ -1,7 +1,7 @@ /* * The MIT License * - * Copyright (c) 2018 Fulcrum Genomics LLC + * Copyright (c) 2021 Fulcrum Genomics LLC * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal