diff --git a/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala b/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala index c9ea7703e..30c19b10e 100644 --- a/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala +++ b/src/main/scala/com/fulcrumgenomics/bam/api/SamSource.scala @@ -24,13 +24,12 @@ package com.fulcrumgenomics.bam.api -import java.io.Closeable - import com.fulcrumgenomics.FgBioDef._ import com.fulcrumgenomics.bam.api.QueryType.QueryType import htsjdk.samtools._ import htsjdk.samtools.util.{Interval, Locatable} +import java.io.{Closeable, InputStream} import scala.collection.compat._ /** Companion to the [[SamSource]] class that provides factory methods for sources. */ @@ -38,15 +37,29 @@ object SamSource { var DefaultUseAsyncIo: Boolean = false var DefaultValidationStringency: ValidationStringency = ValidationStringency.STRICT - /** - * Constructs a [[SamSource]] to read from the provided path. + /** Configure a [[SAMRecordFactory]] with a variety of parameters. */ + private def buildSamRecordFactory( + factory: SAMRecordFactory, + ref: Option[PathToFasta], + async: Boolean, + stringency: ValidationStringency, + ): SamReaderFactory = { + val fac = SamReaderFactory.make() + fac.samRecordFactory(factory) + fac.setUseAsyncIo(async) + fac.validationStringency(stringency) + ref.foreach(fac.referenceSequence) + fac + } + + /** Constructs a [[SamSource]] to read from the provided path. * * @param path the path to read the SAM/BAM/CRAM from * @param index an optional path to read the index from - * @param ref an optional reference sequencing for decoding CRAM files + * @param ref an optional reference sequence for decoding CRAM files * @param async if true use extra thread(s) to speed up reading * @param stringency the validation stringency to apply when reading the data - * @param factory a SAMRecordFactory; MUST return classes that mix in [[SamRecord]] + * @param factory a [[SAMRecordFactory]]; MUST return classes that mix in [[SamRecord]] */ def apply(path: PathToBam, index: Option[FilePath] = None, @@ -54,18 +67,30 @@ object SamSource { async: Boolean = DefaultUseAsyncIo, stringency: ValidationStringency = DefaultValidationStringency, factory: SAMRecordFactory = SamRecord.Factory): SamSource = { - // Configure the factory - val fac = SamReaderFactory.make() - fac.samRecordFactory(factory) - fac.setUseAsyncIo(async) - fac.validationStringency(stringency) - ref.foreach(r => fac.referenceSequence(r.toFile)) - - // Open the input(s) + val fac = buildSamRecordFactory(factory = factory, ref = ref, async = async, stringency = stringency) val input = SamInputResource.of(path) - index.foreach(i => input.index(i)) + index.foreach(input.index) new SamSource(fac.open(input)) } + + /** Constructs a [[SamSource]] to read from the provided input stream. + * + * @param stream the input stream of SAM/BAM/CRAM bytes + * @param ref an optional reference sequence for decoding CRAM files + * @param async if true use extra thread(s) to speed up reading + * @param stringency the validation stringency to apply when reading the data + * @param factory a [[SAMRecordFactory]]; MUST return classes that mix in [[SamRecord]] + */ + def apply( + stream: InputStream, + ref: Option[PathToFasta], + async: Boolean, + stringency: ValidationStringency, + factory: SAMRecordFactory, + ): SamSource = { + val fac = buildSamRecordFactory(factory = factory, ref = ref, async = async, stringency = stringency) + new SamSource(fac.open(SamInputResource.of(stream)), closer = Some(() => stream.close())) + } } /** Describes the two types of queries that can be performed. */ @@ -78,7 +103,9 @@ object QueryType extends Enumeration { * A source class for reading SAM/BAM/CRAM files and for querying them. * @param reader the underlying [[SamReader]] */ -class SamSource private(private val reader: SamReader) extends View[SamRecord] with HeaderHelper with Closeable { +class SamSource private(private val reader: SamReader, private val closer: Option[Closeable] = None) + extends View[SamRecord] with HeaderHelper with Closeable { + /** The [[htsjdk.samtools.SAMFileHeader]] associated with the source. */ override val header: SAMFileHeader = reader.getFileHeader @@ -109,7 +136,11 @@ class SamSource private(private val reader: SamReader) extends View[SamRecord] w /** Provides a string that shows where the source is reading from. */ override def toString: String = s"SamReader(${reader.getResourceDescription})" - override def close(): Unit = this.reader.close() + /** Close an optional wrapped closeable and release the SAM reader. */ + override def close(): Unit = { + this.closer.foreach(_.close()) + this.reader.close() + } /** * Returns the underlying SamReader. This should be avoided as much as possible, and the diff --git a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala index c6c0fb9b1..ab4b676d8 100644 --- a/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala +++ b/src/test/scala/com/fulcrumgenomics/bam/api/SamIoTest.scala @@ -24,15 +24,15 @@ package com.fulcrumgenomics.bam.api -import java.nio.file.Files -import java.util.concurrent.{Callable, Executors, TimeUnit} - import com.fulcrumgenomics.FgBioDef._ +import com.fulcrumgenomics.bam.api.SamSource.{DefaultUseAsyncIo, DefaultValidationStringency} import com.fulcrumgenomics.fasta.{SequenceDictionary, SequenceMetadata} import com.fulcrumgenomics.testing.{SamBuilder, UnitSpec} import com.fulcrumgenomics.util.Io import htsjdk.samtools.GenomicIndexUtil +import java.nio.file.Files +import java.util.concurrent.{Callable, Executors, TimeUnit} import scala.util.Random class SamIoTest extends UnitSpec { @@ -164,4 +164,29 @@ class SamIoTest extends UnitSpec { filterCount shouldBe 10 mapCount shouldBe 10 } + + it should "allow reading from a stream of SAM bytes" in { + val builder = new SamBuilder() + builder.addPair(name = "q1", start1 = 100, start2 = 300) + builder.addPair(name = "q4", start1 = 200, start2 = 400) + builder.addPair(name = "q3", start1 = 300, start2 = 500) + builder.addPair(name = "q2", start1 = 400, start2 = 600) + + val sam = makeTempFile(getClass.getSimpleName, ".sam") + val out = SamWriter(sam, builder.header, sort = Some(SamOrder.Coordinate)) + builder.foreach(out.write) + out.close() + + val source = SamSource( + stream = Io.toInputStream(sam), + ref = None, + async = DefaultUseAsyncIo, + stringency = DefaultValidationStringency, + factory = SamRecord.Factory + ) + + source.indexed shouldBe false + source.toSeq.map(_.start) should contain theSameElementsInOrderAs Seq(100, 200, 300, 300, 400, 400, 500, 600) + source.close() + } }