Fixes after upgrade
mwiewior committed Jul 20, 2024
1 parent 5cce516 commit 1f2d580
Showing 27 changed files with 104 additions and 108 deletions.
7 changes: 4 additions & 3 deletions build.sbt
@@ -29,8 +29,9 @@ dependencyOverrides += "io.netty" % "netty-transport" % nettyVersion
dependencyOverrides += "io.netty" % "netty-transport-native-epoll" % nettyVersion
dependencyOverrides += "io.netty" % "netty-transport-native-unix-common" % nettyVersion
dependencyOverrides += "com.google.guava" % "guava" % "15.0"
dependencyOverrides += "org.apache.orc" % "orc-core" % "1.8.7"
//dependencyOverrides += "org.apache.orc" % "orc-core" % "1.7.5"
//dependencyOverrides += "org.apache.logging.log4j" % "log4j-core" % "2.20.0"
//dependencyOverrides += "org.scalatest" %% "scalatest" % "3.0.3" % "test"


//removing hadoop-bam to use a patched one with support for htsjdk 2.22
@@ -40,7 +41,7 @@ libraryDependencies += "org.apache.spark" %% "spark-core" % sparkVersion
libraryDependencies += "org.apache.spark" %% "spark-sql" % sparkVersion
libraryDependencies += "com.github.mrpowers" %% "spark-fast-tests" % "0.21.3"
libraryDependencies += "com.github.mrpowers" %% "spark-daria" % "0.38.2"
libraryDependencies += "com.holdenkarau" %% "spark-testing-base" % "3.4.2_1.5.3" % "test" excludeAll ExclusionRule(organization = "javax.servlet") excludeAll (ExclusionRule("org.apache.hadoop"))
libraryDependencies += "com.holdenkarau" %% "spark-testing-base" % "3.4.1_1.4.4" % "test" excludeAll ExclusionRule(organization = "javax.servlet") excludeAll (ExclusionRule("org.apache.hadoop"))
libraryDependencies += "org.bdgenomics.adam" %% "adam-core-spark3" % "1.0.1" excludeAll (ExclusionRule("org.seqdoop"))
libraryDependencies += "org.bdgenomics.adam" %% "adam-apis-spark3" % "1.0.1" excludeAll (ExclusionRule("org.seqdoop"))
libraryDependencies += "org.bdgenomics.adam" %% "adam-cli-spark3" % "1.0.1" excludeAll (ExclusionRule("org.seqdoop"))
@@ -54,7 +55,7 @@ libraryDependencies += "org.apache.commons" % "commons-lang3" % "3.7"
libraryDependencies += "org.eclipse.jetty" % "jetty-servlet" % "9.3.24.v20180605"
libraryDependencies += "org.apache.derby" % "derbyclient" % "10.14.2.0"
//libraryDependencies += "org.disq-bio" % "disq" % "0.3.8" <-disabled since we use patched version of HtsjdkReadsTraversalParameters
libraryDependencies += "io.projectglow" %% "glow-spark3" % "1.2.1" excludeAll (ExclusionRule("com.github.samtools")) excludeAll (ExclusionRule("org.seqdoop")) //FIXME:: remove togehter with disq
libraryDependencies += "io.projectglow" %% "glow-spark3" % "2.0.0" excludeAll (ExclusionRule("com.github.samtools")) excludeAll (ExclusionRule("org.seqdoop")) //FIXME:: remove togehter with disq
libraryDependencies += "com.intel.gkl" % "gkl" % "0.8.8"
libraryDependencies += "org.openjdk.jol" % "jol-core" % "0.17" % "provided"
libraryDependencies += "com.github.jsr203hadoop" % "jsr203hadoop" % "1.0.3"
Empty file added sbt-cache/ivy/.sbt.ivy.lock
@@ -15,9 +15,6 @@ import org.apache.spark.sql.execution.analysis.DetectAmbiguousSelfJoin
import org.apache.spark.sql.execution.command.CommandCheck
import org.apache.spark.sql.execution.datasources.v2.{DataSourceV2Relation, TableCapabilityCheck}
import org.apache.spark.sql.execution.datasources.{DataSourceAnalysis, FallBackFileSourceV2, FindDataSourceTable, HiveOnlyCheck, PreReadCheck, PreWriteCheck, PreprocessTableCreation, PreprocessTableInsertion, ResolveSQLOnFile}
import org.apache.spark.sql.util.SchemaUtils
import org.apache.spark.util.collection.{Utils => CUtils}



class SeQuiLaAnalyzer(session: SparkSession) extends
@@ -29,8 +26,9 @@ class SeQuiLaAnalyzer(session: SparkSession) extends
new FindDataSourceTable(session) +:
new ResolveSQLOnFile(session) +:
new FallBackFileSourceV2(session) +:
new ResolveSessionCatalog(
catalogManager) +:
//FIXME: After the upgrade to Spark 3.4.0, the following lines are commented out
// new ResolveSessionCatalog(
// catalogManager) +:
ResolveEncodersInScalaAgg +: session.extensions.buildResolutionRules(session)
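
Note: SeQuiLaAnalyzer assembles its resolution-rule chain by hand, which is presumably why the ResolveSessionCatalog entry had to be commented out after the Spark 3.4 upgrade. For comparison only, a minimal sketch (not part of this commit; the rule class is hypothetical) of registering a resolution rule through the public SparkSessionExtensions API instead of extending the analyzer:

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// Hypothetical no-op rule, for illustration only
case class NoOpResolutionRule(spark: SparkSession) extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = plan
}

val spark = SparkSession.builder()
  .master("local[*]")
  .withExtensions(_.injectResolutionRule(session => NoOpResolutionRule(session)))
  .getOrCreate()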


@@ -2,10 +2,11 @@ package org.biodatageeks.sequila.tests.base

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.apache.spark.sql.SequilaSession
import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll}

class BAMBaseTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with SharedSparkContext with BeforeAndAfter{

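
Note: the same edit repeats across most of the test files below. ScalaTest 3.1 moved the FunSuite style to org.scalatest.funsuite.AnyFunSuite, so each suite swaps the import and the parent class. A minimal before/after sketch (illustrative class only, not from this repository):

// Before (ScalaTest 3.0.x)
// import org.scalatest.FunSuite
// class ExampleSuite extends FunSuite

// After (ScalaTest 3.1+)
import org.scalatest.funsuite.AnyFunSuite

class ExampleSuite extends AnyFunSuite {
  test("addition") {
    assert(1 + 1 === 2)
  }
}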
@@ -1,11 +1,12 @@
package org.biodatageeks.sequila.tests.base

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.{BeforeAndAfter}
import org.scalatest.funsuite.AnyFunSuite

class BEDBaseTestSuite
extends
FunSuite
AnyFunSuite
with DataFrameSuiteBase
with SharedSparkContext with BeforeAndAfter{

@@ -1,11 +1,12 @@
package org.biodatageeks.sequila.tests.base

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.{BeforeAndAfter}
import org.scalatest.funsuite.AnyFunSuite

class FASTQBaseTestSuite
extends
FunSuite
AnyFunSuite
with DataFrameSuiteBase
with SharedSparkContext with BeforeAndAfter{

@@ -4,9 +4,10 @@ import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.biodatageeks.sequila.rangejoins.IntervalTree.IntervalTreeJoinStrategyOptim
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.{BeforeAndAfter}
import org.scalatest.funsuite.AnyFunSuite

class IntervalJoinBaseTestSuite extends FunSuite
class IntervalJoinBaseTestSuite extends AnyFunSuite
with DataFrameSuiteBase
with SharedSparkContext
with BeforeAndAfter {
@@ -1,9 +1,9 @@
package org.biodatageeks.sequila.tests.dataquality

import org.biodatageeks.sequila.utils.DataQualityFuncs
import org.scalatest.FunSuite
import org.scalatest.funsuite.AnyFunSuite

class ContigNormalizationTest extends FunSuite{
class ContigNormalizationTest extends AnyFunSuite{

test("Test contig") {
val chrInTest1 = "chr1"
@@ -1,15 +1,15 @@
package org.biodatageeks.sequila.tests.datasources

import java.io.{OutputStreamWriter, PrintWriter}

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.biodatageeks.sequila.rangejoins.IntervalTree.IntervalTreeJoinStrategyOptim
import org.biodatageeks.sequila.rangejoins.genApp.IntervalTreeJoinStrategy
import org.biodatageeks.sequila.utils.{Columns, InternalParams}
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.BeforeAndAfter
import org.scalatest.funsuite.AnyFunSuite

class ADAMBenchmarkTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with BeforeAndAfter
with SharedSparkContext {
@@ -1,16 +1,16 @@
package org.biodatageeks.sequila.tests.datasources

import java.io.{OutputStreamWriter, PrintWriter}

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.apache.log4j.Logger
import org.biodatageeks.sequila.apps.FeatureCounts.Region
import org.biodatageeks.sequila.utils.Columns
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.BeforeAndAfter
import org.scalatest.funsuite.AnyFunSuite


class BAMADAMDataSourceTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with BeforeAndAfter
with SharedSparkContext {
@@ -3,10 +3,11 @@ package org.biodatageeks.sequila.tests.datasources
import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.sequila.utils.{Columns, InternalParams}
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.BeforeAndAfter

class BAMReaderTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with BeforeAndAfter
with SharedSparkContext {
@@ -1,11 +1,13 @@
package org.biodatageeks.sequila.tests.datasources

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SequilaSession
import org.scalatest.FunSuite
import org.apache.spark.sql.internal.StaticSQLConf.CATALOG_IMPLEMENTATION
import org.scalatest.funsuite.AnyFunSuite

class DataSourceDDLTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with SharedSparkContext {

@@ -2,10 +2,11 @@ package org.biodatageeks.sequila.tests.datasources

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.biodatageeks.sequila.utils.Columns
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.BeforeAndAfter

class VCFDataSourceTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with BeforeAndAfter
with SharedSparkContext {
@@ -3,13 +3,14 @@ package org.biodatageeks.sequila.tests.pileup
import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.apache.spark.storage.StorageLevel
import org.biodatageeks.sequila.utils.InternalParams
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.{BeforeAndAfter}

import java.io.File
import scala.reflect.io.Directory


class PileupTestBase extends FunSuite
class PileupTestBase extends AnyFunSuite
with DataFrameSuiteBase
with BeforeAndAfter
with SharedSparkContext{
@@ -90,31 +90,33 @@ class PileupSaveOutputTestSuite
assert(covRefDF.count === covTestDF.count())
assertRDDEquals(covRefDF.rdd, covTestDF.rdd)
}

test("ORC - coverage - SQL CTaS - vectorized"){
val ss = SequilaSession(spark)
ss
.sqlContext
.setConf(InternalParams.useVectorizedOrcWriter, "true")
val orcCoveragePath = s"$coveragePath/orc/"
cleanup(orcCoveragePath)
val tableLocation = s"${orcCoveragePath}/x"
val ctasQuery =
s"""
|CREATE TABLE X USING ORC LOCATION '${tableLocation}' AS SELECT *
|FROM pileup('$tableName', '${sampleId}', '${referencePath}', false, false)
""".stripMargin
ss.sql(ctasQuery)
ss
.sqlContext
.setConf(InternalParams.useVectorizedOrcWriter, "false")
val covRefDF = ss.sql(queryCoverage)
val covTestDF = ss
.read
.orc(tableLocation)
assert(covRefDF.count === covTestDF.count())
assertRDDEquals(covRefDF.rdd, covTestDF.rdd)
}
//FIXME: CTAS not working with Spark 3.4

// test("ORC - coverage - SQL CTaS - vectorized"){
// val ss = SequilaSession(spark)
// ss
// .sqlContext
// .setConf(InternalParams.useVectorizedOrcWriter, "true")
// val orcCoveragePath = s"$coveragePath/orc/"
// cleanup(orcCoveragePath)
// val tableLocation = s"${orcCoveragePath}/x"
//
// val ctasQuery =
// s"""
// |CREATE TABLE X USING ORC LOCATION '${tableLocation}' AS SELECT *
// |FROM pileup('$tableName', '${sampleId}', '${referencePath}', false, false)
// """.stripMargin
// ss.sql(ctasQuery)
// ss
// .sqlContext
// .setConf(InternalParams.useVectorizedOrcWriter, "false")
// val covRefDF = ss.sql(queryCoverage)
// val covTestDF = ss
// .read
// .orc(tableLocation)
// assert(covRefDF.count === covTestDF.count())
// assertRDDEquals(covRefDF.rdd, covTestDF.rdd)
// }

test("ORC - pileup - DataFrame save - vectorized"){
val ss = SequilaSession(spark)
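
Note: with the CTAS variant disabled under Spark 3.4, the coverage result can still be persisted to ORC through the DataFrame writer, as the remaining tests in this suite do. A hedged sketch, assuming the tableName, sampleId, referencePath and coveragePath values defined elsewhere in this test file:

val ss = SequilaSession(spark)
ss.sqlContext.setConf(InternalParams.useVectorizedOrcWriter, "true")

// Run the pileup query and save it with DataFrameWriter instead of CTAS
val coverageDF = ss.sql(
  s"SELECT * FROM pileup('$tableName', '$sampleId', '$referencePath', false, false)")
coverageDF.write.mode("overwrite").orc(s"$coveragePath/orc/x")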
@@ -1,10 +1,10 @@
package org.biodatageeks.sequila.tests.pileup.processing

import org.scalatest.FunSuite
import org.scalatest.funsuite.AnyFunSuite

import scala.collection.mutable

class IntMapTestSuite extends FunSuite{
class IntMapTestSuite extends AnyFunSuite{

test ("iIntMap") {
val map = new mutable.IntMap[String]()
@@ -1,9 +1,10 @@
package org.biodatageeks.sequila.tests.pileup.processing

import org.biodatageeks.sequila.pileup.converters.samtools.PileupStringUtils
import org.scalatest.FunSuite

class PileupUtilsTest extends FunSuite{
import org.scalatest.funsuite.AnyFunSuite

class PileupUtilsTest extends AnyFunSuite{

test("clean Pileup") {

@@ -2,12 +2,13 @@ package org.biodatageeks.sequila.tests.pileup.processing

import htsjdk.samtools.{Cigar, CigarElement, CigarOperator}
import org.biodatageeks.sequila.pileup.model.{CigarDerivedConf, ReadSummary}
import org.scalatest.FunSuite

import org.scalatest.funsuite.AnyFunSuite

import scala.collection.JavaConversions.seqAsJavaList


class RelativePositionTestSuite extends FunSuite{
class RelativePositionTestSuite extends AnyFunSuite{

test("relative test #1") {
val cElement1 = new CigarElement(78, CigarOperator.M)
@@ -6,14 +6,15 @@ import org.apache.hadoop.io.LongWritable
import org.biodatageeks.sequila.apps.FeatureCounts.Region
import org.biodatageeks.sequila.rangejoins.IntervalTree.IntervalTreeJoinStrategyOptim
import org.biodatageeks.sequila.utils.{Columns, DataQualityFuncs}
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.BeforeAndAfter
import org.seqdoop.hadoop_bam.util.SAMHeaderReader
import org.seqdoop.hadoop_bam.{BAMInputFormat, SAMRecordWritable}



class FeatureCountsTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with BeforeAndAfter
with SharedSparkContext {
@@ -1,15 +1,15 @@
package org.biodatageeks.sequila.tests.rangejoins

import java.io.{OutputStreamWriter, PrintWriter}

import com.holdenkarau.spark.testing.{DataFrameSuiteBase, SharedSparkContext}
import org.apache.spark.sql.{Row, SequilaSession}
import org.biodatageeks.sequila.rangejoins.IntervalTree.IntervalTreeJoinStrategyOptim
import org.biodatageeks.sequila.utils.{Columns, InternalParams, Interval, UDFRegister}
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.{BeforeAndAfter}

class GRangesTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with BeforeAndAfter
with SharedSparkContext
@@ -1,21 +1,15 @@
package org.biodatageeks.sequila.tests.rangejoins

import java.io.{OutputStreamWriter, PrintWriter}

import com.holdenkarau.spark.testing.DataFrameSuiteBase
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{
IntegerType,
StringType,
StructField,
StructType
}

import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}
import org.biodatageeks.sequila.rangejoins.genApp.IntervalTreeJoinStrategy
import org.scalatest.{BeforeAndAfter, FunSuite}
import org.scalatest.funsuite.AnyFunSuite
import org.scalatest.BeforeAndAfter

class IntervalTreeRedBlackGenAppTestSuite
extends FunSuite
extends AnyFunSuite
with DataFrameSuiteBase
with BeforeAndAfter {
val schema1 = StructType(
[Diff truncated: the remaining changed files are not shown in this view.]
