This repository has been archived by the owner on Nov 28, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
Coverage comparisons
Marek Wiewiórka edited this page Mar 31, 2019
·
62 revisions
Big cluster
WES - local
----------|1 . | . 2| . 3| . 5| . 10|
WGS with GKL
cores time(count)[s] times(save)[s]
-------------------------------------------
1 5583.9 6283.1
5 1592.7 1853.1
10 634.6 1050.4
25 245.5 339.2
50 129.1 206.8
100 78.6 130.5
200 56.5 86.5
300 47.8 78.4
400 47.7 70.7
500 43.3 65.7
WES
cores . times(save)[s]
------------------------
1 410.9
5 105.8
10 . 63.4
25 27.5
50 19.2
100 16.1
### exome
{ time docker run -it -v /data/samples/NA12878/WES:/data/ -v /data/samples/hg_builds/:/ref/ broadinstitute/gatk3:3.8-1 \
java -jar GenomeAnalysisTK.jar \
-T DepthOfCoverage \
-R /ref/Homo_sapiens_assembly18.fasta \
-o /data/gatk_doc_test.txt \
-I /data/NA12878.proper.wes.bam \
-omitIntervals \
-nt 1} 2>> gatk_wes_time_1.txt
Environment: cdh00. WGS file: NA12878.proper.wgs.bam - 272G, WES file NA12878.proper.wes.bam - 17G
data | threads | time [s] | time [m] |
---|---|---|---|
WES(0.2.3) | 1 | 387 | 6m27 |
WES(0.2.4) | 1 | 411 | 6m50 |
WES(0.2.4 -x) | 1 | 372 | 6m12 |
WES(0.2.3) | 5 | 136 | 2m16 |
WES(0.2.4) | 5 | 147 | 2m27 |
WES(0.2.4 -x) | 5 | 141 | 2m21 |
WES | 10 | 135 | 2m15 |
WGS | 1 | 6495 | 108m15 |
WGS | 5 | 1959 | 32m39 |
WGS | 10 | 1810 | 30m10 |
export LD_LIBRARY_PATH=/usr/local/lib
### exome
{ time mos/mosdepth prefix NA12878.proper.wes.bam ; } 2>> mos_wes_time_1.txt
{ time mos/mosdepth -t 4 prefix NA12878.proper.wes.bam ; } 2>> mos_wes_time_5.txt
{ time mos/mosdepth -t 9 prefix NA12878.proper.wes.bam ; } 2>> mos_wes_time_10.txt
### exome 0.2.4
{ time mos/mosdepth_0.2.4 prefix NA12878.proper.wes.bam ; } 2>> mos_wes_time_1_0.2.4.txt
{ time mos/mosdepth_0.2.4 -x prefix NA12878.proper.wes.bam ; } 2>> mos_wes_time_1_0.2.4.txt
### genome
{ time mos/mosdepthh prefix NA12878.proper.wgs.bam ; } 2>> wgs_time_1.txt
{ time mos/mosdepth -t 4 prefix NA12878.proper.wgs.bam ; } 2>> wgs_time_5.txt
{ time mos/mosdepth -t 9 prefix NA12878.proper.wgs.bam ; } 2>> wgs_time_9.txt
### fixed-length window for exome
{ time mos/mosdepth -n --by 500 win NA12878.proper.wes.bam ; } 2>> mos_wes_win_time_1.txt
{ time mos/mosdepth -n -t 4 --by 500 win NA12878.proper.wes.bam ; } 2>> mos_wes_win_time_5.txt
{ time mos/mosdepth -n -t 9 --by 500 win NA12878.proper.wes.bam ; } 2>> mos_wes_win_time_10.txt
### fixed-length window for genome
{ time mos/mosdepth -n --by 500 win NA12878.proper.wgs.bam ; } 2>> mos_wgs_win_time_1.txt
{ time mos/mosdepth -n -t 4 --by 500 win NA12878.proper.wgs.bam ; } 2>> mos_wgs_win_time_5.txt
{ time mos/mosdepth -n -t 9 --by 500 win NA12878.proper.wgs.bam ; } 2>> mos_wgs_win_time_10.txt
data | threads | time [s] | time [m] |
---|---|---|---|
WES | 1 | 720 | 12.5 |
WGS | 1 | 8370 | 139m30 |
# exome
{ time samtools depth NA12878.ga2.exome.maq.recal.bam > samtools/NA12878.ga2.exome.maq.recal.depth ; } 2>> samtools/wes_time.txt
# genome
{ time samtools depth NA12878.hiseq.wgs.bwa.recal.bam > samtools/NA12878.hiseq.wgs.bwa.recal.depth ; } 2>> samtools/wgs_time.txt
docker run -it --rm -u root -v /data/samples/NA12878:/data biocontainers/bedtools bash
root@131d1e99ad03:/data# time bedtools genomecov -ibam /data/NA12878.ga2.exome.maq.recal.bam -bga -strand + >> /data/bedcov.bed
real 16m55.177s
user 15m23.940s
sys 1m29.920s
rm -rf ~/.ivy2/cache/org.biodatageeks/bdg-sequila_2.11/*
rm -rf ~/.ivy2/jars/org.biodatageeks_bdg-sequila_2.11*
cd /data/local/opt/spark-2.3.2-bin-hadoop2.7/bin/
#unset in local mode export HADOOP_CONF_DIR=/etc/hadoop/conf
export cores=1
./spark-shell --conf "spark.driver.extraJavaOptions=-Dmapred.max.split.size=134217728" --conf "spark.sql.catalogImplementation=in-memory" --master=local[$cores] --driver-memory=12g --packages org.biodatageeks:bdg-sequila_2.11:0.4.1-SNAPSHOT --repositories https://zsibio.ii.pw.edu.pl/nexus/repository/maven-snapshots/ -v
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.utils.{SequilaRegister, UDFRegister,BDGInternalParams}
spark.sqlContext.setConf(BDGInternalParams.InputSplitSize, "134217728")
val ss = SequilaSession(spark)
SequilaRegister.register(ss)
ss.sqlContext.setConf("spark.biodatageeks.bam.useGKLInflate","true")
ss.sqlContext.setConf("spark.biodatageeks.bam.useSparkBAM","false")
/* WES -bases-blocks*/
ss.sql("""
CREATE TABLE IF NOT EXISTS reads_exome USING org.biodatageeks.datasources.BAM.BAMDataSource OPTIONS(path '/data/samples/NA12878/WES/NA12878*.bam')""")
spark.time{
ss.sql(s"SELECT * FROM bdg_coverage('reads_exome','NA12878', 'blocks')").write.format("parquet").save("/data/samples/NA12878/output_tmp/wes_1_9.parquet")}
/* WGS -bases-blocks*/
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.utils.{SequilaRegister, UDFRegister}
val ss = SequilaSession(spark)
SequilaRegister.register(ss)
ss.sqlContext.setConf("spark.biodatageeks.bam.useGKLInflate","true")
ss.sqlContext.setConf("spark.biodatageeks.bam.useSparkBAM","false")
/*bases-blocks*/
ss.sql("""
CREATE TABLE IF NOT EXISTS reads_genome USING org.biodatageeks.datasources.BAM.BAMDataSource OPTIONS(path '/data/samples/NA12878/NA12878*.bam')""")
spark.time{
ss.sql(s"SELECT * FROM bdg_coverage('reads_genome','NA12878', 'blocks')").write.format("parquet").save("/data/samples/NA12878/output_tmp/wgs_1_1.parquet")}
/*windows - 500*/
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.utils.{SequilaRegister, UDFRegister}
val ss = SequilaSession(spark)
SequilaRegister.register(ss)
ss.sqlContext.setConf("spark.biodatageeks.bam.useGKLInflate","true")
ss.sqlContext.setConf("spark.biodatageeks.bam.useSparkBAM","false")
/*bases-blocks*/
ss.sql("""
CREATE TABLE IF NOT EXISTS reads_exome USING org.biodatageeks.datasources.BAM.BAMDataSource OPTIONS(path '/tmp/fp16yq/data/exome/32MB/*.bam')""")
spark.time{
ss.sql(s"SELECT * FROM bdg_coverage('reads_exome','NA12878', 'blocks', '500')").write.format("parquet").save("/tmp/fp16yq/data/32MB_w500_3.parquet") }
/*CRAM*/
import org.apache.spark.sql.SequilaSession
import org.biodatageeks.utils.{SequilaRegister, UDFRegister,BDGInternalParams}
//spark.sqlContext.setConf(BDGInternalParams.InputSplitSize, "134217728")
val ss = SequilaSession(spark)
SequilaRegister.register(ss)
ss.sqlContext.setConf("spark.biodatageeks.bam.useGKLInflate","true")
ss.sqlContext.setConf("spark.biodatageeks.bam.useSparkBAM","false")
/* WES -bases-blocks*/
//32MB
ss.sql("""
CREATE TABLE IF NOT EXISTS reads_exome_cram USING org.biodatageeks.datasources.BAM.CRAMDataSource OPTIONS(path '/tmp/fp16yq/data/exome/32MB/NA12878.*.cram', refPath 'hdfs://datalab/tmp/fp16yq/data/ref/Homo_sapiens_assembly18.fasta')""")
spark.time{
ss.sql(s"SELECT * FROM bdg_coverage('reads_exome_cram','NA12878', 'blocks')").write.format("parquet").save("/tmp/fp16yq/data/wes_5_3.parquet")}
ss.sql("""
CREATE TABLE IF NOT EXISTS reads_exome_cram USING org.biodatageeks.datasources.BAM.CRAMDataSource OPTIONS(path '/tmp/fp16yq/data/exome/NA12878.*.cram', refPath 'hdfs://datalab/tmp/fp16yq/data/ref/Homo_sapiens_assembly18.fasta')""")
ss.sql("""
CREATE TABLE IF NOT EXISTS reads_exome_bam USING org.biodatageeks.datasources.BAM.BAMDataSource OPTIONS(path '/tmp/fp16yq/data/exome/NA12878*.bam')""")
spark.time{
ss.sql(s"SELECT * FROM bdg_coverage('reads_exome_bam','NA12878', 'blocks')").write.format("parquet").save("/tmp/fp16yq/data/wes_bam_10_2.parquet")}
/*WGS*/
ss.sql("""
CREATE TABLE IF NOT EXISTS reads_genome_cram USING org.biodatageeks.datasources.BAM.CRAMDataSource OPTIONS(path '/tmp/fp16yq/data/genome/NA12878.*.cram', refPath 'hdfs://datalab/tmp/fp16yq/data/ref/Homo_sapiens_assembly18.fasta')""")
spark.time{
ss.sql(s"SELECT * FROM bdg_coverage('reads_exome_cram','NA12878', 'blocks')").write.format("parquet").save("/tmp/fp16yq/data/wes_5_3.parquet")}
spark.time{
ss.sql(s"SELECT count(*) FROM bdg_coverage('reads_genome_cram','NA12878', 'blocks')").show}
time docker run --rm -v /data/samples/NA12878:/data/samples/NA12878 -w /data/samples/NA12878 wkusmirek/sambamba /opt/sambamba-0.6.8-linux-static depth base --output-file=sambamba_base_coverage.txt --nthreads=1 /data/samples/NA12878/WES/NA12878.proper.wes.bam
time docker run --rm -v /data/samples/NA12878:/data/samples/NA12878 -w /data/samples/NA12878 wkusmirek/sambamba /opt/sambamba-0.6.8-linux-static depth window --output-file=sambamba_window_coverage.txt --nthreads=1 --window-size=500 /data/samples/NA12878/WES/NA12878.proper.wes.bam
time docker run --rm -v /data/samples/NA12878:/data/samples/NA12878 -w /data/samples/NA12878 wkusmirek/sambamba /opt/sambamba-0.6.8-linux-static depth base --output-file=sambamba_base_coverage.txt --nthreads=1 /data/samples/NA12878/WGS/NA12878.proper.wgs.bam
time docker run --rm -v /data/samples/NA12878:/data/samples/NA12878 -w /data/samples/NA12878 wkusmirek/sambamba /opt/sambamba-0.6.8-linux-static depth window --output-file=sambamba_window_coverage.txt --nthreads=1 --window-size=500 /data/samples/NA12878/WGS/NA12878.proper.wgs.bam