diff --git a/setup.py b/setup.py index 591f954..39fab91 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ setup( name = 'tiddit', - version = '3.9.1', + version = '3.9.2', url = "https://github.com/SciLifeLab/TIDDIT", diff --git a/tiddit/__main__.py b/tiddit/__main__.py index f079dde..5916f66 100644 --- a/tiddit/__main__.py +++ b/tiddit/__main__.py @@ -18,7 +18,7 @@ import tiddit.tiddit_gc as tiddit_gc def main(): - version="3.9.1" + version="3.9.2" parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False) parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true") parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true") @@ -60,9 +60,9 @@ def main(): parser.add_argument('--skip_assembly', action="store_true", help="Skip running local assembly, tiddit will perform worse, but wont require bwa and bwa indexed ref, and will complete quicker") parser.add_argument('--bwa', type=str,default="bwa", help="path to bwa executable file(default=bwa)") parser.add_argument('--min_clip', type=int,default=4, help="Minimum clip reads to initiate local assembly of a region(default=4)") - parser.add_argument('--padding', type=int,default=4, help="Extend the local assembly by this number of bases (default=200bp)") + parser.add_argument('--padding', type=int,default=100, help="Extend the local assembly by this number of bases (default=100bp)") parser.add_argument('--min_pts_clips', type=int,default=3, help="min-pts parameter for the clustering of candidates for local assembly (default=3)") - parser.add_argument('--max_assembly_reads', type=int,default=100000, help="Skip assembly of regions containing too many reads (default=10000 reads)") + parser.add_argument('--max_assembly_reads', type=int,default=100000, help="Skip assembly of regions containing too many reads (default=100000 reads)") parser.add_argument('--max_local_assembly_region', type=int,default=2000, help="maximum size of the clip read cluster for being considered a local assembly candidate (default=2000 bp)") parser.add_argument('--min_anchor_len', type=int,default=60, help="minimum mapped bases to be considered a clip read (default=60 bp)") parser.add_argument('--min_clip_len', type=int,default=25, help="minimum clipped bases to be considered a clip read (default=25 bp)") diff --git a/tiddit/tiddit_variant.pyx b/tiddit/tiddit_variant.pyx index 1f00bfc..92e91a7 100644 --- a/tiddit/tiddit_variant.pyx +++ b/tiddit/tiddit_variant.pyx @@ -156,7 +156,11 @@ def find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,librar p=library["contig_ploidy_{}".format(chrA)] for sample in samples: - cn=int(round(sample_data[sample]["covM"]*p/library[ "avg_coverage_{}".format(chrA) ])) + if library[ "avg_coverage_{}".format(chrA) ] != 0: + cn=int(round(sample_data[sample]["covM"]*p/library[ "avg_coverage_{}".format(chrA) ])) + else: + cn=int(round(sample_data[sample]["covM"]*args.n/library[ "avg_coverage" ])) + #mitochondria or similar if p > args.n*10: @@ -261,7 +265,6 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar s=int(math.floor(sv_clusters[chrA][chrB][cluster]["startA"]/50.0)) e=int(math.floor(sv_clusters[chrA][chrB][cluster]["endA"]/50.0))+1 avg_a=numpy.average(coverage_data[chrA][s:e]) - #print(f"{chrA}-{posA}-{chrB}") if avg_a > args.max_coverage*library[ "avg_coverage_{}".format(chrA) ]: continue @@ -305,8 +308,12 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar e=int(math.floor(posB/50.0))+1 coverage_between=coverage_data[chrA][s:e] gc_between=gc[chrA][s:e] - - sample_data[sample]["covM"]=numpy.average(coverage_between[ gc_between > -1 ] ) + coverage_between=coverage_between[ gc_between > -1 ] + if len(coverage_between) > 4: + sample_data[sample]["covM"]=numpy.average(coverage_between) + else: + sample_data[sample]["covM"]=library[ "avg_coverage_{}".format(chrA) ] + inverted=0 non_inverted=0 @@ -559,7 +566,7 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl for chrB in sv_clusters[chrA]: variants[chrB]=[] - variants_list=Parallel(n_jobs=args.threads,prefer="threads",timeout=99999)( delayed(define_variant)(chrA,bam_file_name,sv_clusters,args,library,min_mapq,samples,coverage_data,contig_number,max_ins_len,contig_seqs,gc) for chrA in sv_clusters) + variants_list=Parallel(n_jobs=args.threads,prefer="threads")( delayed(define_variant)(chrA,bam_file_name,sv_clusters,args,library,min_mapq,samples,coverage_data,contig_number,max_ins_len,contig_seqs,gc) for chrA in sv_clusters) ratios={"fragments_A":[],"fragments_B":[],"reads_A":[],"reads_B":[]} for v in variants_list: