From ef26e5c49758ee6c468bbfd82363f3647e453c85 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 29 Aug 2022 12:58:19 +0200 Subject: [PATCH 1/3] changing RD to DR in format column --- tiddit/tiddit_variant.pyx | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tiddit/tiddit_variant.pyx b/tiddit/tiddit_variant.pyx index ddce978..bafd2ce 100644 --- a/tiddit/tiddit_variant.pyx +++ b/tiddit/tiddit_variant.pyx @@ -23,7 +23,7 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c q_start=start q_end=end+max_ins - + if q_end > contig_length: q_end=contig_length @@ -44,14 +44,14 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c if not read.mate_is_unmapped: if read.next_reference_start > end and read_reference_start > end: - continue + continue else: if read_reference_start > end: continue if read.is_duplicate: continue - + if not (read_reference_start > end): n_reads+=1 if read.mapq < min_q: @@ -83,7 +83,7 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c if read_reference_start < start: r_start=start - + if read_reference_end > end: r_end=end @@ -133,7 +133,7 @@ def find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,librar return("DUP:TANDEM",cn) elif cn < p: return("DEL",cn) - + elif inverted > non_inverted: return("INV",cn) else: @@ -253,7 +253,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar s=int(math.floor(posA/50.0)) e=int(math.floor(posB/50.0))+1 sample_data[sample]["covM"]=numpy.average(coverage_data[chrA][s:e] ) - + inverted=0 non_inverted=0 for i in range(0,len(sv_clusters[chrA][chrB][cluster]["positions_A"]["orientation_discordants"]) ): @@ -277,12 +277,12 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar svtype,cn=find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,library) filt=sv_filter(sample_data,args,chrA,chrB,posA,posB,max_ins_len,n_discordants,n_splits,library,sample_data[sample]["discA"],sample_data[sample]["discB"],sample_data[sample]["splitA"],sample_data[sample]["splitB"],n_contigs) - format_col="GT:CN:COV:DV:RV:LQ:RR:RD" + format_col="GT:CN:COV:DV:RV:LQ:RR:DR" #configure filters for CNV based on Read depth for sample in samples: if "DEL" in svtype: - #homozygout del based on coverage + #homozygout del based on coverage if cn == 0: filt="PASS" @@ -294,7 +294,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar if covA > covM*(cn+0.9) and covB > covM*(cn+0.9): filt="PASS" - #too few reads, but clear RD signal + #too few reads, but clear DR signal elif "DUP" in svtype and filt == "BelowExpectedLinks": filt="PASS" @@ -361,7 +361,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar GT= "1/1" else: GT="0/1" - + variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) ) variants.append([chrA,posA,variant]) else: @@ -472,7 +472,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) ) - variants.append([chrB,posB,variant]) + variants.append([chrB,posB,variant]) samfile.close() return(variants) @@ -481,7 +481,7 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl contig_seqs={} new_seq=False if not args.skip_assembly: - for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)): + for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)): if not new_seq and line[0] == "@" and "\t" in line: name=line.split("\t")[0][1:] @@ -501,5 +501,5 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl for v in variants_list: for variant in v: variants[ variant[0] ].append( [ variant[1],variant[2] ] ) - + return(variants) From d55186d88357d499385e5293b37a4c3d779875e9 Mon Sep 17 00:00:00 2001 From: jemten Date: Mon, 29 Aug 2022 13:18:56 +0200 Subject: [PATCH 2/3] updates version --- setup.py | 2 +- tiddit/__main__.py | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index d523864..35775b2 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( name = 'tiddit', - version = '3.3.0', + version = '3.3.1', url = "https://github.com/SciLifeLab/TIDDIT", author = "Jesper Eisfeldt", diff --git a/tiddit/__main__.py b/tiddit/__main__.py index 45dcc6e..caf9191 100644 --- a/tiddit/__main__.py +++ b/tiddit/__main__.py @@ -17,7 +17,7 @@ import tiddit.tiddit_contig_analysis as tiddit_contig_analysis def main(): - version="3.3.0" + version="3.3.1" parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False) parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true") parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true") @@ -88,7 +88,7 @@ def main(): if not os.path.isfile(args.bam): print ("error, could not find the bam file") quit() - + bam_file_name=args.bam samfile = pysam.AlignmentFile(bam_file_name, "r",reference_filename=args.ref) @@ -122,10 +122,10 @@ def main(): print("Folder already exists") pysam.index("-c","-m","6","-@",str(args.threads),bam_file_name,"{}_tiddit/{}.csi".format(args.o,sample_id)) - + min_mapq=args.q max_ins_len=100000 - n_reads=args.s + n_reads=args.s library=tiddit_stats.statistics(bam_file_name,args.ref,min_mapq,max_ins_len,n_reads) if args.i: @@ -153,29 +153,29 @@ def main(): print(time.time()-t) vcf_header=tiddit_vcf_header.main( bam_header,library,sample_id,version ) - + if not args.e: args.e=int(library["avg_insert_size"]/2.0) - + t=time.time() sv_clusters=tiddit_cluster.main(prefix,contigs,contig_length,samples,library["mp"],args.e,args.l,max_ins_len,args.min_contig,args.skip_assembly) - + print("generated clusters in") print(time.time()-t) - + f=open(prefix+".vcf","w") f.write(vcf_header+"\n") - + t=time.time() variants=tiddit_variant.main(bam_file_name,sv_clusters,args,library,min_mapq,samples,coverage_data,contig_number,max_ins_len) print("analyzed clusters in") print(time.time()-t) - + for chr in contigs: if not chr in variants: continue for variant in sorted(variants[chr], key=lambda x: x[0]): - f.write( "\t".join(variant[1])+"\n" ) + f.write( "\t".join(variant[1])+"\n" ) f.close() quit() From caaef0563ad71249ebaf5f2771349794a531bd81 Mon Sep 17 00:00:00 2001 From: jesper eisfeldt Date: Wed, 7 Sep 2022 09:13:25 +0200 Subject: [PATCH 3/3] modified: setup.py modified: tiddit/__main__.py --- setup.py | 2 +- tiddit/__main__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 35775b2..e6c1219 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( name = 'tiddit', - version = '3.3.1', + version = '3.3.2', url = "https://github.com/SciLifeLab/TIDDIT", author = "Jesper Eisfeldt", diff --git a/tiddit/__main__.py b/tiddit/__main__.py index caf9191..5bfdf82 100644 --- a/tiddit/__main__.py +++ b/tiddit/__main__.py @@ -17,7 +17,7 @@ import tiddit.tiddit_contig_analysis as tiddit_contig_analysis def main(): - version="3.3.1" + version="3.3.2" parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False) parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true") parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true")