Skip to content

Commit

Permalink
Merge pull request #21 from SciLifeLab/master
Browse files Browse the repository at this point in the history
edits from anders
  • Loading branch information
J35P312 authored Sep 7, 2022
2 parents 3dada4d + caaef05 commit dc37e29
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 25 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name = 'tiddit',
version = '3.3.1',
version = '3.3.2',

url = "https://github.com/SciLifeLab/TIDDIT",
author = "Jesper Eisfeldt",
Expand Down
22 changes: 11 additions & 11 deletions tiddit/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import tiddit.tiddit_contig_analysis as tiddit_contig_analysis

def main():
version="3.3.1"
version="3.3.2"
parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False)
parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true")
parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true")
Expand Down Expand Up @@ -88,7 +88,7 @@ def main():
if not os.path.isfile(args.bam):
print ("error, could not find the bam file")
quit()

bam_file_name=args.bam
samfile = pysam.AlignmentFile(bam_file_name, "r",reference_filename=args.ref)

Expand Down Expand Up @@ -122,10 +122,10 @@ def main():
print("Folder already exists")

pysam.index("-c","-m","6","-@",str(args.threads),bam_file_name,"{}_tiddit/{}.csi".format(args.o,sample_id))

min_mapq=args.q
max_ins_len=100000
n_reads=args.s
n_reads=args.s

library=tiddit_stats.statistics(bam_file_name,args.ref,min_mapq,max_ins_len,n_reads)
if args.i:
Expand Down Expand Up @@ -153,29 +153,29 @@ def main():
print(time.time()-t)

vcf_header=tiddit_vcf_header.main( bam_header,library,sample_id,version )

if not args.e:
args.e=int(library["avg_insert_size"]/2.0)

t=time.time()
sv_clusters=tiddit_cluster.main(prefix,contigs,contig_length,samples,library["mp"],args.e,args.l,max_ins_len,args.min_contig,args.skip_assembly)

print("generated clusters in")
print(time.time()-t)

f=open(prefix+".vcf","w")
f.write(vcf_header+"\n")

t=time.time()
variants=tiddit_variant.main(bam_file_name,sv_clusters,args,library,min_mapq,samples,coverage_data,contig_number,max_ins_len)
print("analyzed clusters in")
print(time.time()-t)

for chr in contigs:
if not chr in variants:
continue
for variant in sorted(variants[chr], key=lambda x: x[0]):
f.write( "\t".join(variant[1])+"\n" )
f.write( "\t".join(variant[1])+"\n" )
f.close()
quit()

Expand Down
26 changes: 13 additions & 13 deletions tiddit/tiddit_variant.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c

q_start=start
q_end=end+max_ins

if q_end > contig_length:
q_end=contig_length

Expand All @@ -44,14 +44,14 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c

if not read.mate_is_unmapped:
if read.next_reference_start > end and read_reference_start > end:
continue
continue
else:
if read_reference_start > end:
continue

if read.is_duplicate:
continue

if not (read_reference_start > end):
n_reads+=1
if read.mapq < min_q:
Expand Down Expand Up @@ -83,7 +83,7 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c

if read_reference_start < start:
r_start=start

if read_reference_end > end:
r_end=end

Expand Down Expand Up @@ -133,7 +133,7 @@ def find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,librar
return("DUP:TANDEM",cn)
elif cn < p:
return("DEL",cn)

elif inverted > non_inverted:
return("INV",cn)
else:
Expand Down Expand Up @@ -253,7 +253,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
s=int(math.floor(posA/50.0))
e=int(math.floor(posB/50.0))+1
sample_data[sample]["covM"]=numpy.average(coverage_data[chrA][s:e] )

inverted=0
non_inverted=0
for i in range(0,len(sv_clusters[chrA][chrB][cluster]["positions_A"]["orientation_discordants"]) ):
Expand All @@ -277,12 +277,12 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
svtype,cn=find_sv_type(chrA,chrB,inverted,non_inverted,args,sample_data,samples,library)

filt=sv_filter(sample_data,args,chrA,chrB,posA,posB,max_ins_len,n_discordants,n_splits,library,sample_data[sample]["discA"],sample_data[sample]["discB"],sample_data[sample]["splitA"],sample_data[sample]["splitB"],n_contigs)
format_col="GT:CN:COV:DV:RV:LQ:RR:RD"
format_col="GT:CN:COV:DV:RV:LQ:RR:DR"

#configure filters for CNV based on Read depth
for sample in samples:
if "DEL" in svtype:
#homozygout del based on coverage
#homozygout del based on coverage
if cn == 0:
filt="PASS"

Expand All @@ -294,7 +294,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
if covA > covM*(cn+0.9) and covB > covM*(cn+0.9):
filt="PASS"

#too few reads, but clear RD signal
#too few reads, but clear DR signal
elif "DUP" in svtype and filt == "BelowExpectedLinks":
filt="PASS"

Expand Down Expand Up @@ -361,7 +361,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
GT= "1/1"
else:
GT="0/1"

variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
variants.append([chrA,posA,variant])
else:
Expand Down Expand Up @@ -472,7 +472,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar


variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
variants.append([chrB,posB,variant])
variants.append([chrB,posB,variant])

samfile.close()
return(variants)
Expand All @@ -481,7 +481,7 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl
contig_seqs={}
new_seq=False
if not args.skip_assembly:
for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)):
for line in open("{}_tiddit/clips.fa.assembly.clean.mag".format(args.o)):

if not new_seq and line[0] == "@" and "\t" in line:
name=line.split("\t")[0][1:]
Expand All @@ -501,5 +501,5 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl
for v in variants_list:
for variant in v:
variants[ variant[0] ].append( [ variant[1],variant[2] ] )

return(variants)

0 comments on commit dc37e29

Please sign in to comment.