Skip to content

Commit

Permalink
modified: setup.py
Browse files Browse the repository at this point in the history
	modified:   tiddit/__main__.py
	modified:   tiddit/tiddit_cluster.pyx
	modified:   tiddit/tiddit_contig_analysis.pyx
	modified:   tiddit/tiddit_signal.pyx
	modified:   tiddit/tiddit_variant.pyx
  • Loading branch information
J35P312 committed Dec 19, 2022
1 parent dc37e29 commit 936c3cd
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 27 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@

setup(
name = 'tiddit',
version = '3.3.2',
version = '3.4.0',

url = "https://github.com/SciLifeLab/TIDDIT",
author = "Jesper Eisfeldt",
Expand Down
2 changes: 1 addition & 1 deletion tiddit/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import tiddit.tiddit_contig_analysis as tiddit_contig_analysis

def main():
version="3.3.2"
version="3.4.0"
parser = argparse.ArgumentParser("""tiddit-{}""".format(version),add_help=False)
parser.add_argument("--sv" , help="call structural variation", required=False, action="store_true")
parser.add_argument("--cov" , help="generate a coverage bed file", required=False, action="store_true")
Expand Down
27 changes: 19 additions & 8 deletions tiddit/tiddit_cluster.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
if int(posB) > contig_length[chrB]:
posA=contig_length[chrB]

discordants[chrA][chrB].append([content[0],sample,"D",posA,content[5],posB,content[8],i])
discordants[chrA][chrB].append([content[0],sample,"D",posA,content[5],posB,content[8],i,int(content[3]),int(content[4]),int(content[6]),int(content[7])])
positions[chrA][chrB].append([int(posA),int(posB),i])
i+=1

Expand Down Expand Up @@ -99,7 +99,7 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
if int(posB) > contig_length[chrB]:
posB=contig_length[chrB]

discordants[chrA][chrB].append([content[0],sample,"S",posA,content[4],posB,content[6],i])
discordants[chrA][chrB].append([content[0],sample,"S",posA,content[4],posB,content[6],i,int(content[7]),int(content[8]),int(content[9]),int(content[10])])
positions[chrA][chrB].append([int(posA),int(posB),i])
i+=1

Expand Down Expand Up @@ -132,7 +132,7 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
if int(posB) > contig_length[chrB]:
posB=contig_length[chrB]

discordants[chrA][chrB].append([content[0],sample,"A",posA,content[4],posB,content[6],i])
discordants[chrA][chrB].append([content[0],sample,"A",posA,content[4],posB,content[6],i,int(content[7]),int(content[8]),int(content[9]),int(content[10])])
positions[chrA][chrB].append([int(posA),int(posB),i])
contigs.add(i)
i+=1
Expand Down Expand Up @@ -197,6 +197,9 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
candidates[chrA][chrB][candidate]["positions_A"]["orientation_contigs"]=[]
candidates[chrA][chrB][candidate]["positions_A"]["orientation_splits"]=[]
candidates[chrA][chrB][candidate]["positions_A"]["orientation_discordants"]=[]
candidates[chrA][chrB][candidate]["positions_A"]["start"]=[]
candidates[chrA][chrB][candidate]["positions_A"]["end"]=[]

candidates[chrA][chrB][candidate]["start_A"]=0
candidates[chrA][chrB][candidate]["end_A"]=0

Expand All @@ -208,6 +211,8 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
candidates[chrA][chrB][candidate]["positions_B"]["orientation_contigs"]=[]
candidates[chrA][chrB][candidate]["positions_B"]["orientation_splits"]=[]
candidates[chrA][chrB][candidate]["positions_B"]["orientation_discordants"]=[]
candidates[chrA][chrB][candidate]["positions_B"]["start"]=[]
candidates[chrA][chrB][candidate]["positions_B"]["end"]=[]

candidates[chrA][chrB][candidate]["start_B"]=0
candidates[chrA][chrB][candidate]["end_B"]=0
Expand All @@ -218,7 +223,13 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
candidates[chrA][chrB][candidate]["sample_contigs"][discordants[chrA][chrB][i][1]]=set([])

candidates[chrA][chrB][candidate]["samples"].add(discordants[chrA][chrB][i][1])


candidates[chrA][chrB][candidate]["positions_A"]["start"].append(discordants[chrA][chrB][i][8])
candidates[chrA][chrB][candidate]["positions_A"]["end"].append(discordants[chrA][chrB][i][9])

candidates[chrA][chrB][candidate]["positions_B"]["start"].append(discordants[chrA][chrB][i][10])
candidates[chrA][chrB][candidate]["positions_B"]["end"].append(discordants[chrA][chrB][i][11])

if discordants[chrA][chrB][i][2] == "D":
candidates[chrA][chrB][candidate]["discordants"].add(discordants[chrA][chrB][i][0])
candidates[chrA][chrB][candidate]["positions_A"]["discordants"].append(int(discordants[chrA][chrB][i][3]))
Expand Down Expand Up @@ -321,11 +332,11 @@ def main(prefix,chromosomes,contig_length,samples,is_mp,epsilon,m,max_ins_len,mi
candidates[chrA][chrB][candidate]["posA"]
candidates[chrA][chrB][candidate]["posB"]

candidates[chrA][chrB][candidate]["startB"]=min(candidates[chrA][chrB][candidate]["positions_B"]["contigs"]+candidates[chrA][chrB][candidate]["positions_B"]["splits"]+candidates[chrA][chrB][candidate]["positions_B"]["discordants"])
candidates[chrA][chrB][candidate]["endB"]=max(candidates[chrA][chrB][candidate]["positions_B"]["contigs"]+candidates[chrA][chrB][candidate]["positions_B"]["splits"]+candidates[chrA][chrB][candidate]["positions_B"]["discordants"])
candidates[chrA][chrB][candidate]["startB"]=min(candidates[chrA][chrB][candidate]["positions_B"]["start"])
candidates[chrA][chrB][candidate]["endB"]=max(candidates[chrA][chrB][candidate]["positions_B"]["end"])

candidates[chrA][chrB][candidate]["startA"]=min(candidates[chrA][chrB][candidate]["positions_A"]["contigs"]+candidates[chrA][chrB][candidate]["positions_A"]["splits"]+candidates[chrA][chrB][candidate]["positions_A"]["discordants"])
candidates[chrA][chrB][candidate]["endA"]=max(candidates[chrA][chrB][candidate]["positions_A"]["contigs"]+candidates[chrA][chrB][candidate]["positions_A"]["splits"]+candidates[chrA][chrB][candidate]["positions_A"]["discordants"])
candidates[chrA][chrB][candidate]["startA"]=min(candidates[chrA][chrB][candidate]["positions_A"]["start"])
candidates[chrA][chrB][candidate]["endA"]=max(candidates[chrA][chrB][candidate]["positions_A"]["end"])

return(candidates)

Expand Down
3 changes: 2 additions & 1 deletion tiddit/tiddit_contig_analysis.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ def read_contigs(aligned_contigs,prefix,sample_id,min_size):
current_bp=read.reference_start
for i in range(0,len(read.cigartuples)-1):
if read.cigartuples[i][0] == 2 and read.cigartuples[i][1] > min_size:
split_contigs[read.reference_name][read.reference_name]["{}_d_{}".format(read.query_name,i)]=[current_bp,read.is_reverse,current_bp+read.cigartuples[i][1],read.is_reverse]

split_contigs[read.reference_name][read.reference_name]["{}_d_{}".format(read.query_name,i)]=[current_bp,read.is_reverse,current_bp+read.cigartuples[i][1],read.is_reverse,read.reference_start,current_bp,current_bp+read.cigartuples[i][1],read.reference_end]
current_bp+=read.cigartuples[i][1]

f=open("{}_tiddit/contigs_{}.tab".format(prefix,sample_id),"w")
Expand Down
45 changes: 39 additions & 6 deletions tiddit/tiddit_signal.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -66,18 +66,40 @@ def SA_analysis(read,min_q,tag,reference_name):
cdef long read_query_alignment_end=read.query_alignment_end

if (read.query_alignment_start ) < (read.query_length - read_query_alignment_end):
split_pos=read.reference_end+1
if read.is_reverse:


split_pos=read.reference_start+1
else:
split_pos=read.reference_end+1
else:
split_pos=read.reference_start+1
if read.is_reverse:
split_pos=read.reference_end+1
else:
split_pos=read.reference_start+1

supplementry_alignment=find_SA_query_range(SA_data)
SA_chr=SA_data[0]

startA=read.reference_start+1
endA=read.reference_end+1

startB=supplementry_alignment.reference_start
endB=supplementry_alignment.reference_end


if (supplementry_alignment.query_alignment_start ) < (supplementry_alignment.query_length - read_query_alignment_end):
SA_split_pos=supplementry_alignment.reference_end
if SA_data[2] == "-":

SA_split_pos=supplementry_alignment.reference_start
else:
SA_split_pos=supplementry_alignment.reference_end
else:
SA_split_pos=supplementry_alignment.reference_start
if SA_data[2] == "-":
SA_split_pos=supplementry_alignment.reference_end

else:
SA_split_pos=supplementry_alignment.reference_start


if SA_chr < reference_name:
Expand All @@ -87,6 +109,12 @@ def SA_analysis(read,min_q,tag,reference_name):
split_pos=SA_split_pos
SA_split_pos=tmp

startB=read.reference_start+1
endB=read.reference_end+1
startA=supplementry_alignment.reference_start
endA=supplementry_alignment.reference_end


else:
chrA=reference_name
chrB=SA_chr
Expand All @@ -97,11 +125,16 @@ def SA_analysis(read,min_q,tag,reference_name):
split_pos=SA_split_pos
SA_split_pos=tmp

startB=read.reference_start+1
endB=read.reference_end+1
startA=supplementry_alignment.reference_start
endA=supplementry_alignment.reference_end

split=[]
if "-" == SA_data[2]:
split=[chrA,chrB,read.query_name,split_pos,read.is_reverse,SA_split_pos, True]
split=[chrA,chrB,read.query_name,split_pos,read.is_reverse,SA_split_pos, True,startA,endA,startB,endB]
else:
split=[chrA,chrB,read.query_name,split_pos,read.is_reverse,SA_split_pos,False]
split=[chrA,chrB,read.query_name,split_pos,read.is_reverse,SA_split_pos,False,startA,endA,startB,endB]
#splits[chrA][chrB][read.query_name]+=[split_pos,read.is_reverse,SA_split_pos,False]

return(split)
Expand Down
69 changes: 59 additions & 10 deletions tiddit/tiddit_variant.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,37 @@ from joblib import Parallel, delayed
#from pysam.libcalignmentfile cimport AlignmentFile, AlignedSegment
from pysam import AlignmentFile, AlignedSegment


def scoring(scoring_dict, percentiles):
    """Compute a quality score for a structural-variant call.

    The score is the maximum of several candidate sub-scores:
      * 0                     — baseline (returned when no evidence exists)
      * 50                    — flat score if any assembled contig supports the call
      * discordant-pair score — 5 points per percentile threshold (FA / FB)
        that the discordant-support ratio meets or exceeds
      * split-read score      — 5 points per percentile threshold (RA / RB)
        that the split-read-support ratio meets or exceeds

    Parameters
    ----------
    scoring_dict : dict
        Per-variant evidence counts: "n_contigs", "n_discordants",
        "n_splits", and the reference-support counts "refFA", "refFB",
        "refRA", "refRB" used as denominators of the support ratios.
    percentiles : dict
        Percentile thresholds (iterables of floats) under keys
        "FA", "FB", "RA", "RB", as produced by numpy.percentile over the
        cohort-wide support-ratio distributions.

    Returns
    -------
    int
        The best (maximum) sub-score.
    """
    score = [0]

    if scoring_dict["n_contigs"]:
        score.append(50)

    n_disc = scoring_dict["n_discordants"]
    if n_disc:
        # Hoist the loop-invariant support ratios out of the percentile loops.
        ratio_fa = n_disc / (scoring_dict["refFA"] + n_disc)
        ratio_fb = n_disc / (scoring_dict["refFB"] + n_disc)
        # 5 points for every percentile threshold the ratio reaches.
        score.append(sum(5 for p in percentiles["FA"] if ratio_fa >= p))
        score.append(sum(5 for p in percentiles["FB"] if ratio_fb >= p))

    n_splits = scoring_dict["n_splits"]
    if n_splits:
        ratio_ra = n_splits / (scoring_dict["refRA"] + n_splits)
        ratio_rb = n_splits / (scoring_dict["refRB"] + n_splits)
        score.append(sum(5 for p in percentiles["RA"] if ratio_ra >= p))
        score.append(sum(5 for p in percentiles["RB"] if ratio_rb >= p))

    return max(score)

def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, contig_number):

cdef int low_q=0
Expand Down Expand Up @@ -67,10 +98,10 @@ def get_region(samfile,str chr,int start,int end,int bp,int min_q,int max_ins, c
r_start=read_reference_start
r_end=read_reference_end

if read_reference_start < bp-10 and r_end > bp:
if read_reference_start < bp-20 and r_end > bp+20:
crossing_r+=1

mate_bp_read= (read.next_reference_start < bp and r_end > bp)
mate_bp_read= (read.next_reference_start < bp-50 and r_end > bp+50)
discordant= ( abs(read.isize) > max_ins or read_next_reference_name != read_reference_name )

if mate_bp_read and not discordant:
Expand Down Expand Up @@ -281,14 +312,16 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar

#configure filters for CNV based on Read depth
for sample in samples:

covA=sample_data[sample]["covA"]
covM=sample_data[sample]["covM"]
covB=sample_data[sample]["covB"]

if "DEL" in svtype:
#homozygous del based on coverage
if cn == 0:
filt="PASS"

covA=sample_data[sample]["covA"]
covM=sample_data[sample]["covM"]
covB=sample_data[sample]["covB"]

#normal coverage on the flanking regions, abnormal in between
if covA > covM*(cn+0.9) and covB > covM*(cn+0.9):
Expand All @@ -297,8 +330,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
#too few reads, but clear DR signal
elif "DUP" in svtype and filt == "BelowExpectedLinks":
filt="PASS"


scoring_dict={"n_contigs":n_contigs, "n_discordants":n_discordants,"n_splits":n_splits,"covA":covA,"covM":covM,"covB":covB,"refRA":sample_data[sample]["refRA"],"refRB":sample_data[sample]["refRB"],"refFA":sample_data[sample]["refFA"],"refFB":sample_data[sample]["refFB"]}

if svtype != "BND":
info=["SVTYPE={}".format(svtype),"SVLEN={}".format(posB-posA),"END={}".format(posB)]
Expand Down Expand Up @@ -363,7 +395,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar
GT="0/1"

variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
variants.append([chrA,posA,variant])
variants.append([chrA,posA,variant,scoring_dict])
else:
info=["SVTYPE=BND".format(svtype)]
inverted=False
Expand Down Expand Up @@ -439,7 +471,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar


variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
variants.append([chrA,posA,variant])
variants.append([chrA,posA,variant,scoring_dict])


variant=[chrB,str(posB),"SV_{}_2".format(var_n),"N",alt_str_b,".",filt,info,format_col]
Expand Down Expand Up @@ -472,7 +504,7 @@ def define_variant(str chrA, str bam_file_name,dict sv_clusters,args,dict librar


variant.append( "{}:{}:{},{},{}:{}:{}:{},{}:{},{}:{},{}".format(GT,cn,sample_data[sample]["covA"],sample_data[sample]["covM"],sample_data[sample]["covB"],n_discordants,n_splits,sample_data[sample]["QA"],sample_data[sample]["QB"],sample_data[sample]["refRA"],sample_data[sample]["refRB"],sample_data[sample]["refFA"],sample_data[sample]["refFB"]) )
variants.append([chrB,posB,variant])
variants.append([chrB,posB,variant, scoring_dict ])

samfile.close()
return(variants)
Expand All @@ -498,8 +530,25 @@ def main(str bam_file_name,dict sv_clusters,args,dict library,int min_mapq,sampl

variants_list=Parallel(n_jobs=args.threads)( delayed(define_variant)(chrA,bam_file_name,sv_clusters,args,library,min_mapq,samples,coverage_data,contig_number,max_ins_len,contig_seqs) for chrA in sv_clusters)

ratios={"fragments_A":[],"fragments_B":[],"reads_A":[],"reads_B":[]}
for v in variants_list:
for variant in v:
if variant[3]["n_discordants"]:
ratios["fragments_A"].append(variant[3]["n_discordants"]/(variant[3]["refFA"]+variant[3]["n_discordants"]) )
ratios["fragments_B"].append(variant[3]["n_discordants"]/(variant[3]["refFB"]+variant[3]["n_discordants"]) )

if variant[3]["n_splits"]:
ratios["reads_A"].append(variant[3]["n_splits"]/(variant[3]["refRA"]+variant[3]["n_splits"]) )
ratios["reads_B"].append(variant[3]["n_splits"]/(variant[3]["refRB"]+variant[3]["n_splits"]) )


p=[1,5,10,20,30,40,50,60,70,75,80,85,90,95,97.5,99]
percentiles={"FA":numpy.percentile(ratios["fragments_A"],p),"FB":numpy.percentile(ratios["fragments_B"],p),"RA":numpy.percentile(ratios["reads_A"],p),"RB":numpy.percentile(ratios["reads_B"],p)}

for v in variants_list:
for variant in v:
score=scoring(variant[3],percentiles)
variant[2][5]=str(score)
variants[ variant[0] ].append( [ variant[1],variant[2] ] )

return(variants)

0 comments on commit 936c3cd

Please sign in to comment.