Skip to content

Commit

Permalink
Merge pull request #50 from bedapub/report
Browse files Browse the repository at this point in the history
Report
  • Loading branch information
grexor authored Feb 6, 2024
2 parents 32c8aa5 + c9365bf commit 45b85a7
Show file tree
Hide file tree
Showing 9 changed files with 457 additions and 34 deletions.
5 changes: 3 additions & 2 deletions splicekit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
import splicekit.core.delta_dar
print("splicekit | loading splicekit.clusterlogfc")
import splicekit.clusterlogfc
print("splicekit | loading splicekit.report")
import splicekit.report

# initialization (triggered once on "import splicekit")
splicekit.core.annotation.compounds = {}
Expand Down Expand Up @@ -137,7 +139,6 @@ def genes():
splicekit.core.features.load_genes()
splicekit.core.features.read_genes()
os.system("rm -f data/comparison_genes_data/*.tab.gz > /dev/null 2>&1")
#splicekit.core.features.save_comps_feature_data("genes")
splicekit.core.features.make_counts_table("genes")

def features():
Expand Down Expand Up @@ -187,7 +188,7 @@ def edgeR(run=None):
if splicekit.config.platform=="SLURM":
os.system('jobs=( $(ls jobs/edgeR/junctions/*.job) ); g=10; for((i=0; i < ${#jobs[@]}; i+=g)); do part=( "${jobs[@]:i:g}" ); job_ids=(); for job_fname in "${part[@]}"; do echo "[edgeR.junctions] submitted $job_fname"; job_id=$(sbatch --mem=' + config.edgeR_memory + ' --parsable ${job_fname}); job_ids+=($job_id); done; for job_id in "${job_ids[@]}"; do scontrol show job $job_id | grep -q "JobState=COMPLETED" || scontrol wait job $job_id; done; echo "[edgeR.junctions] processing next 10"; done; echo "[edgeR.junctions] processing complete"')
splicekit.core.report.edgeR_feature('junctions')
splicekit.core.patterns.process() # adds donor patterns
splicekit.core.patterns.process() # adds patterns

if run=="exons" or run==None:
os.system(f"rm -f results/edgeR/exons/*.tab.gz > /dev/null 2>&1")
Expand Down
5 changes: 3 additions & 2 deletions splicekit/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import splicekit.core as core
import socket

module_desc = "splicekit | config |"

if os.path.exists("splicekit.config"):
config_lines = open("splicekit.config").readlines()
for cline in config_lines:
Expand All @@ -26,8 +28,7 @@ def jbrowse2_config(hostname):
hostname=socket.gethostname()
ip_addr=socket.gethostbyname(hostname)
jbrowse2_url = f"http://{ip_addr}:{port}/jbrowse2/?config=splicekit_data/config.json"

jbrowse2_config(None)
print(f"{module_desc} setting JBrowse2 URL to {jbrowse2_url}")

# read in location of gtf and fasta files
if genome_version!=None:
Expand Down
90 changes: 84 additions & 6 deletions splicekit/core/annotation.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ def make_comparisons():
f.close()
# sort by sample ID
for treatment, data in annotation.treatments.items():
annotation.treatments[treatment] = sort_readout_id(data)
try:
annotation.treatments[treatment] = sort_readout_id(data)
except:
pass
dmso_hash = {}
dmso_letter = "A"
# sometimes the separates set was reshufled
Expand Down Expand Up @@ -132,7 +135,10 @@ def make_comparisons():
annotation.comparisons.append((short_names(comparison_name), temp_control, temp_test, short_names(control_group_id), short_names(test_group_id)))
# sort and cast sample_ids to string
annotation.samples = list(samples)
annotation.samples = sort_readout_id(annotation.samples)
try:
annotation.samples = sort_readout_id(annotation.samples)
except:
pass
annotation.samples = [str(el) for el in annotation.samples]

def write_comparisons():
Expand Down Expand Up @@ -178,8 +184,14 @@ def write_comparisons():
control_ids.append(sample_id)
for (sample_id, compound, rep, _) in test_set:
test_ids.append(sample_id)
control_ids = sort_readout_id(control_ids)
test_ids = sort_readout_id(test_ids)
try:
control_ids = sort_readout_id(control_ids)
except:
pass
try:
test_ids = sort_readout_id(test_ids)
except:
pass

# write rMATS {comparison_name}_control.tab, {comparison_name}_test.tab and {comparison_name}_run.sh
for rtype, rfile in [("control", control_ids), ("test", test_ids)]:
Expand All @@ -199,6 +211,7 @@ def write_comparisons():
comps_table.write("\t".join(row) + "\n")
comps_table.close()
write_edgeR_jobs()
write_mds_job()

def write_edgeR_jobs():
if config.platform == 'SLURM':
Expand Down Expand Up @@ -264,8 +277,14 @@ def write_edgeR_jobs():
control_ids.append(sample_id)
for (sample_id, compound, rep, _) in test_set:
test_ids.append(sample_id)
control_ids = sort_readout_id(control_ids)
test_ids = sort_readout_id(test_ids)
try:
control_ids = sort_readout_id(control_ids)
except:
pass
try:
test_ids = sort_readout_id(test_ids)
except:
pass

try:
filter_low = splicekit.config.filter_low
Expand Down Expand Up @@ -312,6 +331,65 @@ def write_edgeR_jobs():
fsh_donor_anchors.close()
fsh_acceptor_anchors.close()

def write_mds_job():
if config.platform == 'SLURM':
job_mds="""
#!/bin/bash
#SBATCH --job-name=edgeR_mds # Job name
#SBATCH --ntasks=1 # Number of tasks
#SBATCH --nodes=1 # All tasks on one node
#SBATCH --mem=8G # Allocate memory
#SBATCH --partition=short # Select queue
#SBATCH --output=logs/edgeR/mds/mds.out # Output file
#SBATCH --error=logs/edgeR/mds/mds.err # Error file
module load R
{container} R --no-save --args {input_folder} {sample_membership} {filter_low} < {core_path}/comps_edgeR_mds.R
"""

else:

job_mds="""
#!/bin/bash
#BSUB -J mds # job name
#BSUB -n 1 # number of tasks
#BSUB -R "span[hosts=1]" # allocate hosts
#BSUB -M {memory} # allocate memory
#BSUB -q {queue} # select queue
#BSUB -o logs/edgeR/mds/mds.out # output file
#BSUB -e logs/edgeR/mds/mds.err # error file
ml R
{container} R --no-save --args {input_folder} {sample_membership} {filter_low} < {core_path}/comps_edgeR_mds.R
"""

job_sh_mds="""{container} R --no-save --args {input_folder} {sample_membership} {filter_low} < {core_path}/comps_edgeR_mds.R"""
fsh_mds = open(f"jobs/edgeR/mds/process.sh", "wt")
fout_mds = open(f"jobs/edgeR/mds/mds.job", "wt")
sample_membership = {}
for (comparison_name, control_set, test_set, control_group_id, test_group_id) in annotation.comparisons:
for (sample_id, _, _, _) in control_set:
# in some rare cases, the same sample can be part of diverse control groups
#assert(sample_membership.get(sample_id, None) in [None, control_group_id])
sample_membership[sample_id] = control_group_id
for (sample_id, _, _, _) in test_set:
# in some rare cases, the same sample can be part of diverse test groups
#assert(sample_membership.get(sample_id, None) in [None, test_group_id])
sample_membership[sample_id] = test_group_id
sample_membership = [sample_membership[sample_id] for sample_id in annotation.samples]
try:
filter_low = splicekit.config.filter_low
except:
filter_low = "filter_low"

# mds
job = job_mds.format(queue=config.cluster_queue, memory=config.edgeR_memory, filter_low=filter_low, container=splicekit.config.container, core_path=os.path.dirname(core.__file__), input_folder=os.getcwd(), sample_membership=",".join(str(el) for el in sample_membership))
fout_mds.write(job)
fout_mds.close()
job = job_sh_mds.format(filter_low=filter_low, container=splicekit.config.container, input_folder=os.getcwd(), sample_membership=",".join(str(el) for el in sample_membership))
fsh_mds.write(job+"\n")
fsh_mds.close()

def make_design_contrast():
# write design matrix
f = open("annotation/design.tab", "wt")
Expand Down
17 changes: 10 additions & 7 deletions splicekit/core/jbrowse2.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def setup():
# for every bam file create bigwig and cram file plus a bed file that shows the junctions
bam_dir = config.bam_path
container = config.container
bam_files = [fi for fi in os.listdir(bam_dir) if fi.endswith('.bam')]
try:
bam_files = [fi for fi in os.listdir(bam_dir) if fi.endswith('.bam')]
except:
print(f"{module_desc} no bam files found in provided bam folder")
genome_fa = config.fasta_path
gff_fname = config.gff3_path
dirs_to_check = ['logs/logs_jbrowse/', 'jobs/jobs_jbrowse/','results/results_jbrowse/']
Expand All @@ -53,7 +56,7 @@ def write_sample_jobs(force_samples):
os.system("rm -r jobs/jobs_jbrowse/* >/dev/null 2>&1") # clean up previous jobs

# create bigwig and then cram files
if config.platforn == 'SLURM':
if config.platform == 'SLURM':
job_bw="""
#!/bin/bash
#SBATCH --job-name={sample}_jbrowse # Job name
Expand Down Expand Up @@ -129,7 +132,7 @@ def create_jbrowse_config(force_annotation):
for sample in bam_files:
os.system(f"rm logs/logs_jbrowse/{sample}.out >/dev/null 2>&1")
os.system(f"rm logs/logs_jbrowse/{sample}.err >/dev/null 2>&1")
print('{module_desc} creating config.json in jbrowse2/splicekit_data')
print(f'{module_desc} creating config.json in jbrowse2/splicekit_data')
if os.path.isabs(genome_fa): # check if genome_fa path is absolute or relative
os.system(f"cd jbrowse2/splicekit_data; {container} jbrowse add-assembly {genome_fa} --name GenomeSequence --load symlink") # initialize config.json with genome sequence
else:
Expand All @@ -144,15 +147,15 @@ def create_jbrowse_config(force_annotation):
new_gff_fname = new_gff_fname
else:
new_gff_fname = "../../" + new_gff_fname
print("{module_desc} start annotation parsing")
print(f"{module_desc} start annotation parsing")
os.system(f'cd jbrowse2/splicekit_data; grep -v "^#" {new_gff_fname} | sed "s/gene/AAA/; s/mRNA/BBB/" | sort -k1,1 -k4,4n | sed "s/AAA/gene/; s/BBB/mRNA/" >| {new_gff_fname}.sorted.gff'); print('[jbrowse2] annotation sorted')
os.system(f'cd jbrowse2/splicekit_data; {container} bgzip {new_gff_fname}.sorted.gff -f' ); print('[jbrowse2] annotation compressed')
os.system(f'cd jbrowse2/splicekit_data; {container} tabix {new_gff_fname}.sorted.gff.gz'); print('[jbrowse2] annotation indexed')
os.system(f'cd jbrowse2/splicekit_data; {container} jbrowse add-track {new_gff_fname}.sorted.gff.gz --trackId annotation_track --name "Gene Annotations" --category "Annotations" --load symlink' )
# indexes all assemblies that it can find in the current directory's config.json
print("{module_desc} adding indices for text searching")
print(f"{module_desc} adding indices for text searching")
os.system(f"cd jbrowse2/splicekit_data; {container} jbrowse text-index --out .")
print("{module_desc} annotation fully parsed")
print(f"{module_desc} annotation fully parsed")

# now we add each sample's files in different track categories --------------------------------------------------------------------------------------------------------------------------
for sample in bam_files:
Expand All @@ -162,7 +165,7 @@ def create_jbrowse_config(force_annotation):
os.system(f'cd jbrowse2/splicekit_data; {container} jbrowse add-track ../../{bigwig_fname} --name {sample_id}_bw --trackId {sample_id}_bw --category "Coverage" --load symlink')
os.system(f'cd jbrowse2/splicekit_data; {container} jbrowse add-track ../../{cram_fname} --name {sample_id}_cram --trackId {sample_id}_cram --category "Reads" --load symlink')
else:
print("{module_desc} config.json already existing in jbrowse2/splicekit_data --> use 'splicekit jbrowse2 process -force' to overwrite")
print(f"{module_desc} config.json already existing in jbrowse2/splicekit_data --> use 'splicekit jbrowse2 process -force' to overwrite")

def process(force_samples=False, force_annotation=False):
check_genome()
Expand Down
2 changes: 1 addition & 1 deletion splicekit/core/motifs.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ def cluster(cutoff=9):
r = f.readline()
f.close()

if len(cmd)==0:
if len(cdm)==0:
continue

Z = linkage(cdm, 'ward')
Expand Down
6 changes: 5 additions & 1 deletion splicekit/folders.setup
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@ logs/edgeR/exons
logs/edgeR/genes
logs/edgeR/donor_anchors
logs/edgeR/acceptor_anchors
logs/edgeR/mds
jobs/edgeR/junctions
jobs/edgeR/exons
jobs/edgeR/donor_anchors
jobs/edgeR/acceptor_anchors
jobs/edgeR/genes
jobs/edgeR/mds
jobs
jobs/count_donor_anchors
jobs/count_acceptor_anchors
Expand Down Expand Up @@ -58,4 +60,6 @@ results/edgeR/genes
results/edgeR/donor_anchors
results/edgeR/acceptor_anchors
results/edgeR/dispersion
results/clusterlogfc
results/edgeR/mds
results/clusterlogfc
report
2 changes: 1 addition & 1 deletion splicekit/judge/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def plot(comp_name):
fig.update_layout(title_font_size=11)
fig.update_layout(font={"size":11})
fig.update_traces(marker={"size":4})
fig.update_traces(customdata=[df["gene_name"].to_numpy()], hovertemplate="%{z} (gene)")
fig.update_traces(customdata=df["gene_name"].to_numpy(), hovertemplate="%{customdata} (gene)")
fig.write_html("results/judge/plots/{comp_name}.html".format(comp_name=comp_name), full_html=False, include_plotlyjs="cdn")
return True

Expand Down
Loading

0 comments on commit 45b85a7

Please sign in to comment.