diff --git a/splicekit/__init__.py b/splicekit/__init__.py index 7d33adf..293357f 100644 --- a/splicekit/__init__.py +++ b/splicekit/__init__.py @@ -188,7 +188,7 @@ def edgeR(run=None): if splicekit.config.platform=="SLURM": os.system('jobs=( $(ls jobs/edgeR/junctions/*.job) ); g=10; for((i=0; i < ${#jobs[@]}; i+=g)); do part=( "${jobs[@]:i:g}" ); job_ids=(); for job_fname in "${part[@]}"; do echo "[edgeR.junctions] submitted $job_fname"; job_id=$(sbatch --mem=' + config.edgeR_memory + ' --parsable ${job_fname}); job_ids+=($job_id); done; for job_id in "${job_ids[@]}"; do scontrol show job $job_id | grep -q "JobState=COMPLETED" || scontrol wait job $job_id; done; echo "[edgeR.junctions] processing next 10"; done; echo "[edgeR.junctions] processing complete"') splicekit.core.report.edgeR_feature('junctions') - splicekit.core.patterns.process() # adds donor patterns + splicekit.core.patterns.process() # adds patterns if run=="exons" or run==None: os.system(f"rm -f results/edgeR/exons/*.tab.gz > /dev/null 2>&1") diff --git a/splicekit/core/annotation.py b/splicekit/core/annotation.py index 7d4aea8..336ec96 100644 --- a/splicekit/core/annotation.py +++ b/splicekit/core/annotation.py @@ -211,6 +211,7 @@ def write_comparisons(): comps_table.write("\t".join(row) + "\n") comps_table.close() write_edgeR_jobs() + write_mds_job() def write_edgeR_jobs(): if config.platform == 'SLURM': @@ -330,6 +331,65 @@ def write_edgeR_jobs(): fsh_donor_anchors.close() fsh_acceptor_anchors.close() +def write_mds_job(): + if config.platform == 'SLURM': + job_mds=""" +#!/bin/bash +#SBATCH --job-name=edgeR_mds # Job name +#SBATCH --ntasks=1 # Number of tasks +#SBATCH --nodes=1 # All tasks on one node +#SBATCH --mem=8G # Allocate memory +#SBATCH --partition=short # Select queue +#SBATCH --output=logs/edgeR/mds/mds.out # Output file +#SBATCH --error=logs/edgeR/mds/mds.err # Error file + +module load R +{container} R --no-save --args {input_folder} {sample_membership} {filter_low} < {core_path}/comps_edgeR_mds.R + """ + + else: + + job_mds=""" +#!/bin/bash +#BSUB -J mds # job name +#BSUB -n 1 # number of tasks +#BSUB -R "span[hosts=1]" # allocate hosts +#BSUB -M {memory} # allocate memory +#BSUB -q {queue} # select queue +#BSUB -o logs/edgeR/mds/mds.out # output file +#BSUB -e logs/edgeR/mds/mds.err # error file + +ml R +{container} R --no-save --args {input_folder} {sample_membership} {filter_low} < {core_path}/comps_edgeR_mds.R + """ + + job_sh_mds="""{container} R --no-save --args {input_folder} {sample_membership} {filter_low} < {core_path}/comps_edgeR_mds.R""" + fsh_mds = open(f"jobs/edgeR/mds/process.sh", "wt") + fout_mds = open(f"jobs/edgeR/mds/mds.job", "wt") + sample_membership = {} + for (comparison_name, control_set, test_set, control_group_id, test_group_id) in annotation.comparisons: + for (sample_id, _, _, _) in control_set: + # in some rare cases, the same sample can be part of diverse control groups + #assert(sample_membership.get(sample_id, None) in [None, control_group_id]) + sample_membership[sample_id] = control_group_id + for (sample_id, _, _, _) in test_set: + # in some rare cases, the same sample can be part of diverse test groups + #assert(sample_membership.get(sample_id, None) in [None, test_group_id]) + sample_membership[sample_id] = test_group_id + sample_membership = [sample_membership[sample_id] for sample_id in annotation.samples] + try: + filter_low = splicekit.config.filter_low + except: + filter_low = "filter_low" + + # mds + job = job_mds.format(queue=config.cluster_queue, memory=config.edgeR_memory, filter_low=filter_low, container=splicekit.config.container, core_path=os.path.dirname(core.__file__), input_folder=os.getcwd(), sample_membership=",".join(str(el) for el in sample_membership)) + fout_mds.write(job) + fout_mds.close() + job = job_sh_mds.format(filter_low=filter_low, container=splicekit.config.container, input_folder=os.getcwd(), sample_membership=",".join(str(el) for el in sample_membership)) + fsh_mds.write(job+"\n") + fsh_mds.close() + def make_design_contrast(): # write design matrix f = open("annotation/design.tab", "wt") diff --git a/splicekit/folders.setup b/splicekit/folders.setup index 972b027..f6953eb 100644 --- a/splicekit/folders.setup +++ b/splicekit/folders.setup @@ -12,11 +12,13 @@ logs/edgeR/exons logs/edgeR/genes logs/edgeR/donor_anchors logs/edgeR/acceptor_anchors +logs/edgeR/mds jobs/edgeR/junctions jobs/edgeR/exons jobs/edgeR/donor_anchors jobs/edgeR/acceptor_anchors jobs/edgeR/genes +jobs/edgeR/mds jobs jobs/count_donor_anchors jobs/count_acceptor_anchors @@ -58,5 +60,6 @@ results/edgeR/genes results/edgeR/donor_anchors results/edgeR/acceptor_anchors results/edgeR/dispersion +results/edgeR/mds results/clusterlogfc report \ No newline at end of file diff --git a/splicekit/splicekit b/splicekit/splicekit index 60f4bab..3a8ba85 100755 --- a/splicekit/splicekit +++ b/splicekit/splicekit @@ -121,7 +121,7 @@ elif args.command[0]=="edgeR": sys.exit(0) if len(args.command)>1 and args.command[1] in ["junctions", "exons", "anchors", "genes"]: splicekit.core.features.load_genes() - splicekit.edgeR(run=args.command[1], args=args) + splicekit.edgeR(run=args.command[1]) elif len(args.command)==1: splicekit.edgeR() else: @@ -163,4 +163,4 @@ elif args.command[0] in ["jbrowse2", "jbrowse"]: elif args.command[0]=="report": splicekit.report.process() else: - print(help_0) + print(help_0) \ No newline at end of file