From 23c38cd2c6fd52602816ec63db04462f463080b2 Mon Sep 17 00:00:00 2001
From: jarema
Date: Mon, 3 Sep 2018 23:19:15 +0200
Subject: [PATCH] generate canceling script

Write an executable cancel.sh into the generator's main directory.
Its text is copied from a scheduler-specific template (kill_slurm.sh
or kill_torque.sh). The script looks for submit.log in its own
directory, extracts every "Job ID: <id>" entry and passes the id to
scancel (slurm) or qdel (torque).

---
Review notes (ignored when the patch is applied):
 * submit_kill_body() returns the template text without assigning it
   to self.kill_script, which holds the output file name 'cancel.sh'.
 * write_kill_script() writes through a with-block so the file is
   closed even if the write fails.
 * The shell templates quote every path, read the log with
   "while read -r ... done < file" instead of "cat | while read", and
   require at least one digit in a job id.
 * The line structure of this patch was restored from a copy whose
   whitespace had been collapsed. Indentation and blank lines of the
   context lines are reconstructed, so applying may need
   "git apply --ignore-whitespace".
 * The post-image blob ids on the index lines of base.py,
   kill_slurm.sh and kill_torque.sh are those of the earlier revision.
 * The From: header carries no e-mail address in this copy.

 mcpartools/generator.py                  |  8 +++++++
 mcpartools/scheduler/base.py             | 15 ++++++++++++
 mcpartools/scheduler/data/kill_slurm.sh  | 29 ++++++++++++++++++++++++
 mcpartools/scheduler/data/kill_torque.sh | 29 ++++++++++++++++++++++++
 mcpartools/scheduler/slurm.py            |  2 ++
 mcpartools/scheduler/torque.py           |  2 ++
 6 files changed, 85 insertions(+)
 create mode 100644 mcpartools/scheduler/data/kill_slurm.sh
 create mode 100644 mcpartools/scheduler/data/kill_torque.sh

diff --git a/mcpartools/generator.py b/mcpartools/generator.py
index 8fcfc19..be93313 100644
--- a/mcpartools/generator.py
+++ b/mcpartools/generator.py
@@ -140,6 +140,9 @@ def run(self):
         # generate submit script
         self.generate_submit_script()
 
+        # generate kill script
+        self.generate_kill_script()
+
         # copy input files
         self.copy_input()
 
@@ -201,6 +204,11 @@ def generate_submit_script(self):
                                            jobs_no=self.options.jobs_no,
                                            workspace_dir=self.workspace_dir)
 
+    def generate_kill_script(self):
+        script_path = os.path.join(self.main_dir, self.scheduler.kill_script)
+        logger.debug("Preparation to generate " + script_path)
+        self.scheduler.write_kill_script(script_path)
+
     def copy_input(self):
         indir_name = 'input'
         indir_path = os.path.join(self.main_dir, indir_name)
diff --git a/mcpartools/scheduler/base.py b/mcpartools/scheduler/base.py
index 3f58efa..3a83649 100644
--- a/mcpartools/scheduler/base.py
+++ b/mcpartools/scheduler/base.py
@@ -25,6 +25,7 @@ def __init__(self, scheduler_options):
 
     submit_script = 'submit.sh'
     main_run_script = 'main_run.sh'
+    kill_script = 'cancel.sh'
 
     def submit_script_body(self, jobs_no, main_dir, workspace_dir):
         from pkg_resources import resource_string
@@ -51,6 +52,13 @@ def main_run_script_body(self, jobs_no, workspace_dir):
                                                           jobs_no=jobs_no)
         return self.main_run_script
 
+    def submit_kill_body(self):
+        """Return the text of the scheduler-specific kill script template."""
+        from pkg_resources import resource_string
+        tpl = resource_string(__name__, self.kill_script_template)
+        # kill_script holds the output file name, so it is not overwritten here
+        return tpl.decode('ascii')
+
     def write_submit_script(self, main_dir, script_basename, jobs_no, workspace_dir):
         script_path = os.path.join(main_dir, script_basename)
         fd = open(script_path, 'w')
@@ -72,3 +80,10 @@ def write_main_run_script(self, jobs_no, output_dir):
         os.chmod(out_file_path, 0o750)
         logger.debug("Saved main run script: " + out_file_path)
         logger.debug("Output dir " + output_dir)
+
+    def write_kill_script(self, script_path):
+        """Write the kill script to script_path and make it executable."""
+        with open(script_path, 'w') as fd:
+            fd.write(self.submit_kill_body())
+        os.chmod(script_path, 0o750)
+        logger.debug("Saved kill script: " + script_path)
diff --git a/mcpartools/scheduler/data/kill_slurm.sh b/mcpartools/scheduler/data/kill_slurm.sh
new file mode 100644
index 0000000..39e6991
--- /dev/null
+++ b/mcpartools/scheduler/data/kill_slurm.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# submit.log is for storing stdout and stderr of submit.sh
+LOGFILE="$(cd "$(dirname "$0")" && pwd)/submit.log"
+
+# require at least one digit so that an empty job id never matches
+RE="Job ID: ([0-9]+)"
+
+# no log file - submit.sh has probably not been run yet
+if [ ! -f "$LOGFILE" ]; then
+    echo "File not found: $LOGFILE"
+    echo "Make sure you run submit script"
+    exit 1
+fi
+
+# cancel every job whose id is recorded in the log;
+# read -r keeps backslashes in the log lines intact
+while IFS= read -r line
+do
+    if [[ ${line} =~ $RE ]]
+    then
+        JOB_ID=${BASH_REMATCH[1]}
+        if scancel "${JOB_ID}"; then
+            echo "Job with id: $JOB_ID canceled successfully"
+        else
+            echo "Unable to cancel job: $JOB_ID"
+        fi
+    fi
+done < "$LOGFILE"
diff --git a/mcpartools/scheduler/data/kill_torque.sh b/mcpartools/scheduler/data/kill_torque.sh
new file mode 100644
index 0000000..5242713
--- /dev/null
+++ b/mcpartools/scheduler/data/kill_torque.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# submit.log is for storing stdout and stderr of submit.sh
+LOGFILE="$(cd "$(dirname "$0")" && pwd)/submit.log"
+
+# at least one digit followed by a literal "[]"
+RE="Job ID: ([0-9]+\[\])"
+
+# no log file - submit.sh has probably not been run yet
+if [ ! -f "$LOGFILE" ]; then
+    echo "File not found: $LOGFILE"
+    echo "Make sure you run submit script"
+    exit 1
+fi
+
+# cancel every job whose id is recorded in the log;
+# read -r keeps backslashes in the log lines intact
+while IFS= read -r line
+do
+    if [[ ${line} =~ $RE ]]
+    then
+        JOB_ID=${BASH_REMATCH[1]}
+        if qdel "${JOB_ID}"; then
+            echo "Job with id: $JOB_ID canceled successfully"
+        else
+            echo "Unable to cancel job: $JOB_ID"
+        fi
+    fi
+done < "$LOGFILE"
diff --git a/mcpartools/scheduler/slurm.py b/mcpartools/scheduler/slurm.py
index b809ac2..4640798 100644
--- a/mcpartools/scheduler/slurm.py
+++ b/mcpartools/scheduler/slurm.py
@@ -13,3 +13,5 @@ def __init__(self, options_content):
     submit_script_template = os.path.join('data', 'submit_slurm.sh')
 
     main_run_script_template = os.path.join('data', 'run_slurm.sh')
+
+    kill_script_template = os.path.join('data', 'kill_slurm.sh')
diff --git a/mcpartools/scheduler/torque.py b/mcpartools/scheduler/torque.py
index 7fcf067..2c88716 100644
--- a/mcpartools/scheduler/torque.py
+++ b/mcpartools/scheduler/torque.py
@@ -13,3 +13,5 @@ def __init__(self, options_content):
     submit_script_template = os.path.join('data', 'submit_torque.sh')
 
     main_run_script_template = os.path.join('data', 'run_torque.sh')
+
+    kill_script_template = os.path.join('data', 'kill_torque.sh')