Skip to content

Commit

Permalink
Fix status methods to use the job_id getter
Browse files — browse the repository at this point in the history
  • Loading branch information
BenGalewsky committed Aug 5, 2024
1 parent c090d56 commit 28d1478
Showing 1 changed file with 6 additions and 5 deletions.
11 changes (6 additions & 5 deletions) in mlflow_slurm/slurm_backend.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -79,10 +79,10 @@ def wait(self):
while not self.is_terminated_or_gone():
time.sleep(self.POLL_STATUS_INTERVAL)

with open(f"slurm-{self.slurm_job_id}.out") as file:
with open(f"slurm-{self.job_id}.out") as file:
log_lines = file.read()
MlflowClient().log_text(self.run_id, log_lines,
f"slurm-{self.slurm_job_id}.txt")
f"slurm-{self.job_id}.txt")
return self._status == RunStatus.FINISHED

def cancel(self) -> None:
Expand All @@ -92,7 +92,7 @@ def get_status(self) -> RunStatus:
return self._status

def _update_status(self) -> RunStatus:
with subprocess.Popen(f"squeue --state=all --job={self.slurm_job_id} -o%A,%t",
with subprocess.Popen(f"squeue --state=all --job={self.job_id} -o%A,%t",
stdout=subprocess.PIPE,
stderr=subprocess.PIPE, shell=True,
universal_newlines=True) as p:
Expand All @@ -102,7 +102,7 @@ def _update_status(self) -> RunStatus:
with self._status_lock:
if not job:
_logger.warning(
f"Looking for status of job {self.slurm_job_id}, but it is gone")
f"Looking for status of job {self.job_id}, but it is gone")
self._status = RunStatus.FINISHED

job_status = output[1].split(",")[1]
Expand All @@ -120,7 +120,7 @@ def _update_status(self) -> RunStatus:
self._status = RunStatus.RUNNING
else:
_logger.warning(
f"Job ID {self.slurm_job_id} has an unmapped status of {job_status}")
f"Job ID {self.job_id} has an unmapped status of {job_status}")
self._status = None


Expand Down Expand Up @@ -220,6 +220,7 @@ def run(self, project_uri: str, entry_point: str, params: dict, version: str,

previous_job = ""
job_ids = []
job_id = "N/A"
for worker in range(int(params.get("sequential_workers", 1))):
job_id = SlurmProjectBackend.sbatch(sbatch_file, previous_job)
job_ids.append(job_id)
Expand Down

0 comments on commit 28d1478

Please sign in to comment.