Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
Signed-off-by: Hemil Desai <[email protected]>
  • Loading branch information
hemildesai committed Dec 17, 2024
1 parent db6b60e commit 68f8792
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 3 deletions.
6 changes: 5 additions & 1 deletion src/nemo_run/core/execution/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ def package_configs(self, *cfgs: tuple[str, str]) -> list[str]:
return filenames

def package(self, packager: Packager, job_name: str):
- if job_name in self.tunnel.packaging_jobs:
+ if job_name in self.tunnel.packaging_jobs and not packager.symlink_from_remote_dir:
logger.info(
f"Packaging for job {job_name} in tunnel {self.tunnel} already done. Skipping subsequent packagings.\n"
"This may cause issues if you have multiple tasks with the same name but different packagers, as only the first packager will be used."
Expand All @@ -570,6 +570,10 @@ def package(self, packager: Packager, job_name: str):
if base_remote_mount not in self.container_mounts:
self.container_mounts.append(f"{base_remote_dir}:{base_remote_dir}")

for req in self.resource_group:
if base_remote_mount not in req.container_mounts:
req.container_mounts.append(base_remote_mount)

return

assert self.experiment_id, "Executor not assigned to an experiment."
Expand Down
4 changes: 2 additions & 2 deletions src/nemo_run/run/torchx_backend/schedulers/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ def _submit_dryrun(self, app: AppDef, cfg: Executor) -> AppDryRunInfo[Any]: # t
partition = executor.partition
assert partition is None or isinstance(partition, str), "partition must be str"

executor.package(packager=executor.packager, job_name=Path(job_dir).name)

srun_cmds: list[list[str]] = []
jobs = []
envs = {}
Expand Down Expand Up @@ -137,8 +139,6 @@ def _submit_dryrun(self, app: AppDef, cfg: Executor) -> AppDryRunInfo[Any]: # t
with open(path, "w") as f:
f.write(script)

executor.package(packager=executor.packager, job_name=Path(job_dir).name)

return AppDryRunInfo(req, repr)

def schedule(self, dryrun_info: AppDryRunInfo[SlurmBatchRequest]) -> str: # type: ignore
Expand Down

0 comments on commit 68f8792

Please sign in to comment.