diff --git a/src/simulation/m_global_parameters.fpp b/src/simulation/m_global_parameters.fpp index 8f5cdff10..81e8603c4 100644 --- a/src/simulation/m_global_parameters.fpp +++ b/src/simulation/m_global_parameters.fpp @@ -718,6 +718,8 @@ contains if (fluid_pp(i)%Re(1) > 0) Re_size(1) = Re_size(1) + 1 if (fluid_pp(i)%Re(2) > 0) Re_size(2) = Re_size(2) + 1 end do + + !$acc update device(Re_size) ! Bookkeeping the indexes of any viscous fluids and any pairs of ! fluids whose interface will support effects of surface tension @@ -770,8 +772,8 @@ contains ! cell-boundary values or otherwise, the unaltered left and right, ! WENO-reconstructed, cell-boundary values wa_flg = 0d0; IF(weno_avg) wa_flg = 1d0 + !$acc update device(wa_flg) -!$acc update device(Re_size) ! Determining the number of cells that are needed in order to store ! sufficient boundary conditions data as to iterate the solution in ! the physical computational domain from one time-step iteration to diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index 85353412f..ef7bfa485 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -74,15 +74,16 @@ def add_common_arguments(p, mask = None): # === TEST === add_common_arguments(test, "t") - test.add_argument("-l", "--list", action="store_true", help="List all available tests.") - test.add_argument("-f", "--from", default=TEST_CASES[0].get_uuid(), type=str, help="First test UUID to run.") - test.add_argument("-t", "--to", default=TEST_CASES[-1].get_uuid(), type=str, help="Last test UUID to run.") - test.add_argument("-o", "--only", nargs="+", type=str, default=[], metavar="L", help="Only run tests with UUIDs or hashes L.") - test.add_argument("-b", "--binary", choices=binaries, type=str, default=None, help="(Serial) Override MPI execution binary") - test.add_argument("-r", "--relentless", action="store_true", default=False, help="Run all tests, even if multiple fail.") - test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.") - test.add_argument("-g", "--gpus", type=str, default="0", help="(GPU) Comma separated list of GPU #s to use.") - test.add_argument("-%", "--percent", type=int, default=100, help="Percentage of tests to run.") + test.add_argument("-l", "--list", action="store_true", help="List all available tests.") + test.add_argument("-f", "--from", default=TEST_CASES[0].get_uuid(), type=str, help="First test UUID to run.") + test.add_argument("-t", "--to", default=TEST_CASES[-1].get_uuid(), type=str, help="Last test UUID to run.") + test.add_argument("-o", "--only", nargs="+", type=str, default=[], metavar="L", help="Only run tests with UUIDs or hashes L.") + test.add_argument("-b", "--binary", choices=binaries, type=str, default=None, help="(Serial) Override MPI execution binary") + test.add_argument("-r", "--relentless", action="store_true", default=False, help="Run all tests, even if multiple fail.") + test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.") + test.add_argument("-g", "--gpus", type=str, default="0", help="(GPU) Comma separated list of GPU #s to use.") + test.add_argument("-%", "--percent", type=int, default=100, help="Percentage of tests to run.") + test.add_argument("-m", "--max-attempts", type=int, default=3, help="Maximum number of attempts to run a test.") test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") diff --git a/toolchain/mfc/build.py b/toolchain/mfc/build.py index 60bb94b72..577cde1fa 100644 --- a/toolchain/mfc/build.py +++ b/toolchain/mfc/build.py @@ -1,6 +1,6 @@ import os, typing, dataclasses -from .state import ARG +from .state import ARG, CFG from .printer import cons from . import common from .run.input import MFCInputFile @@ -20,25 +20,24 @@ def compute(self) -> typing.List[str]: return r - name: str - flags: typing.List[str] - isDependency: bool - isDefault: bool - isRequired: bool - requires: Dependencies + name: str # Name of the target + flags: typing.List[str] # Extra flags to pass to CMake + isDependency: bool # Is it a dependency of an MFC target? + isDefault: bool # Should it be built by default? (unspecified -t | --targets) + isRequired: bool # Should it always be built? (no matter what -t | --targets is) + requires: Dependencies # Build dependencies of the target -TARGETS: typing.List[MFCTarget] = [ - MFCTarget('fftw', ['-DMFC_FFTW=ON'], True, False, False, MFCTarget.Dependencies([], [], [])), - MFCTarget('hdf5', ['-DMFC_HDF5=ON'], True, False, False, MFCTarget.Dependencies([], [], [])), - MFCTarget('silo', ['-DMFC_SILO=ON'], True, False, False, MFCTarget.Dependencies(["hdf5"], [], [])), - MFCTarget('pre_process', ['-DMFC_PRE_PROCESS=ON'], False, True, False, MFCTarget.Dependencies([], [], [])), - MFCTarget('simulation', ['-DMFC_SIMULATION=ON'], False, True, False, MFCTarget.Dependencies([], ["fftw"], [])), - MFCTarget('post_process', ['-DMFC_POST_PROCESS=ON'], False, True, False, MFCTarget.Dependencies(['fftw', 'silo'], [], [])), - MFCTarget('syscheck', ['-DMFC_SYSCHECK=ON'], False, False, True, MFCTarget.Dependencies([], [], [])), - MFCTarget('documentation', ['-DMFC_DOCUMENTATION=ON'], False, False, False, MFCTarget.Dependencies([], [], [])) -] +FFTW = MFCTarget('fftw', ['-DMFC_FFTW=ON'], True, False, False, MFCTarget.Dependencies([], [], [])) +HDF5 = MFCTarget('hdf5', ['-DMFC_HDF5=ON'], True, False, False, MFCTarget.Dependencies([], [], [])) +SILO = MFCTarget('silo', ['-DMFC_SILO=ON'], True, False, False, MFCTarget.Dependencies(["hdf5"], [], [])) +PRE_PROCESS = MFCTarget('pre_process', ['-DMFC_PRE_PROCESS=ON'], False, True, False, MFCTarget.Dependencies([], [], [])) +SIMULATION = MFCTarget('simulation', ['-DMFC_SIMULATION=ON'], False, True, False, MFCTarget.Dependencies([], ["fftw"], [])) +POST_PROCESS = MFCTarget('post_process', ['-DMFC_POST_PROCESS=ON'], False, True, False, MFCTarget.Dependencies(['fftw', 'silo'], [], [])) +SYSCHECK = MFCTarget('syscheck', ['-DMFC_SYSCHECK=ON'], False, False, True, MFCTarget.Dependencies([], [], [])) +DOCUMENTATION = MFCTarget('documentation', ['-DMFC_DOCUMENTATION=ON'], False, False, False, MFCTarget.Dependencies([], [], [])) +TARGETS: typing.List[MFCTarget] = [ FFTW, HDF5, SILO, PRE_PROCESS, SIMULATION, POST_PROCESS, SYSCHECK, DOCUMENTATION ] def get_mfc_target_names() -> typing.List[str]: return [ target.name for target in TARGETS if target.isDefault ] @@ -66,23 +65,53 @@ def get_target(name: str) -> MFCTarget: # Get path to directory that will store the build files def get_build_dirpath(target: MFCTarget) -> str: - return os.sep.join([os.getcwd(), "build", target.name]) + return os.sep.join([ + os.getcwd(), + "build", + [CFG().make_slug(), 'dependencies'][int(target.isDependency)], + target.name + ]) # Get the directory that contains the target's CMakeLists.txt def get_cmake_dirpath(target: MFCTarget) -> str: - subdir = ["", os.sep.join(["toolchain", "dependencies"])][int(target.isDependency)] - - return os.sep.join([os.getcwd(), subdir]) - + # The CMakeLists.txt file is located: + # * Regular: /CMakelists.txt + # * Dependency: /toolchain/dependencies/CMakelists.txt + return os.sep.join([ + os.getcwd(), + os.sep.join(["toolchain", "dependencies"]) if target.isDependency else "", + ]) + + +def get_install_dirpath(target: MFCTarget) -> str: + # The install directory is located: + # Regular: /build/install/ + # Dependency: /build/install/dependencies (shared) + return os.sep.join([ + os.getcwd(), + "build", + "install", + 'dependencies' if target.isDependency else CFG().make_slug() + ]) + + +def get_dependency_install_dirpath() -> str: + # Since dependencies share the same install directory, we can just return + # the install directory of the first dependency we find. + for target in TARGETS: + if target.isDependency: + return get_install_dirpath(target) -def get_install_dirpath() -> str: - return os.sep.join([os.getcwd(), "build", "install"]) + raise common.MFCException("No dependency target found.") def is_target_configured(target: MFCTarget) -> bool: - cmake_cachepath = os.sep.join([get_build_dirpath(target), "CMakeCache.txt"]) - return os.path.isfile(cmake_cachepath) + # We assume that if the CMakeCache.txt file exists, then the target is + # configured. (this isn't perfect, but it's good enough for now) + return os.path.isfile( + os.sep.join([get_build_dirpath(target), "CMakeCache.txt"]) + ) def clean_target(name: str): @@ -146,7 +175,9 @@ def build_target(name: str, history: typing.List[str] = None): build_dirpath = get_build_dirpath(target) cmake_dirpath = get_cmake_dirpath(target) - install_dirpath = get_install_dirpath() + install_dirpath = get_install_dirpath(target) + + install_prefixes = ';'.join([install_dirpath, get_dependency_install_dirpath()]) flags: list = target.flags.copy() + [ # Disable CMake warnings intended for developers (us). @@ -163,10 +194,10 @@ def build_target(name: str, history: typing.List[str] = None): # second heighest level of priority, still letting users manually # specify _ROOT, which has precedence over CMAKE_PREFIX_PATH. # See: https://cmake.org/cmake/help/latest/command/find_package.html. - f"-DCMAKE_PREFIX_PATH={install_dirpath}", + f"-DCMAKE_PREFIX_PATH={install_prefixes}", # First directory that FIND_LIBRARY searches. # See: https://cmake.org/cmake/help/latest/command/find_library.html. - f"-DCMAKE_FIND_ROOT_PATH={install_dirpath}", + f"-DCMAKE_FIND_ROOT_PATH={install_prefixes}", # Location prefix to install bin/, lib/, include/, etc. # See: https://cmake.org/cmake/help/latest/command/install.html. f"-DCMAKE_INSTALL_PREFIX={install_dirpath}", diff --git a/toolchain/mfc/lock.py b/toolchain/mfc/lock.py index 03d18e4c0..f60890250 100644 --- a/toolchain/mfc/lock.py +++ b/toolchain/mfc/lock.py @@ -1,6 +1,6 @@ import os, dataclasses -from . import build, state, common +from . import state, common from .state import MFCConfig from .printer import cons @@ -68,8 +68,3 @@ def switch(to: MFCConfig): data.config = to state.gCFG = to write() - - for target_name in build.get_mfc_target_names() + build.get_required_target_names(): - dirpath = build.get_build_dirpath(build.get_target(target_name)) - cons.print(f"[bold red]Removing {os.path.relpath(dirpath)}[/bold red]") - common.delete_directory(dirpath) diff --git a/toolchain/mfc/packer/tol.py b/toolchain/mfc/packer/tol.py index 68eb82aaa..20b6a7ec4 100644 --- a/toolchain/mfc/packer/tol.py +++ b/toolchain/mfc/packer/tol.py @@ -58,13 +58,13 @@ def raise_err(msg: str): """ if math.isnan(gVal): - raise_err("is NaN in the golden file") + return raise_err("is NaN in the golden file") if math.isnan(cVal): - raise_err("is NaN in the pack file") + return raise_err("is NaN in the pack file") if not is_close(error, tol): - raise_err("is not within tolerance") + return raise_err("is not within tolerance") # Return the average relative error return avg_err.get(), None diff --git a/toolchain/mfc/run/engines.py b/toolchain/mfc/run/engines.py index 1273052e2..2115c75bb 100644 --- a/toolchain/mfc/run/engines.py +++ b/toolchain/mfc/run/engines.py @@ -44,10 +44,13 @@ def run(self, names: typing.List[str]) -> None: raise common.MFCException(f"MFCEngine::run: not implemented for {self.name}.") def get_binpath(self, target: str) -> str: - return os.sep.join([build.get_install_dirpath(), "bin", target]) + # /install//bin/ + prefix = build.get_install_dirpath(build.get_target(target)) + return os.sep.join([prefix, "bin", target]) -def _interactive_working_worker(cmd, q): +def _interactive_working_worker(cmd: typing.List[str], q: multiprocessing.Queue): + """ Runs a command and puts the result in a queue. """ cmd = [ str(_) for _ in cmd ] cons.print(f"$ {' '.join(cmd)}") result = subprocess.run( @@ -99,39 +102,44 @@ def run(self, names: typing.List[str]) -> None: p = multiprocessing.Process( target=_interactive_working_worker, args=( - [self.mpibin.bin] + self.mpibin.gen_params() + [os.sep.join([build.get_install_dirpath(), "bin", "syscheck"])], + [self.mpibin.bin] + self.mpibin.gen_params() + [os.sep.join([build.get_install_dirpath(build.SYSCHECK), "bin", "syscheck"])], q, )) p.start() p.join(work_timeout) - try: - result = q.get(block=False) - self.bKnowWorks = result.returncode == 0 - except queue.Empty as e: - self.bKnowWorks = False - - if p.is_alive() or not self.bKnowWorks: - if p.is_alive(): - raise common.MFCException("""\ + if p.is_alive(): + raise common.MFCException("""\ The [bold magenta]Interactive Engine[/bold magenta] appears to hang. This may indicate that the wrong MPI binary is being used to launch parallel jobs. You can specify the correct one for your system using the <-b,--binary> option. For example: - * ./mfc.sh run -b mpirun - * ./mfc.sh run -b srun +* ./mfc.sh run -b mpirun +* ./mfc.sh run -b srun """) - else: - raise common.MFCException(f"""\ + + result = q.get(block=False) + self.bKnowWorks = result.returncode == 0 + + if not self.bKnowWorks: + error_txt = f"""\ MFC's [bold magenta]syscheck[/bold magenta] (system check) failed to run successfully. Please review the output bellow and ensure that your system is configured correctly: +""" + + if result is not None: + error_txt += f"""\ STDOUT: {result.stdout} STDERR: {result.stderr} -""") +""" + else: + error_txt += f"Evaluation timed out after {work_timeout}s." + + raise common.MFCException(error_txt) cons.print() cons.unindent() diff --git a/toolchain/mfc/state.py b/toolchain/mfc/state.py index 6c2a394cf..6768329a4 100644 --- a/toolchain/mfc/state.py +++ b/toolchain/mfc/state.py @@ -8,14 +8,30 @@ class MFCConfig: debug: bool = False def from_dict(d: dict): + """ Create a MFCConfig object from a dictionary with the same keys + as the fields of MFCConfig """ r = MFCConfig() - for key in d: - setattr(r, key, d[key]) + for field in dataclasses.fields(MFCConfig): + setattr(r, field.name, d[field.name]) return r + def items(self) -> typing.List[typing.Tuple[str, bool]]: + return dataclasses.asdict(self).items() + + def make_options(self) -> typing.List[str]: + """ Returns a list of options that could be passed to mfc.sh again. + Example: --no-debug --mpi --no-gpu """ + return [ f"--{'no-' if not v else ''}{k}" for k, v in self.items() ] + + def make_slug(self) -> str: + """ Sort the items by key, then join them with underscores. This uniquely + identifies the configuration. Example: no-debug_no-gpu_mpi """ + return '_'.join([ f"{'no-' if not v else ''}{k}" for k, v in sorted(self.items(), key=lambda x: x[0]) ]) + def __eq__(self, other) -> bool: + """ Check if two MFCConfig objects are equal, field by field. """ for field in dataclasses.fields(self): if getattr(self, field.name) != getattr(other, field.name): return False @@ -23,10 +39,9 @@ def __eq__(self, other) -> bool: return True def __str__(self) -> str: - m = { False: "No", True: "Yes" } - r = ' & '.join([ f"{field.name}={m[getattr(self, field.name)]}" for field in dataclasses.fields(self) ]) - - return r + """ Returns a string like "mpi=No & gpu=No & debug=No" """ + + return ' & '.join([ f"{k}={'Yes' if v else 'No'}" for k, v in self.items() ]) gCFG: MFCConfig = MFCConfig() diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py index 69e81f6ad..527f8f7c1 100644 --- a/toolchain/mfc/test/test.py +++ b/toolchain/mfc/test/test.py @@ -8,7 +8,7 @@ from .cases import generate_cases from .. import sched from ..common import MFCException, does_command_exist, format_list_to_string, get_program_output -from ..build import build_targets, get_install_dirpath +from ..build import build_targets, get_install_dirpath, HDF5 from ..packer import tol as packtol from ..packer import packer @@ -106,7 +106,7 @@ def test(): cons.print() # Initialize GPU_LOAD to 0 for each GPU - handle_case.GPU_LOAD = { id: 0 for id in ARG("gpus") } + _handle_case.GPU_LOAD = { id: 0 for id in ARG("gpus") } # Select the correct number of threads to use to launch test CASES # We can't use ARG("jobs") when the --case-optimization option is set @@ -131,106 +131,121 @@ def test(): cons.unindent() -def handle_case(test: TestCase): - global nFAIL - - try: - if test.params.get("qbmm", 'F') == 'T': - tol = 1e-10 - elif test.params.get("bubbles", 'F') == 'T': - tol = 1e-10 - elif test.params.get("hypoelasticity", 'F') == 'T': - tol = 1e-7 +def _handle_case(test: TestCase): + if test.params.get("qbmm", 'F') == 'T': + tol = 1e-10 + elif test.params.get("bubbles", 'F') == 'T': + tol = 1e-10 + elif test.params.get("hypoelasticity", 'F') == 'T': + tol = 1e-7 + else: + tol = 1e-12 + + test.delete_output() + test.create_directory() + + load = test.get_cell_count() + gpu_id = min(_handle_case.GPU_LOAD.items(), key=lambda x: x[1])[0] + _handle_case.GPU_LOAD[gpu_id] += load + + cmd = test.run(["pre_process", "simulation"], gpu=gpu_id) + + out_filepath = os.path.join(test.get_dirpath(), "out_pre_sim.txt") + + common.file_write(out_filepath, cmd.stdout) + + if cmd.returncode != 0: + cons.print(cmd.stdout) + raise MFCException(f"Test {test}: Failed to execute MFC.") + + pack, err = packer.pack(test.get_dirpath()) + if err is not None: + raise MFCException(f"Test {test}: {err}") + + if pack.hash_NaNs(): + raise MFCException(f"Test {test}: NaNs detected in the case.") + + golden_filepath = os.path.join(test.get_dirpath(), "golden.txt") + if ARG("generate"): + common.delete_file(golden_filepath) + pack.save(golden_filepath) + else: + if not os.path.isfile(golden_filepath): + raise MFCException(f"Test {test}: The golden file does not exist! To generate golden files, use the '--generate' flag.") + + golden = packer.load(golden_filepath) + + if ARG("add_new_variables"): + for pfilepath, pentry in pack.entries.items(): + if golden.find(pfilepath) is None: + golden.set(pentry) + + golden.save(golden_filepath) else: - tol = 1e-12 + err, msg = packtol.compare(pack, packer.load(golden_filepath), packtol.Tolerance(tol, tol)) + if msg is not None: + raise MFCException(f"Test {test}: {msg}") + if ARG("test_all"): test.delete_output() - test.create_directory() + cmd = test.run(["pre_process", "simulation", "post_process"], gpu=gpu_id) + out_filepath = os.path.join(test.get_dirpath(), "out_post.txt") + common.file_write(out_filepath, cmd.stdout) - load = test.get_cell_count() - gpu_id = min(handle_case.GPU_LOAD.items(), key=lambda x: x[1])[0] - handle_case.GPU_LOAD[gpu_id] += load - - cmd = test.run(["pre_process", "simulation"], gpu=gpu_id) + for t_step in [ i*test["t_step_save"] for i in range(0, math.floor(test["t_step_stop"] / test["t_step_save"]) + 1) ]: + silo_filepath = os.path.join(test.get_dirpath(), 'silo_hdf5', 'p0', f'{t_step}.silo') + if not os.path.exists(silo_filepath): + silo_filepath = os.path.join(test.get_dirpath(), 'silo_hdf5', 'p_all', 'p0', f'{t_step}.silo') + + h5dump = f"{get_install_dirpath(HDF5)}/bin/h5dump" - out_filepath = os.path.join(test.get_dirpath(), "out_pre_sim.txt") + if ARG("no_hdf5"): + if not does_command_exist("h5dump"): + raise MFCException("--no-hdf5 was specified and h5dump couldn't be found.") + + h5dump = shutil.which("h5dump") - common.file_write(out_filepath, cmd.stdout) + output, err = get_program_output([h5dump, silo_filepath]) - if cmd.returncode != 0: - cons.print(cmd.stdout) - raise MFCException(f"Test {test}: Failed to execute MFC.") + if err != 0: + raise MFCException(f"""Test {test}: Failed to run h5dump. You can find the run's output in {out_filepath}, and the case dictionary in {os.path.join(test.get_dirpath(), "case.py")}.""") - pack, err = packer.pack(test.get_dirpath()) - if err is not None: - raise MFCException(f"Test {test}: {err}") + if "nan," in output: + raise MFCException(f"""Test {test}: Post Process has detected a NaN. You can find the run's output in {out_filepath}, and the case dictionary in {os.path.join(test.get_dirpath(), "case.py")}.""") - if pack.hash_NaNs(): - raise MFCException(f"Test {test}: NaNs detected in the case.") + if "inf," in output: + raise MFCException(f"""Test {test}: Post Process has detected an Infinity. You can find the run's output in {out_filepath}, and the case dictionary in {os.path.join(test.get_dirpath(), "case.py")}.""") - golden_filepath = os.path.join(test.get_dirpath(), "golden.txt") - if ARG("generate"): - common.delete_file(golden_filepath) - pack.save(golden_filepath) - else: - if not os.path.isfile(golden_filepath): - raise MFCException(f"Test {test}: The golden file does not exist! To generate golden files, use the '--generate' flag.") + test.delete_output() - golden = packer.load(golden_filepath) + cons.print(f" [bold magenta]{test.get_uuid()}[/bold magenta] {test.trace}") - if ARG("add_new_variables"): - for pfilepath, pentry in pack.entries.items(): - if golden.find(pfilepath) is None: - golden.set(pentry) + _handle_case.GPU_LOAD[gpu_id] -= load - golden.save(golden_filepath) - else: - err, msg = packtol.compare(pack, packer.load(golden_filepath), packtol.Tolerance(tol, tol)) - if msg is not None: - raise MFCException(f"Test {test}: {msg}") - - if ARG("test_all"): - test.delete_output() - cmd = test.run(["pre_process", "simulation", "post_process"], gpu=gpu_id) - out_filepath = os.path.join(test.get_dirpath(), "out_post.txt") - common.file_write(out_filepath, cmd.stdout) - - for t_step in [ i*test["t_step_save"] for i in range(0, math.floor(test["t_step_stop"] / test["t_step_save"]) + 1) ]: - silo_filepath = os.path.join(test.get_dirpath(), 'silo_hdf5', 'p0', f'{t_step}.silo') - if not os.path.exists(silo_filepath): - silo_filepath = os.path.join(test.get_dirpath(), 'silo_hdf5', 'p_all', 'p0', f'{t_step}.silo') - - h5dump = f"{get_install_dirpath()}/bin/h5dump" - - if ARG("no_hdf5"): - if not does_command_exist("h5dump"): - raise MFCException("--no-hdf5 was specified and h5dump couldn't be found.") - - h5dump = shutil.which("h5dump") - - output, err = get_program_output([h5dump, silo_filepath]) - - if err != 0: - raise MFCException(f"""Test {test}: Failed to run h5dump. You can find the run's output in {out_filepath}, and the case dictionary in {os.path.join(test.get_dirpath(), "case.py")}.""") - - if "nan," in output: - raise MFCException(f"""Test {test}: Post Process has detected a NaN. You can find the run's output in {out_filepath}, and the case dictionary in {os.path.join(test.get_dirpath(), "case.py")}.""") - - if "inf," in output: - raise MFCException(f"""Test {test}: Post Process has detected an Infinity. You can find the run's output in {out_filepath}, and the case dictionary in {os.path.join(test.get_dirpath(), "case.py")}.""") +_handle_case.GPU_LOAD = {} - test.delete_output() +def handle_case(test: TestCase): + global nFAIL + + nAttempts = 0 - cons.print(f" [bold magenta]{test.get_uuid()}[/bold magenta] {test.trace}") + while True: + nAttempts += 1 - handle_case.GPU_LOAD[gpu_id] -= load - except Exception as exc: - nFAIL = nFAIL + 1 + try: + _handle_case(test) + except Exception as exc: + if nAttempts < ARG("max_attempts"): + cons.print(f"[bold yellow] Attempt {nAttempts}: Failed test {test.get_uuid()}. Retrying...[/bold yellow]") + continue - if not ARG("relentless"): - raise exc + nFAIL += 1 - cons.print(f"[bold red]Failed test {test}.[/bold red]") - cons.print(f"{exc}") + cons.print(f"[bold red]Failed test {test} after {nAttempts} attempt(s).[/bold red]") -handle_case.GPU_LOAD = {} + if ARG("relentless"): + cons.print(f"{exc}") + else: + raise exc + + return