diff --git a/.docker/aiida-core-base/Dockerfile b/.docker/aiida-core-base/Dockerfile
index 87339724bc..40b18f9fa3 100644
--- a/.docker/aiida-core-base/Dockerfile
+++ b/.docker/aiida-core-base/Dockerfile
@@ -137,7 +137,7 @@ RUN set -x && \
     mamba && \
     rm micromamba && \
     # Pin major.minor version of python
-    mamba list python | grep '^python ' | tr -s ' ' | cut -d ' ' -f 1,2 >> "${CONDA_DIR}/conda-meta/pinned" && \
+    mamba list python | grep -oP 'python\s+\K[\d.]+' | tr -s ' ' | cut -d ' ' -f 1,2 >> "${CONDA_DIR}/conda-meta/pinned" && \
     mamba clean --all -f -y && \
     fix-permissions "${CONDA_DIR}" && \
     fix-permissions "/home/${SYSTEM_USER}"
diff --git a/.docker/aiida-core-base/s6-assets/init/aiida-prepare.sh b/.docker/aiida-core-base/s6-assets/init/aiida-prepare.sh
index 30eefc3999..a9e54142fa 100755
--- a/.docker/aiida-core-base/s6-assets/init/aiida-prepare.sh
+++ b/.docker/aiida-core-base/s6-assets/init/aiida-prepare.sh
@@ -18,11 +18,6 @@ verdi config set warnings.development_version False

 # If the environment variable `SETUP_DEFAULT_AIIDA_PROFILE` is not set, set it to `true`.
 if [[ ${SETUP_DEFAULT_AIIDA_PROFILE:-true} == true ]] && ! verdi profile show ${AIIDA_PROFILE_NAME} &> /dev/null; then
-    # For the container that includes the services, this script is called as soon as the RabbitMQ startup script has
-    # been launched, but it can take a while for the service to come up. If ``verdi presto`` is called straight away
-    # it is possible it tries to connect to the service before that and it will configure the profile without a broker.
-    sleep 5
-
     # Create AiiDA profile.
     verdi presto \
         --verbosity info \
diff --git a/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/data/check b/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/data/check
new file mode 100755
index 0000000000..46eb70ea89
--- /dev/null
+++ b/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/data/check
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+rabbitmq-diagnostics ping
+
+if [ $? -ne 0 ]; then
+    exit 1
+fi
+
+rabbitmq-diagnostics check_running
+
+if [ $? -ne 0 ]; then
+    exit 1
+fi
+
+exit 0
diff --git a/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/notification-fd b/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/notification-fd
new file mode 100644
index 0000000000..00750edc07
--- /dev/null
+++ b/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/notification-fd
@@ -0,0 +1 @@
+3
diff --git a/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/run b/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/run
index e5752294ff..8a35acd20f 100644
--- a/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/run
+++ b/.docker/aiida-core-with-services/s6-assets/s6-rc.d/rabbitmq/run
@@ -2,5 +2,14 @@
 with-contenv

 foreground { s6-echo "Calling /etc/init/rabbitmq.sh" }
-rabbitmq-server
+foreground { s6-echo "Starting RMQ server and notifying back when the service is ready" }
+
+
+# For the container that includes the services, the aiida-prepare.sh script is called as soon as the RabbitMQ startup script has
+# been launched, but it can take a while for the RMQ service to come up. If ``verdi presto`` is called straight away,
+# it is possible that it tries to connect to the service before then and will configure the profile without a broker.
+# Here we use s6-notifyoncheck to run a polling health check on the readiness of the RMQ service.
+#
+# -w 500: 500 ms between two invocations of ./data/check
+
+s6-notifyoncheck -w 500 rabbitmq-server
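The effect of `s6-notifyoncheck` here is essentially a poll-until-healthy loop around the `data/check` script. As a rough illustration only (not part of the patch), the same idea sketched in Python; it assumes `rabbitmq-diagnostics` is on the PATH, and the function name and parameters are invented for the sketch:

    import subprocess
    import time

    def wait_until_ready(check_cmd, interval=0.5, timeout=60.0):
        """Poll a health-check command until it succeeds or the timeout expires."""
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            # A zero return code from the check command means the service is ready.
            if subprocess.run(check_cmd, capture_output=True).returncode == 0:
                return True
            time.sleep(interval)
        return False

    # Hypothetical usage: block until RabbitMQ answers a ping.
    if wait_until_ready(['rabbitmq-diagnostics', 'ping']):
        print('broker is up')

Unlike the removed fixed `sleep 5`, such a loop waits exactly as long as the broker needs, which is the design point of the s6 readiness notification.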
diff --git a/.github/workflows/extract-docker-image-names.sh b/.github/workflows/extract-docker-image-names.sh
index 8609f7c385..e395432ddb 100755
--- a/.github/workflows/extract-docker-image-names.sh
+++ b/.github/workflows/extract-docker-image-names.sh
@@ -9,38 +9,35 @@ set -euo pipefail
 # The input to this script is a JSON string passed via BAKE_METADATA env variable
 # Here's example input (trimmed to relevant bits):
 # BAKE_METADATA: {
-#   "base": {
+#   "aiida-core-base": {
+#     # ...
 #     "containerimage.descriptor": {
 #       "mediaType": "application/vnd.docker.distribution.manifest.v2+json",
 #       "digest": "sha256:8e57a52b924b67567314b8ed3c968859cad99ea13521e60bbef40457e16f391d",
 #       "size": 6170,
 #     },
 #     "containerimage.digest": "sha256:8e57a52b924b67567314b8ed3c968859cad99ea13521e60bbef40457e16f391d",
-#     "image.name": "ghcr.io/aiidalab/base"
-#   },
-#   "aiida-core-base": {
 #     "image.name": "ghcr.io/aiidateam/aiida-core-base"
-#     "containerimage.digest": "sha256:6753a809b5b2675bf4c22408e07c1df155907a465b33c369ef93ebcb1c4fec26",
-#     "...": ""
-#   }
-#   "aiida-core-with-services": {
-#     "image.name": "ghcr.io/aiidateam/aiida-core-with-services"
-#     "containerimage.digest": "sha256:85ee91f61be1ea601591c785db038e5899d68d5fb89e07d66d9efbe8f352ee48",
-#     "...": ""
-#   }
+#   },
 #   "aiida-core-dev": {
-#     "image.name": "ghcr.io/aiidateam/aiida-core-with-services"
 #     "containerimage.digest": "sha256:4d9be090da287fcdf2d4658bb82f78bad791ccd15dac9af594fb8306abe47e97",
+#     "...": ...
+#     "image.name": "ghcr.io/aiidateam/aiida-core-dev"
+#   },
+#   "aiida-core-with-services": {
 #     "...": ""
-#   }
+#     "containerimage.digest": "sha256:85ee91f61be1ea601591c785db038e5899d68d5fb89e07d66d9efbe8f352ee48",
+#     "image.name": "ghcr.io/aiidateam/aiida-core-with-services"
+#   },
+#   "some-other-key": ...
 # }
 #
 # Example output (real output is on one line):
 #
 # images={
-#   "AIIDA_CORE_BASE_IMAGE": "ghcr.io/aiidateam/aiida-core-base@sha256:8e57a52b924b67567314b8ed3c968859cad99ea13521e60bbef40457e16f391d",
-#   "AIIDA_CORE_WITH_SERVICES_IMAGE": "ghcr.io/aiidateam/aiida-core-with-services@sha256:6753a809b5b2675bf4c22408e07c1df155907a465b33c369ef93ebcb1c4fec26",
-#   "AIIDA_CORE_DEV_IMAGE": "ghcr.io/aiidateam/aiida-core-dev@sha256:85ee91f61be1ea601591c785db038e5899d68d5fb89e07d66d9efbe8f352ee48",
+#   "AIIDA_CORE_BASE_IMAGE": "ghcr.io/aiidateam/aiida-core-base@sha256:4c402a8bfd635650ad691674f8f29e7ddec5fa656fb425452067950415ee447f",
+#   "AIIDA_CORE_DEV_IMAGE": "ghcr.io/aiidateam/aiida-core-dev@sha256:f94c06e47f801e751f9829010b31532039b210aad2649d43205e16c08371b2ed",
+#   "AIIDA_CORE_WITH_SERVICES_IMAGE": "ghcr.io/aiidateam/aiida-core-with-services@sha256:bd8272f2a331af7eac3e83c44cc16d23b2e5f601a20ab4a865402659b758515e"
 # }
 #
 # This json output is later turned to environment variables using fromJson() GHA builtin
@@ -52,5 +49,7 @@ if [[ -z ${BAKE_METADATA-} ]];then
   exit 1
 fi

-images=$(echo "${BAKE_METADATA}" | jq -c '. as $base |[to_entries[] |{"key": (.key|ascii_upcase|sub("-"; "_"; "g") + "_IMAGE"), "value": [(.value."image.name"|split(",")[0]),.value."containerimage.digest"]|join("@")}] |from_entries')
+images=$(echo "${BAKE_METADATA}" |
+jq -c 'to_entries | map(select(.key | startswith("aiida"))) | from_entries' | # filters out every key that does not start with aiida
+jq -c '. as $base |[to_entries[] |{"key": (.key|ascii_upcase|sub("-"; "_"; "g") + "_IMAGE"), "value": [(.value."image.name"|split(",")[0]),.value."containerimage.digest"]|join("@")}] |from_entries')
 echo "images=$images"
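For reference, the two-stage jq pipeline above first drops every bake target whose key does not start with `aiida`, then maps each remaining entry to an upper-cased `<NAME>_IMAGE` key with an `image@digest` value. A minimal Python sketch of the same transformation (illustrative only, assuming `BAKE_METADATA` is set exactly as in the script; output whitespace may differ slightly from jq's):

    import json
    import os

    bake_metadata = json.loads(os.environ['BAKE_METADATA'])

    images = {
        # e.g. 'aiida-core-base' -> 'AIIDA_CORE_BASE_IMAGE'
        key.upper().replace('-', '_') + '_IMAGE': f"{value['image.name'].split(',')[0]}@{value['containerimage.digest']}"
        for key, value in bake_metadata.items()
        if key.startswith('aiida')  # drop keys such as 'some-other-key'
    }
    print(f'images={json.dumps(images)}')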
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f15247b53f..6f514a3427 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,18 @@
 # Changelog

+## v2.6.3 - 2024-11-TODO
+
+### Fixes
+- CLI: Fix exception for `verdi plugin list` (#6560) [[c3b10b7]](https://github.com/aiidateam/aiida-core/commit/c3b10b759a9cd062800ef120591d5c7fd0ae4ee7)
+- `DirectScheduler`: Ensure killing child processes (#6572) [[fddffca]](https://github.com/aiidateam/aiida-core/commit/fddffca67b4f7e3b76b19df7db8e1511c449d2d9)
+- Engine: Fix state change broadcast before process node is updated (#6580) [[867353c]](https://github.com/aiidateam/aiida-core/commit/867353c415c61d94a2427d5225dd5224a1b95fb9)
+
+### Devops
+- Docker: Replace `sleep` with `s6-notifyoncheck` (#6475) [[9579378b]](https://github.com/aiidateam/aiida-core/commit/9579378ba063237baa5b73380eb8e9f0a28529ee)
+- Fix failed Docker CI by using a more reasonable grep regex to parse the Python version (#6581) [[332a4a91]](https://github.com/aiidateam/aiida-core/commit/332a4a915771afedcb144463b012558e4669e529)
+- DevOps: Fix the JSON query that reads the Docker image names, filtering out fields that do not start with `aiida` (#6573) [[e1467edc]](https://github.com/aiidateam/aiida-core/commit/e1467edca902867e53605e0e60b67f8767bf8d3e)
+
+
 ## v2.6.2 - 2024-08-07

 ### Fixes
diff --git a/src/aiida/__init__.py b/src/aiida/__init__.py
index 5e3f19672d..8f1333b2d3 100644
--- a/src/aiida/__init__.py
+++ b/src/aiida/__init__.py
@@ -27,7 +27,7 @@
     'For further information please visit http://www.aiida.net/. All rights reserved.'
 )
 __license__ = 'MIT license, see LICENSE.txt file.'
-__version__ = '2.6.2'
+__version__ = '2.6.3'
 __authors__ = 'The AiiDA team.'
 __paper__ = (
     'S. P. Huber et al., "AiiDA 1.0, a scalable computational infrastructure for automated reproducible workflows and '
diff --git a/src/aiida/cmdline/commands/cmd_plugin.py b/src/aiida/cmdline/commands/cmd_plugin.py
index 9f43dd8b56..6cbbd958ce 100644
--- a/src/aiida/cmdline/commands/cmd_plugin.py
+++ b/src/aiida/cmdline/commands/cmd_plugin.py
@@ -47,14 +47,13 @@ def plugin_list(entry_point_group, entry_point):
         except EntryPointError as exception:
             echo.echo_critical(str(exception))
         else:
-            try:
-                if (inspect.isclass(plugin) and issubclass(plugin, Process)) or (
-                    hasattr(plugin, 'is_process_function') and plugin.is_process_function
-                ):
-                    print_process_info(plugin)
-                else:
-                    echo.echo(str(plugin.get_description()))
-            except AttributeError:
+            if (inspect.isclass(plugin) and issubclass(plugin, Process)) or (
+                hasattr(plugin, 'is_process_function') and plugin.is_process_function
+            ):
+                print_process_info(plugin)
+            elif plugin.__doc__:
+                echo.echo(plugin.__doc__)
+            else:
                 echo.echo_error(f'No description available for {entry_point}')
     else:
         entry_points = get_entry_point_names(entry_point_group)
diff --git a/src/aiida/engine/processes/process.py b/src/aiida/engine/processes/process.py
index b6d83c67f0..2c0ed6400f 100644
--- a/src/aiida/engine/processes/process.py
+++ b/src/aiida/engine/processes/process.py
@@ -418,8 +418,6 @@ def on_entered(self, from_state: Optional[plumpy.process_states.State]) -> None:
         from aiida.engine.utils import set_process_state_change_timestamp

-        super().on_entered(from_state)
-
         if self._state.LABEL is ProcessState.EXCEPTED:
             # The process is already excepted so simply update the process state on the node and let the process
             # complete the state transition to the terminal state. If another exception is raised during this exception
@@ -427,9 +425,7 @@ def on_entered(self, from_state: Optional[plumpy.process_states.State]) -> None:
             self.node.set_process_state(self._state.LABEL)
             return

-        # For reasons unknown, it is important to update the outputs first, before doing anything else, otherwise there
-        # is the risk that certain outputs do not get attached before the process reaches a terminal state. Nevertheless
-        # we need to guarantee that the process state gets updated even if the ``update_outputs`` call excepts, for
+        # We need to guarantee that the process state gets updated even if the ``update_outputs`` call excepts, for
         # example if the process implementation attaches an invalid output through ``Process.out``, and so we call the
         # ``ProcessNode.set_process_state`` in the finally-clause. This way the state gets properly set on the node even
         # if the process is transitioning to the terminal excepted state.
@@ -443,6 +439,12 @@ def on_entered(self, from_state: Optional[plumpy.process_states.State]) -> None:
             self._save_checkpoint()
             set_process_state_change_timestamp(self.node)

+        # The updating of outputs and state has to be performed before the super is called, because the super will
+        # broadcast the state change and parent processes may start running again before the state change is
+        # completed. It is possible that they would read the old process state, and outputs that they check for may
+        # not yet have been attached.
+        super().on_entered(from_state)
+
     @override
     def on_terminated(self) -> None:
         """Called when a Process enters a terminal state."""
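The comment added above captures an ordering constraint: persist everything a listener might read before letting the superclass broadcast the state change. A stripped-down, runnable sketch of that pattern, with invented class and attribute names (plumpy's real API differs):

    class BroadcastingBase:
        """Stand-in for plumpy.Process: entering a state broadcasts it to listeners."""

        def on_entered(self, from_state):
            print('broadcast: state changed')  # a waiting parent process may resume here


    class SafeProcess(BroadcastingBase):
        def __init__(self):
            self.outputs_attached = False
            self.state_persisted = False

        def on_entered(self, from_state):
            # Persist outputs and state first, so anything reacting to the
            # broadcast below observes the finished state, not a stale one.
            self.outputs_attached = True
            self.state_persisted = True
            super().on_entered(from_state)


    SafeProcess().on_entered(None)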
diff --git a/src/aiida/orm/utils/serialize.py b/src/aiida/orm/utils/serialize.py
index 320fae2935..2d5923b1bd 100644
--- a/src/aiida/orm/utils/serialize.py
+++ b/src/aiida/orm/utils/serialize.py
@@ -48,7 +48,7 @@ def represent_enum(dumper: yaml.Dumper, enum: Enum) -> yaml.ScalarNode:

 def enum_constructor(loader: yaml.Loader, serialized: yaml.Node) -> Enum:
     """Construct an enum from the serialized representation."""
-    deserialized: str = loader.construct_scalar(serialized)  # type: ignore[arg-type,assignment]
+    deserialized: str = loader.construct_scalar(serialized)  # type: ignore[arg-type]
     identifier, value = deserialized.split('|')
     cls = get_object_loader().load_object(identifier)
     enum = cls(value)
diff --git a/src/aiida/schedulers/plugins/direct.py b/src/aiida/schedulers/plugins/direct.py
index 21d368a15f..512eadef28 100644
--- a/src/aiida/schedulers/plugins/direct.py
+++ b/src/aiida/schedulers/plugins/direct.py
@@ -8,6 +8,8 @@
 ###########################################################################
 """Plugin for direct execution."""

+from typing import Union
+
 import aiida.schedulers
 from aiida.common.escaping import escape_for_bash
 from aiida.schedulers import SchedulerError
@@ -352,13 +354,28 @@ def _parse_submit_output(self, retval, stdout, stderr):

         return stdout.strip()

-    def _get_kill_command(self, jobid):
-        """Return the command to kill the job with specified jobid."""
-        submit_command = f'kill {jobid}'
+    def _get_kill_command(self, jobid: Union[int, str]) -> str:
+        """Return the command to kill the process with the specified id and all its descendants.
+
+        :param jobid: The job id, which in the case of the
+            :py:class:`~aiida.schedulers.plugins.direct.DirectScheduler` is the process id.
+
+        :return: A string containing the kill command.
+        """
+        from psutil import Process
+
+        # Get the process ids of all descendants of the process.
+        process = Process(int(jobid))
+        children = process.children(recursive=True)
+        jobids = [str(jobid)]
+        jobids.extend([str(child.pid) for child in children])
+        jobids_str = ' '.join(jobids)
+
+        kill_command = f'kill {jobids_str}'

         self.logger.info(f'killing job {jobid}')

-        return submit_command
+        return kill_command

     def _parse_kill_output(self, retval, stdout, stderr):
         """Parse the output of the kill command.
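The new `_get_kill_command` relies on `psutil` to expand a single pid into its whole process tree, so that killing a job also kills anything the job spawned. A self-contained sketch of that expansion, run against the current process purely for illustration:

    import os

    from psutil import Process

    def pid_and_descendants(pid: int) -> list[int]:
        """Return the pid together with the pids of all of its recursive children."""
        process = Process(pid)
        return [process.pid] + [child.pid for child in process.children(recursive=True)]

    # Build the same style of command the scheduler returns, e.g. 'kill 123 456 789'.
    pids = pid_and_descendants(os.getpid())
    print('kill ' + ' '.join(str(pid) for pid in pids))

Note that the pid list is computed on the AiiDA side and baked into a single `kill` command, so the command string executed via the transport stays a plain one-liner.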
diff --git a/tests/cmdline/commands/test_plugin.py b/tests/cmdline/commands/test_plugin.py
index ec545ddea0..9960db7acd 100644
--- a/tests/cmdline/commands/test_plugin.py
+++ b/tests/cmdline/commands/test_plugin.py
@@ -11,7 +11,7 @@ import pytest

 from aiida.cmdline.commands import cmd_plugin
 from aiida.parsers import Parser
-from aiida.plugins import CalculationFactory, ParserFactory, WorkflowFactory
+from aiida.plugins import BaseFactory
 from aiida.plugins.entry_point import ENTRY_POINT_GROUP_TO_MODULE_PATH_MAP

@@ -43,6 +43,7 @@ def test_plugin_list_non_existing(run_cli_command):
     'entry_point_string',
     (
         'aiida.calculations:core.arithmetic.add',
+        'aiida.data:core.array',
         'aiida.workflows:core.arithmetic.multiply_add',
         'aiida.workflows:core.arithmetic.add_multiply',
     ),
@@ -52,24 +53,20 @@ def test_plugin_list_detail(run_cli_command, entry_point_string):
     from aiida.plugins.entry_point import parse_entry_point_string

     entry_point_group, entry_point_name = parse_entry_point_string(entry_point_string)
-    factory = CalculationFactory if entry_point_group == 'aiida.calculations' else WorkflowFactory
-    entry_point = factory(entry_point_name)
+    entry_point = BaseFactory(entry_point_group, entry_point_name)

     result = run_cli_command(cmd_plugin.plugin_list, [entry_point_group, entry_point_name])
     assert entry_point.__doc__ in result.output


-class CustomParser(Parser):
-    @classmethod
-    def get_description(cls) -> str:
-        return 'str69'
+class NoDocStringPluginParser(Parser):
+    pass


-def test_plugin_description(run_cli_command, entry_points):
-    """Test that ``verdi plugin list`` uses ``get_description`` if defined."""
-
-    entry_points.add(CustomParser, 'aiida.parsers:custom.parser')
-    assert ParserFactory('custom.parser') is CustomParser
+def test_plugin_list_no_docstring(run_cli_command, entry_points):
+    """Test ``verdi plugin list`` does not fail if the plugin does not define a docstring."""
+    entry_points.add(NoDocStringPluginParser, 'aiida.parsers:custom.parser')
+    assert BaseFactory('aiida.parsers', 'custom.parser') is NoDocStringPluginParser

     result = run_cli_command(cmd_plugin.plugin_list, ['aiida.parsers', 'custom.parser'])
-    assert result.output.strip() == 'str69'
+    assert result.output.strip() == 'Error: No description available for custom.parser'
diff --git a/tests/engine/test_process.py b/tests/engine/test_process.py
index 3035b432b7..9b1f230041 100644
--- a/tests/engine/test_process.py
+++ b/tests/engine/test_process.py
@@ -175,6 +175,36 @@ def test_work_calc_finish(self):
         run(process)
         assert process.node.is_finished_ok

+    def test_on_finish_node_updated_before_broadcast(self, monkeypatch):
+        """Test that the process state and outputs have been updated in the database before a broadcast is invoked.
+
+        In plumpy.Process.on_entered the state update is broadcast. When a process is finished, this results in the
+        next process being run. If the next process accesses the process that just finished, it may fail to retrieve
+        the outputs or the correct process state, because this information has not yet been updated in the database.
+        """
+        import copy
+
+        # By monkeypatching the parent class we can check the state when the parent class's method is invoked and
+        # verify that the state has been correctly updated.
+        original_on_entered = copy.deepcopy(plumpy.Process.on_entered)
+
+        def on_entered(self, from_state):
+            if self._state.LABEL.value == 'finished':
+                assert (
+                    self.node.is_finished_ok
+                ), 'Node state should have been updated before plumpy.Process.on_entered is invoked.'
+                assert (
+                    self.node.outputs.result.value == 2
+                ), 'Outputs should have been attached before plumpy.Process.on_entered is invoked.'
+            original_on_entered(self, from_state)
+
+        monkeypatch.setattr(plumpy.Process, 'on_entered', on_entered)
+        # Ensure the process has run correctly, otherwise the asserts in the monkeypatched method would have been
+        # skipped.
+        assert run_get_node(test_processes.AddProcess, a=1, b=1).node.is_finished_ok, 'Process should not fail.'
+
     @staticmethod
     def test_save_instance_state():
         """Test save instance's state."""
diff --git a/tests/schedulers/test_direct.py b/tests/schedulers/test_direct.py
index 8879bdc88e..41d924e182 100644
--- a/tests/schedulers/test_direct.py
+++ b/tests/schedulers/test_direct.py
@@ -70,3 +70,41 @@ def test_submit_script_with_num_cores_per_mpiproc(scheduler, template):
     )
     result = scheduler.get_submit_script(template)
     assert f'export OMP_NUM_THREADS={num_cores_per_mpiproc}' in result
+
+
+@pytest.mark.timeout(timeout=10)
+def test_kill_job(scheduler, tmpdir):
+    """Test that kill_job kills all descendant children of the process.
+
+    For that we spawn a new process that runs a sleep command, then we kill it and check that the sleep process is
+    no longer alive.
+
+    current process    forked process        run script.sh
+    python─────────────python───────────────────bash──────sleep
+                       we kill this process     we check if still running
+    """
+    import multiprocessing
+    import time
+
+    from aiida.transports.plugins.local import LocalTransport
+    from psutil import Process
+
+    def run_sleep_100():
+        import subprocess
+
+        script = tmpdir / 'sleep.sh'
+        script.write('sleep 100')
+        # This call blocks until the subprocess finishes.
+        subprocess.run(['bash', script.strpath], check=False)
+
+    forked_process = multiprocessing.Process(target=run_sleep_100)
+    forked_process.start()
+    while len(forked_process_children := Process(forked_process.pid).children(recursive=True)) != 2:
+        time.sleep(0.1)
+    bash_process = forked_process_children[0]
+    sleep_process = forked_process_children[1]
+    with LocalTransport() as transport:
+        scheduler.set_transport(transport)
+        scheduler.kill(forked_process.pid)
+    while bash_process.is_running() or sleep_process.is_running():
+        time.sleep(0.1)
+    forked_process.join()