diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_eval_run.py b/src/promptflow-evals/promptflow/evals/evaluate/_eval_run.py
index d8be8f22559..6142747def4 100644
--- a/src/promptflow-evals/promptflow/evals/evaluate/_eval_run.py
+++ b/src/promptflow-evals/promptflow/evals/evaluate/_eval_run.py
@@ -10,7 +10,7 @@
 import requests
 import time
 import uuid
-from typing import Any, Dict, Optional
+from typing import Any, Dict, Optional, Set
 from urllib.parse import urlparse
 
 from requests.adapters import HTTPAdapter
@@ -142,8 +142,9 @@ def _start_run(self) -> None:
         """
         Start the run, or, if it is not applicable (for example, if tracking is not enabled), mark it as started.
         """
-        if self._status != RunStatus.NOT_STARTED:
-            raise ValueError("The run has already started. Please end this run and to start another one.")
+        self._check_state_and_log('start run',
+                                  {v for v in RunStatus if v != RunStatus.NOT_STARTED},
+                                  True)
         self._status = RunStatus.STARTED
         if self._tracking_uri is None:
             LOGGER.warning("tracking_uri was not provided, "
@@ -196,7 +197,9 @@ def _end_run(self, reason: str) -> None:
         :type reason: str
         :raises: ValueError if the run is not in ("FINISHED", "FAILED", "KILLED")
         """
-        if not self._check_state_and_log('stop run'):
+        if not self._check_state_and_log('stop run',
+                                         {RunStatus.BROKEN, RunStatus.NOT_STARTED, RunStatus.TERMINATED},
+                                         False):
             return
         if self._is_promptflow_run:
             # This run is already finished, we just add artifacts/metrics to it.
@@ -206,9 +209,6 @@ def _end_run(self, reason: str) -> None:
             raise ValueError(
                 f"Incorrect terminal status {reason}. " 'Valid statuses are "FINISHED", "FAILED" and "KILLED".'
             )
-        if self._status == RunStatus.TERMINATED:
-            LOGGER.warning("Unable to stop run because it was already terminated.")
-            return
         url = f"https://{self._url_base}/mlflow/v2.0" f"{self._get_scope()}/api/2.0/mlflow/runs/update"
         body = {
             "run_uuid": self.info.run_id,
@@ -309,22 +309,30 @@ def _log_warning(self, failed_op: str, response: requests.Response) -> None:
             f"{response.text=}."
         )
 
-    def _check_state_and_log(self, action : str) -> bool:
+    def _check_state_and_log(
+            self,
+            action: str,
+            bad_states: Set[RunStatus],
+            should_raise: bool) -> bool:
         """
         Check that the run is in the correct state and log worning if it is not.
 
         :param action: Action, which caused this check. For example if it is "log artifact",
                        the log message will start "Unable to log artifact."
         :type action: str
+        :param bad_states: The states, considered invalid for given action.
+        :type bad_states: set
+        :param should_raise: Should we raise an error if the bad state has been encountered?
+        :type should_raise: bool
+        :raises: RuntimeError if should_raise is True and invalid state was encountered.
         :return: boolean saying if run is in the correct state.
         """
-        if self._status == RunStatus.NOT_STARTED:
-            LOGGER.warning(
-                f"Unable to {action}. The run did not started. "
-                "Please start the run by calling start_run method.")
-            return False
-        if self._status == RunStatus.BROKEN:
-            LOGGER.warning(f"Unable to {action} because the run failed to start.")
+        if self._status in bad_states:
+            msg = f"Unable to {action} due to Run status={self._status}."
+            if should_raise:
+                raise RuntimeError(msg)
+            else:
+                LOGGER.warning(msg)
             return False
         return True
 
@@ -338,7 +346,7 @@ def log_artifact(self, artifact_folder: str, artifact_name: str = EVALUATION_ART
         :param artifact_folder: The folder with artifacts to be uploaded.
         :type artifact_folder: str
         """
-        if not self._check_state_and_log('log artifact'):
+        if not self._check_state_and_log('log artifact', {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
             return
         # Check if artifact dirrectory is empty or does not exist.
         if not os.path.isdir(artifact_folder):
@@ -425,7 +433,7 @@ def log_metric(self, key: str, value: float) -> None:
         :param value: The valure to be logged.
         :type value: float
         """
-        if not self._check_state_and_log('log metric'):
+        if not self._check_state_and_log('log metric', {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
             return
         body = {
             "run_uuid": self.info.run_id,
@@ -450,7 +458,7 @@ def write_properties_to_run_history(self, properties: Dict[str, Any]) -> None:
         :param properties: The properties to be written to run history.
         :type properties: dict
         """
-        if not self._check_state_and_log('write properties'):
+        if not self._check_state_and_log('write properties', {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
             return
         # update host to run history and request PATCH API
         response = self.request_with_retry(
diff --git a/src/promptflow-evals/tests/evals/unittests/test_eval_run.py b/src/promptflow-evals/tests/evals/unittests/test_eval_run.py
index 388a2bfdca2..22964bbf12e 100644
--- a/src/promptflow-evals/tests/evals/unittests/test_eval_run.py
+++ b/src/promptflow-evals/tests/evals/unittests/test_eval_run.py
@@ -103,7 +103,7 @@ def test_run_logs_if_terminated(self, token_mock, caplog):
         run._end_run("KILLED")
         run._end_run("KILLED")
         assert len(caplog.records) == 1
-        assert "Unable to stop run because it was already terminated." in caplog.records[0].message
+        assert "Unable to stop run due to Run status=RunStatus.TERMINATED." in caplog.records[0].message
 
     def test_end_logs_if_fails(self, token_mock, caplog):
         """Test that if the terminal status setting was failed, it is logged."""
@@ -158,17 +158,17 @@ def test_start_run_fails(self, token_mock, caplog):
         # Log artifact
         run.log_artifact("test")
         assert len(caplog.records) == 1
-        assert "Unable to log artifact because the run failed to start." in caplog.records[0].message
+        assert "Unable to log artifact due to Run status=RunStatus.BROKEN." in caplog.records[0].message
         caplog.clear()
         # Log metric
         run.log_metric("a", 42)
         assert len(caplog.records) == 1
-        assert "Unable to log metric because the run failed to start." in caplog.records[0].message
+        assert "Unable to log metric due to Run status=RunStatus.BROKEN." in caplog.records[0].message
         caplog.clear()
         # End run
         run._end_run("FINISHED")
         assert len(caplog.records) == 1
-        assert "Unable to stop run because the run failed to start." in caplog.records[0].message
+        assert "Unable to stop run due to Run status=RunStatus.BROKEN." in caplog.records[0].message
         caplog.clear()
 
     @patch("promptflow.evals.evaluate._eval_run.requests.Session")
@@ -463,8 +463,8 @@ def test_write_properties_to_run_history_logs_error(self, token_mock, caplog):
         run.write_properties_to_run_history({'foo': 'bar'})
         assert len(caplog.records) == 3
         assert "tracking_uri was not provided," in caplog.records[0].message
-        assert "Unable to write properties because the run failed to start." in caplog.records[1].message
-        assert "Unable to stop run because the run failed to start." in caplog.records[2].message
+        assert "Unable to write properties due to Run status=RunStatus.BROKEN." in caplog.records[1].message
+        assert "Unable to stop run due to Run status=RunStatus.BROKEN." in caplog.records[2].message
 
     @pytest.mark.parametrize(
         'function_literal,args,expected_action',
@@ -488,4 +488,25 @@ def test_logs_if_not_started(self, token_mock, caplog, function_literal, args, e
         getattr(run, function_literal)(*args)
         assert len(caplog.records) == 1
         assert expected_action in caplog.records[0].message, caplog.records[0].message
-        assert "The run did not started." in caplog.records[0].message, caplog.records[0].message
+        assert f"Unable to {expected_action} due to Run status=RunStatus.NOT_STARTED" in caplog.records[
+            0].message, caplog.records[0].message
+
+    @pytest.mark.parametrize(
+        'status',
+        [RunStatus.STARTED, RunStatus.BROKEN, RunStatus.TERMINATED]
+    )
+    def test_starting_started_run(self, token_mock, status):
+        """Test exception if the run was already started"""
+        run = EvalRun(
+            run_name=None,
+            **TestEvalRun._MOCK_CREDS
+        )
+        mock_session = MagicMock()
+        mock_session.request.return_value = self._get_mock_create_resonse(500 if status == RunStatus.BROKEN else 200)
+        with patch("promptflow.evals.evaluate._eval_run.requests.Session", return_value=mock_session):
+            run._start_run()
+            if status == RunStatus.TERMINATED:
+                run._end_run('FINISHED')
+            with pytest.raises(RuntimeError) as cm:
+                run._start_run()
+            assert f"Unable to start run due to Run status={status}" in cm.value.args[0], cm.value.args[0]
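
Reviewer note (not part of the patch): the change above funnels every run-state check through a single `_check_state_and_log(action, bad_states, should_raise)` helper, which raises for the start-run path and warns-and-skips for the logging paths. The standalone sketch below illustrates that call pattern under simplified assumptions; `RunStatus`, `EvalRunSketch`, and the logging setup here are stand-ins for illustration, not the real promptflow classes.

# Standalone sketch of the consolidated state check (assumed simplified types).
import enum
import logging

LOGGER = logging.getLogger(__name__)


class RunStatus(enum.Enum):
    NOT_STARTED = 0
    STARTED = 1
    BROKEN = 2
    TERMINATED = 3


class EvalRunSketch:
    def __init__(self) -> None:
        self._status = RunStatus.NOT_STARTED

    def _check_state_and_log(self, action, bad_states, should_raise):
        # Single gate: raise for the start-run path, warn and skip for logging paths.
        if self._status in bad_states:
            msg = f"Unable to {action} due to Run status={self._status}."
            if should_raise:
                raise RuntimeError(msg)
            LOGGER.warning(msg)
            return False
        return True

    def _start_run(self):
        # Starting twice (or after a failure) raises instead of silently continuing.
        self._check_state_and_log(
            "start run",
            {v for v in RunStatus if v != RunStatus.NOT_STARTED},
            True,
        )
        self._status = RunStatus.STARTED

    def log_metric(self, key, value):
        # Logging paths only warn and return when the run cannot accept data.
        if not self._check_state_and_log(
                "log metric", {RunStatus.BROKEN, RunStatus.NOT_STARTED}, False):
            return
        LOGGER.info("logged %s=%s", key, value)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    run = EvalRunSketch()
    run.log_metric("accuracy", 0.9)   # warns: run was never started
    run._start_run()
    run.log_metric("accuracy", 0.9)   # succeeds once the run is started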