Better exception grouping in reports (#720)

* switch over to ExceptionInfo
* make the test suitable to see if we made progress
* summarize according to origin with example message
* improve report style
* switch to better data structure
* add hyperlinks
* change iterator to get rid of import
* Update eolearn/visualization/eoexecutor.py

Co-authored-by: Matic Lubej <[email protected]>

---------

Co-authored-by: Matic Lubej <[email protected]>
sentinel-hub · Aug 25, 2023 · cf626fb
1 parent d296a4e
commit cf626fb
Show file tree

Hide file tree

Showing 6 changed files with 104 additions and 48 deletions.
diff --git a/eolearn/core/eonode.py b/eolearn/core/eonode.py
@@ -100,5 +100,13 @@ class NodeStats:
     node_name: str
     start_time: dt.datetime
     end_time: dt.datetime
-    exception: BaseException | None = None
-    exception_traceback: str | None = None
+    exception_info: ExceptionInfo | None = None
+
+
+@dataclass(frozen=True)
+class ExceptionInfo:
+    """Contains information on exceptions that occur when executing a node."""
+
+    exception: BaseException
+    traceback: str
+    origin: str
diff --git a/eolearn/core/eoworkflow.py b/eolearn/core/eoworkflow.py
@@ -23,10 +23,10 @@
 import logging
 import traceback
 from dataclasses import dataclass, field, fields
-from typing import Literal, Sequence, Tuple, cast, overload
+from typing import Literal, Sequence, overload
 
 from .eodata import EOPatch
-from .eonode import EONode, NodeStats
+from .eonode import EONode, ExceptionInfo, NodeStats
 from .eotask import EOTask
 from .eoworkflow_tasks import OutputTask
 from .graph import DirectedGraph
@@ -209,7 +209,7 @@ def _execute_nodes(
             )
 
             stats_dict[node.uid] = stats
-            if stats.exception is not None:
+            if stats.exception_info is not None:
                 break
 
             intermediate_results[node.uid] = result
@@ -235,43 +235,45 @@ def _execute_node(
 
         LOGGER.debug("Computing %s(*%s, **%s)", node.task.__class__.__name__, str(task_args), str(node_input_kwargs))
         start_time = dt.datetime.now()
-        result, is_success = self._execute_task(node.task, task_args, node_input_kwargs, raise_errors=raise_errors)
+        result = self._execute_task(node.task, task_args, node_input_kwargs, raise_errors=raise_errors)
         end_time = dt.datetime.now()
 
-        if is_success:
-            exception, exception_traceback = None, None
-        else:
-            exception, exception_traceback = cast(Tuple[BaseException, str], result)  # temporary fix until 3.8
-            result = None
+        if isinstance(result, ExceptionInfo):
+            exception_info, result = result, None
             LOGGER.error(
-                "Task '%s' with id %s failed with stack trace:\n%s", node.get_name(), node.uid, exception_traceback
+                "Task '%s' with id %s failed with stack trace:\n%s",
+                node.get_name(),
+                node.uid,
+                exception_info.traceback,
             )
+        else:
+            exception_info = None
 
         node_stats = NodeStats(
             node_uid=node.uid,
             node_name=node.get_name(),
             start_time=start_time,
             end_time=end_time,
-            exception=exception,
-            exception_traceback=exception_traceback,
+            exception_info=exception_info,
         )
         return result, node_stats
 
     @staticmethod
     def _execute_task(
         task: EOTask, task_args: list[object], task_kwargs: dict[str, object], raise_errors: bool
-    ) -> tuple[object, bool]:
+    ) -> object | ExceptionInfo:
         """Executes an EOTask and handles any potential exceptions."""
         if raise_errors:
-            return task.execute(*task_args, **task_kwargs), True
+            return task.execute(*task_args, **task_kwargs)
 
         try:
-            return task.execute(*task_args, **task_kwargs), True
+            return task.execute(*task_args, **task_kwargs)
         except KeyboardInterrupt as exception:
             raise KeyboardInterrupt from exception
         except BaseException as exception:
-            exception_traceback = traceback.format_exc()
-            return (exception, exception_traceback), False
+            trace = traceback.extract_tb(exception.__traceback__)
+            origin = f"line {trace[-1].lineno} in {trace[-1].filename}." if trace else "unknown origin."
+            return ExceptionInfo(exception, traceback=traceback.format_exc(), origin=origin)
 
     @staticmethod
     def _relax_dependencies(
@@ -363,7 +365,7 @@ class WorkflowResults:
     def __post_init__(self) -> None:
         """Checks if there is any node that failed during the workflow execution."""
         for node_uid, node_stats in self.stats.items():
-            if node_stats.exception is not None:
+            if node_stats.exception_info is not None:
                 super().__setattr__("error_node_uid", node_uid)
                 break
 

diff --git a/eolearn/visualization/eoexecutor.py b/eolearn/visualization/eoexecutor.py
@@ -16,6 +16,7 @@
 import warnings
 from collections import defaultdict
 from contextlib import nullcontext
+from dataclasses import dataclass
 from typing import Any, cast
 
 import fs
@@ -28,6 +29,7 @@
 from pygments.formatters.html import HtmlFormatter
 
 from eolearn.core import EOExecutor
+from eolearn.core.eonode import ExceptionInfo
 from eolearn.core.exceptions import EOUserWarning
 
 
@@ -97,40 +99,49 @@ def _create_dependency_graph(self) -> str:
         dot = self.eoexecutor.workflow.dependency_graph()
         return base64.b64encode(dot.pipe()).decode()
 
-    def _get_exception_stats(self) -> list[tuple[str, str, list[tuple[str, int]]]]:
-        """Creates aggregated stats about exceptions"""
-        formatter = HtmlFormatter()
-        lexer = pygments.lexers.get_lexer_by_name("python", stripall=True)
+    def _get_exception_stats(self) -> list[tuple[str, str, list[_ErrorSummary]]]:
+        """Creates aggregated stats about exceptions
+
+        Returns tuples of form (name, uid, [error_summary])
+        """
 
-        exception_stats: defaultdict[str, defaultdict[str, int]] = defaultdict(lambda: defaultdict(lambda: 0))
+        exception_stats: defaultdict[str, dict[str, _ErrorSummary]] = defaultdict(dict)
 
-        for workflow_results in self.eoexecutor.execution_results:
-            if not workflow_results.error_node_uid:
+        for execution_idx, (execution, results) in enumerate(
+            zip(self.eoexecutor.execution_names, self.eoexecutor.execution_results)
+        ):
+            if not results.error_node_uid:
                 continue
 
-            error_node = workflow_results.stats[workflow_results.error_node_uid]
-            exception_str = pygments.highlight(
-                f"{error_node.exception.__class__.__name__}: {error_node.exception}", lexer, formatter
-            )
-            exception_stats[error_node.node_uid][exception_str] += 1
+            error_node = results.stats[results.error_node_uid]
+            exception_info: ExceptionInfo = error_node.exception_info  # type: ignore[assignment]
+            origin_str = f"<b>{exception_info.exception.__class__.__name__}</b> raised from {exception_info.origin}"
+
+            if origin_str not in exception_stats[error_node.node_uid]:
+                exception_stats[error_node.node_uid][origin_str] = _ErrorSummary(
+                    origin_str, str(exception_info.exception), []
+                )
+
+            exception_stats[error_node.node_uid][origin_str].add_execution(execution_idx, execution)
 
         return self._to_ordered_stats(exception_stats)
 
     def _to_ordered_stats(
-        self, exception_stats: defaultdict[str, defaultdict[str, int]]
-    ) -> list[tuple[str, str, list[tuple[str, int]]]]:
+        self, exception_stats: defaultdict[str, dict[str, _ErrorSummary]]
+    ) -> list[tuple[str, str, list[_ErrorSummary]]]:
         """Exception stats get ordered by nodes in their execution order in workflows. Exception stats that happen
         for the same node get ordered by number of occurrences in a decreasing order.
+
+        Returns tuples of form (name, uid, [_error_summary])
         """
         ordered_exception_stats = []
         for node in self.eoexecutor.workflow.get_nodes():
             if node.uid not in exception_stats:
                 continue
 
             node_stats = exception_stats[node.uid]
-            ordered_exception_stats.append(
-                (node.get_name(), node.uid, sorted(node_stats.items(), key=lambda item: -item[1]))
-            )
+            error_summaries = sorted(node_stats.values(), key=lambda summary: -len(summary.failed_indexed_executions))
+            ordered_exception_stats.append((node.get_name(), node.uid, error_summaries))
 
         return ordered_exception_stats
 
@@ -197,7 +208,8 @@ def _render_execution_tracebacks(self, formatter: pygments.formatter.Formatter)
             if results.workflow_failed() and results.error_node_uid is not None:
                 # second part of above check needed only for typechecking purposes
                 failed_node_stats = results.stats[results.error_node_uid]
-                traceback = pygments.highlight(failed_node_stats.exception_traceback, tb_lexer, formatter)
+                traceback_str = failed_node_stats.exception_info.traceback  # type: ignore[union-attr]
+                traceback = pygments.highlight(traceback_str, tb_lexer, formatter)
             else:
                 traceback = None
 
@@ -223,3 +235,21 @@ def _format_datetime(value: dt.datetime) -> str:
     def _format_timedelta(value1: dt.datetime, value2: dt.datetime) -> str:
         """Method for formatting time delta into report"""
         return str(value2 - value1)
+
+
+@dataclass()
+class _ErrorSummary:
+    """Contains data for errors of a node."""
+
+    origin: str
+    example_message: str
+    failed_indexed_executions: list[tuple[int, str]]
+
+    def add_execution(self, index: int, name: str) -> None:
+        """Adds an execution to the summary."""
+        self.failed_indexed_executions.append((index, name))
+
+    @property
+    def num_failed(self) -> int:
+        """Helps with jinja"""
+        return len(self.failed_indexed_executions)
diff --git a/eolearn/visualization/report_templates/report.html b/eolearn/visualization/report_templates/report.html
@@ -112,13 +112,24 @@ <h3> Summary of exceptions </h3>
 
         <div class="indent">
             <ul>
-            {% for node_name, node_uid, exception_list in exception_stats %}
+            {% for node_name, node_uid, error_summary_list in exception_stats %}
                 <li>
                     <b>{{ node_name }} ({{ node_uid }}):</b>
                     <ul>
-                    {% for exception_string, count in exception_list %}
+                    {% for error_summary in error_summary_list %}
                         <li>
-                            {{ count }} times: {{ exception_string }}
+                            {{ error_summary.num_failed }} times: {{ error_summary.origin }}
+                            <br>
+                            Example message: <pre>{{ error_summary.example_message }}</pre>
+                            <br>
+                            <button class="collapsible">Failed executions</button>
+                            <div class="collapsible-content">
+                                <ul>
+                                    {% for idx, execution in error_summary.failed_indexed_executions %}
+                                    <li><a href="#execution{{ idx }}">{{ execution }}</a></li>
+                                    {% endfor %}
+                                </ul>
+                            </div>
                         </li>
                     {% endfor %}
                     </ul>

diff --git a/tests/core/test_eoworkflow.py b/tests/core/test_eoworkflow.py
@@ -291,8 +291,7 @@ def test_exception_handling():
         assert node_stats.node_name == node.name
 
         if node is exception_node:
-            assert isinstance(node_stats.exception, CustomExceptionError)
-            assert node_stats.exception_traceback.startswith("Traceback")
+            assert isinstance(node_stats.exception_info.exception, CustomExceptionError)
+            assert node_stats.exception_info.traceback.startswith("Traceback")
         else:
-            assert node_stats.exception is None
-            assert node_stats.exception_traceback is None
+            assert node_stats.exception_info is None
diff --git a/tests/visualization/test_eoexecutor.py b/tests/visualization/test_eoexecutor.py
@@ -25,12 +25,18 @@ def execute(self, *_, **kwargs):
         my_logger.debug("Debug statement of Example task with kwargs: %s", kwargs)
 
         if "arg1" in kwargs and kwargs["arg1"] is None:
-            raise Exception
+            raise RuntimeError(f"Oh no, i spilled my kwargs all over the floor! {kwargs}!")
 
 
 NODE = EONode(ExampleTask())
 WORKFLOW = EOWorkflow([NODE, EONode(task=ExampleTask(), inputs=[NODE, NODE])])
-EXECUTION_KWARGS = [{NODE: {"arg1": 1}}, {}, {NODE: {"arg1": 3, "arg3": 10}}, {NODE: {"arg1": None}}]
+EXECUTION_KWARGS = [
+    {NODE: {"arg1": 1}},
+    {},
+    {NODE: {"arg1": 3, "arg3": 10}},
+    {NODE: {"arg1": None}},
+    {NODE: {"arg1": None, "arg3": 10}},
+]
 
 
 @pytest.mark.parametrize("save_logs", [True, False])
@@ -42,7 +48,7 @@ def test_report_creation(save_logs, include_logs):
             EXECUTION_KWARGS,
             logs_folder=tmp_dir_name,
             save_logs=save_logs,
-            execution_names=["ex 1", 2, 0.4, None],
+            execution_names=["ex 1", 2, 0.4, None, "beep"],
         )
         executor.run(workers=10)
         executor.make_report(include_logs=include_logs)