Skip to content

Commit

Permalink
Better exception grouping in reports (#720)
Browse files Browse the repository at this point in the history
* switch over to ExceptionInfo

* make the test suitable to see if we made progress

* summarize according to origin with example message

* improve report style

* switch to better data structure

* add hyperlinks

* change iterator to get rid of import

* Update eolearn/visualization/eoexecutor.py

Co-authored-by: Matic Lubej <[email protected]>

---------

Co-authored-by: Matic Lubej <[email protected]>
  • Loading branch information
zigaLuksic and Matic Lubej authored Aug 25, 2023
1 parent d296a4e commit cf626fb
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 48 deletions.
12 changes: 10 additions & 2 deletions eolearn/core/eonode.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,5 +100,13 @@ class NodeStats:
node_name: str
start_time: dt.datetime
end_time: dt.datetime
exception: BaseException | None = None
exception_traceback: str | None = None
exception_info: ExceptionInfo | None = None


@dataclass(frozen=True)
class ExceptionInfo:
    """Immutable record describing an exception raised while a node was executing.

    The three fields are supplied by whoever builds the record:

    - ``exception``: the exception object itself.
    - ``traceback``: the traceback in textual form.
    - ``origin``: a short textual description of where the exception came from.
    """

    exception: BaseException
    traceback: str
    origin: str
38 changes: 20 additions & 18 deletions eolearn/core/eoworkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@
import logging
import traceback
from dataclasses import dataclass, field, fields
from typing import Literal, Sequence, Tuple, cast, overload
from typing import Literal, Sequence, overload

from .eodata import EOPatch
from .eonode import EONode, NodeStats
from .eonode import EONode, ExceptionInfo, NodeStats
from .eotask import EOTask
from .eoworkflow_tasks import OutputTask
from .graph import DirectedGraph
Expand Down Expand Up @@ -209,7 +209,7 @@ def _execute_nodes(
)

stats_dict[node.uid] = stats
if stats.exception is not None:
if stats.exception_info is not None:
break

intermediate_results[node.uid] = result
Expand All @@ -235,43 +235,45 @@ def _execute_node(

LOGGER.debug("Computing %s(*%s, **%s)", node.task.__class__.__name__, str(task_args), str(node_input_kwargs))
start_time = dt.datetime.now()
result, is_success = self._execute_task(node.task, task_args, node_input_kwargs, raise_errors=raise_errors)
result = self._execute_task(node.task, task_args, node_input_kwargs, raise_errors=raise_errors)
end_time = dt.datetime.now()

if is_success:
exception, exception_traceback = None, None
else:
exception, exception_traceback = cast(Tuple[BaseException, str], result) # temporary fix until 3.8
result = None
if isinstance(result, ExceptionInfo):
exception_info, result = result, None
LOGGER.error(
"Task '%s' with id %s failed with stack trace:\n%s", node.get_name(), node.uid, exception_traceback
"Task '%s' with id %s failed with stack trace:\n%s",
node.get_name(),
node.uid,
exception_info.traceback,
)
else:
exception_info = None

node_stats = NodeStats(
node_uid=node.uid,
node_name=node.get_name(),
start_time=start_time,
end_time=end_time,
exception=exception,
exception_traceback=exception_traceback,
exception_info=exception_info,
)
return result, node_stats

@staticmethod
def _execute_task(
task: EOTask, task_args: list[object], task_kwargs: dict[str, object], raise_errors: bool
) -> tuple[object, bool]:
) -> object | ExceptionInfo:
"""Executes an EOTask and handles any potential exceptions."""
if raise_errors:
return task.execute(*task_args, **task_kwargs), True
return task.execute(*task_args, **task_kwargs)

try:
return task.execute(*task_args, **task_kwargs), True
return task.execute(*task_args, **task_kwargs)
except KeyboardInterrupt as exception:
raise KeyboardInterrupt from exception
except BaseException as exception:
exception_traceback = traceback.format_exc()
return (exception, exception_traceback), False
trace = traceback.extract_tb(exception.__traceback__)
origin = f"line {trace[-1].lineno} in {trace[-1].filename}." if trace else "unknown origin."
return ExceptionInfo(exception, traceback=traceback.format_exc(), origin=origin)

@staticmethod
def _relax_dependencies(
Expand Down Expand Up @@ -363,7 +365,7 @@ class WorkflowResults:
def __post_init__(self) -> None:
"""Checks if there is any node that failed during the workflow execution."""
for node_uid, node_stats in self.stats.items():
if node_stats.exception is not None:
if node_stats.exception_info is not None:
super().__setattr__("error_node_uid", node_uid)
break

Expand Down
66 changes: 48 additions & 18 deletions eolearn/visualization/eoexecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import warnings
from collections import defaultdict
from contextlib import nullcontext
from dataclasses import dataclass
from typing import Any, cast

import fs
Expand All @@ -28,6 +29,7 @@
from pygments.formatters.html import HtmlFormatter

from eolearn.core import EOExecutor
from eolearn.core.eonode import ExceptionInfo
from eolearn.core.exceptions import EOUserWarning


Expand Down Expand Up @@ -97,40 +99,49 @@ def _create_dependency_graph(self) -> str:
dot = self.eoexecutor.workflow.dependency_graph()
return base64.b64encode(dot.pipe()).decode()

def _get_exception_stats(self) -> list[tuple[str, str, list[tuple[str, int]]]]:
"""Creates aggregated stats about exceptions"""
formatter = HtmlFormatter()
lexer = pygments.lexers.get_lexer_by_name("python", stripall=True)
def _get_exception_stats(self) -> list[tuple[str, str, list[_ErrorSummary]]]:
"""Creates aggregated stats about exceptions
Returns tuples of form (name, uid, [error_summary])
"""

exception_stats: defaultdict[str, defaultdict[str, int]] = defaultdict(lambda: defaultdict(lambda: 0))
exception_stats: defaultdict[str, dict[str, _ErrorSummary]] = defaultdict(dict)

for workflow_results in self.eoexecutor.execution_results:
if not workflow_results.error_node_uid:
for execution_idx, (execution, results) in enumerate(
zip(self.eoexecutor.execution_names, self.eoexecutor.execution_results)
):
if not results.error_node_uid:
continue

error_node = workflow_results.stats[workflow_results.error_node_uid]
exception_str = pygments.highlight(
f"{error_node.exception.__class__.__name__}: {error_node.exception}", lexer, formatter
)
exception_stats[error_node.node_uid][exception_str] += 1
error_node = results.stats[results.error_node_uid]
exception_info: ExceptionInfo = error_node.exception_info # type: ignore[assignment]
origin_str = f"<b>{exception_info.exception.__class__.__name__}</b> raised from {exception_info.origin}"

if origin_str not in exception_stats[error_node.node_uid]:
exception_stats[error_node.node_uid][origin_str] = _ErrorSummary(
origin_str, str(exception_info.exception), []
)

exception_stats[error_node.node_uid][origin_str].add_execution(execution_idx, execution)

return self._to_ordered_stats(exception_stats)

def _to_ordered_stats(
self, exception_stats: defaultdict[str, defaultdict[str, int]]
) -> list[tuple[str, str, list[tuple[str, int]]]]:
self, exception_stats: defaultdict[str, dict[str, _ErrorSummary]]
) -> list[tuple[str, str, list[_ErrorSummary]]]:
"""Exception stats get ordered by nodes in their execution order in workflows. Exception stats that happen
for the same node get ordered by number of occurrences in a decreasing order.
Returns tuples of form (name, uid, [_error_summary])
"""
ordered_exception_stats = []
for node in self.eoexecutor.workflow.get_nodes():
if node.uid not in exception_stats:
continue

node_stats = exception_stats[node.uid]
ordered_exception_stats.append(
(node.get_name(), node.uid, sorted(node_stats.items(), key=lambda item: -item[1]))
)
error_summaries = sorted(node_stats.values(), key=lambda summary: -len(summary.failed_indexed_executions))
ordered_exception_stats.append((node.get_name(), node.uid, error_summaries))

return ordered_exception_stats

Expand Down Expand Up @@ -197,7 +208,8 @@ def _render_execution_tracebacks(self, formatter: pygments.formatter.Formatter)
if results.workflow_failed() and results.error_node_uid is not None:
# second part of above check needed only for typechecking purposes
failed_node_stats = results.stats[results.error_node_uid]
traceback = pygments.highlight(failed_node_stats.exception_traceback, tb_lexer, formatter)
traceback_str = failed_node_stats.exception_info.traceback # type: ignore[union-attr]
traceback = pygments.highlight(traceback_str, tb_lexer, formatter)
else:
traceback = None

Expand All @@ -223,3 +235,21 @@ def _format_datetime(value: dt.datetime) -> str:
def _format_timedelta(value1: dt.datetime, value2: dt.datetime) -> str:
"""Method for formatting time delta into report"""
return str(value2 - value1)


@dataclass
class _ErrorSummary:
    """Summary of one kind of error of a node, together with the executions in which it occurred."""

    origin: str
    example_message: str
    failed_indexed_executions: list[tuple[int, str]]

    def add_execution(self, index: int, name: str) -> None:
        """Records the execution with the given index and name as a failed one."""
        entry = (index, name)
        self.failed_indexed_executions.append(entry)

    @property
    def num_failed(self) -> int:
        """Number of recorded failed executions, exposed as a property so jinja templates can read it."""
        return len(self.failed_indexed_executions)
17 changes: 14 additions & 3 deletions eolearn/visualization/report_templates/report.html
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,24 @@ <h3> Summary of exceptions </h3>

<div class="indent">
<ul>
{% for node_name, node_uid, exception_list in exception_stats %}
{% for node_name, node_uid, error_summary_list in exception_stats %}
<li>
<b>{{ node_name }} ({{ node_uid }}):</b>
<ul>
{% for exception_string, count in exception_list %}
{% for error_summary in error_summary_list %}
<li>
{{ count }} times: {{ exception_string }}
{{ error_summary.num_failed }} times: {{ error_summary.origin }}
<br>
Example message: <pre>{{ error_summary.example_message }}</pre>
<br>
<button class="collapsible">Failed executions</button>
<div class="collapsible-content">
<ul>
{% for idx, execution in error_summary.failed_indexed_executions %}
<li><a href="#execution{{ idx }}">{{ execution }}</a></li>
{% endfor %}
</ul>
</div>
</li>
{% endfor %}
</ul>
Expand Down
7 changes: 3 additions & 4 deletions tests/core/test_eoworkflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,8 +291,7 @@ def test_exception_handling():
assert node_stats.node_name == node.name

if node is exception_node:
assert isinstance(node_stats.exception, CustomExceptionError)
assert node_stats.exception_traceback.startswith("Traceback")
assert isinstance(node_stats.exception_info.exception, CustomExceptionError)
assert node_stats.exception_info.traceback.startswith("Traceback")
else:
assert node_stats.exception is None
assert node_stats.exception_traceback is None
assert node_stats.exception_info is None
12 changes: 9 additions & 3 deletions tests/visualization/test_eoexecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,18 @@ def execute(self, *_, **kwargs):
my_logger.debug("Debug statement of Example task with kwargs: %s", kwargs)

if "arg1" in kwargs and kwargs["arg1"] is None:
raise Exception
raise RuntimeError(f"Oh no, i spilled my kwargs all over the floor! {kwargs}!")


NODE = EONode(ExampleTask())
WORKFLOW = EOWorkflow([NODE, EONode(task=ExampleTask(), inputs=[NODE, NODE])])
EXECUTION_KWARGS = [{NODE: {"arg1": 1}}, {}, {NODE: {"arg1": 3, "arg3": 10}}, {NODE: {"arg1": None}}]
EXECUTION_KWARGS = [
{NODE: {"arg1": 1}},
{},
{NODE: {"arg1": 3, "arg3": 10}},
{NODE: {"arg1": None}},
{NODE: {"arg1": None, "arg3": 10}},
]


@pytest.mark.parametrize("save_logs", [True, False])
Expand All @@ -42,7 +48,7 @@ def test_report_creation(save_logs, include_logs):
EXECUTION_KWARGS,
logs_folder=tmp_dir_name,
save_logs=save_logs,
execution_names=["ex 1", 2, 0.4, None],
execution_names=["ex 1", 2, 0.4, None, "beep"],
)
executor.run(workers=10)
executor.make_report(include_logs=include_logs)
Expand Down

0 comments on commit cf626fb

Please sign in to comment.