Skip to content

Commit

Permalink
Report execution times per node (#721)
Browse files Browse the repository at this point in the history
* remove task sources

* simplify error summary defs

* improve how the information is displayed
  • Loading branch information
zigaLuksic authored Aug 25, 2023
1 parent cf626fb commit 8b13353
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 85 deletions.
58 changes: 12 additions & 46 deletions eolearn/visualization/eoexecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import base64
import datetime as dt
import importlib
import inspect
import os
import warnings
from collections import defaultdict
Expand All @@ -22,6 +20,7 @@
import fs
import graphviz
import matplotlib as mpl
import numpy as np
import pygments
import pygments.formatter
import pygments.lexers
Expand Down Expand Up @@ -81,7 +80,6 @@ def make_report(self, include_logs: bool = True) -> None:
general_stats=self.eoexecutor.general_stats,
exception_stats=self._get_exception_stats(),
task_descriptions=self._get_node_descriptions(),
task_sources=self._render_task_sources(formatter),
execution_results=self.eoexecutor.execution_results,
execution_tracebacks=self._render_execution_tracebacks(formatter),
execution_logs=execution_logs,
Expand Down Expand Up @@ -124,16 +122,6 @@ def _get_exception_stats(self) -> list[tuple[str, str, list[_ErrorSummary]]]:

exception_stats[error_node.node_uid][origin_str].add_execution(execution_idx, execution)

return self._to_ordered_stats(exception_stats)

def _to_ordered_stats(
self, exception_stats: defaultdict[str, dict[str, _ErrorSummary]]
) -> list[tuple[str, str, list[_ErrorSummary]]]:
"""Exception stats get ordered by nodes in their execution order in workflows. Exception stats that happen
for the same node get ordered by number of occurrences in a decreasing order.
Returns tuples of form (name, uid, [_error_summary])
"""
ordered_exception_stats = []
for node in self.eoexecutor.workflow.get_nodes():
if node.uid not in exception_stats:
Expand All @@ -154,6 +142,16 @@ def _get_node_descriptions(self) -> list[dict[str, Any]]:
node_name = node.get_name(name_counts[node.get_name()])
name_counts[node.get_name()] += 1

node_stats = filter(None, (results.stats.get(node.uid) for results in self.eoexecutor.execution_results))
durations = np.array([(stats.end_time - stats.start_time).total_seconds() for stats in node_stats])
if len(durations) == 0:
duration_report = "unknown"
else:
duration_report = (
f"Between {np.min(durations):.4g} and {np.max(durations):.4g} seconds,"
f" usually {np.mean(durations):.4g} ± {np.std(durations):.4g} seconds"
)

descriptions.append(
{
"name": f"{node_name} ({node.uid})",
Expand All @@ -162,43 +160,11 @@ def _get_node_descriptions(self) -> list[dict[str, Any]]:
key: value.replace("<", "&lt;").replace(">", "&gt;") # type: ignore[attr-defined]
for key, value in node.task.private_task_config.init_args.items()
},
"duration_report": duration_report,
}
)

return descriptions

def _render_task_sources(self, formatter: pygments.formatter.Formatter) -> dict[str, Any]:
    """Builds a source entry for every distinct task class used by the workflow nodes.

    Entries are keyed by "<class name> (<module>)" and each key is processed only once. The value is:

    - a `(subpackage_name, subpackage_version)` tuple when the task's module name starts with "eolearn",
    - the task class source highlighted with the given formatter otherwise,
    - `None` when the source of the task class could not be obtained.

    :param formatter: A pygments formatter used to render the highlighted source code.
    :return: A dictionary mapping task titles to their source entries.
    """
    python_lexer = pygments.lexers.get_lexer_by_name("python", stripall=True)
    rendered: dict[str, Any] = {}

    for node in self.eoexecutor.workflow.get_nodes():
        task = node.task

        title = f"{task.__class__.__name__} ({task.__module__})"
        if title in rendered:
            # This task class was already handled for an earlier node
            continue

        module_name = task.__module__
        if not module_name.startswith("eolearn"):
            try:
                raw_code = inspect.getsource(task.__class__)
                rendered[title] = pygments.highlight(raw_code, python_lexer, formatter)
            except (TypeError, OSError):
                # Source is not retrievable, e.g. for classes defined in a Jupyter notebook where there is
                # no .py file to read from. No workaround is known at the moment.
                rendered[title] = None
            continue

        # Only the first two parts of the module path are kept, e.g. "eolearn.<subpackage>"
        package_name = ".".join(module_name.split(".")[:2])
        package = importlib.import_module(package_name)
        rendered[title] = (package_name, getattr(package, "__version__", "unknown"))

    return rendered

def _render_execution_tracebacks(self, formatter: pygments.formatter.Formatter) -> list:
"""Renders stack traces of those executions which failed"""
tb_lexer = pygments.lexers.get_lexer_by_name("py3tb", stripall=True)
Expand Down
58 changes: 19 additions & 39 deletions eolearn/visualization/report_templates/report.html
Original file line number Diff line number Diff line change
Expand Up @@ -140,54 +140,34 @@ <h3> Summary of exceptions </h3>
{% endif %}
</div>

<h2> EOTasks </h2>
<h2> EONodes </h2>

<div class="indent">

<h3> Initialization parameters </h3>

<div class="indent">
{% for task in task_descriptions %}
<h4 id="{{ task['uid'] }}"> {{ task['name'] }} </h4>

{% if task['args'] %}
<ul>
{% for key, value in task['args'].items() %}
<li> <pre> {{ key }} = {{ value }} </pre> </li>
{% endfor %}
</ul>
{% else %}
<div class="indent">
<p>
No initialization parameters
</p>
</div>
{% endif %}
<h3 id="{{ task['uid'] }}"> {{ task['name'] }} </h3>

{% endfor %}
</div>

<h3> Task sources </h3>

<div class="indent">
{% for task_title, task_source in task_sources.items() %}
<h4> {{ task_title }} </h4>
<div class="indent">
<b>Execution duration:</b> {{ task['duration_report'] }} <br>

<div class="indent">
{% if task_source is none %}
<p>
Cannot collect source code of a task which is not defined in a .py file
</p>
{% elif task_source is string %}
{{ task_source }}
{% else %}
<h4> Initialization parameters </h4>
{% if task['args'] %}
<ul>
{% for key, value in task['args'].items() %}
<li> <pre> {{ key }} = {{ value }} </pre> </li>
{% endfor %}
</ul>
{% else %}
<div class="indent">
<p>
Imported from {{ task_source[0] }} version {{ task_source[1] }}
No initialization parameters
</p>
{% endif %}
</div>
{% endfor %}
</div>
{% endif %}
</div>
{% endfor %}
</div>

</div>

<h2> Execution details </h2>
Expand Down

0 comments on commit 8b13353

Please sign in to comment.