From e4974e1c69dc218953fbf8d311d9d2f8c3205e62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jarl=20Sondre=20S=C3=A6ther?= <jarl_sondre@hotmail.com>
Date: Tue, 29 Oct 2024 10:25:35 +0100
Subject: [PATCH] add backup to gpu monitoring

---
 src/itwinai/cli.py                       | 40 +++++++++++++++++++-----
 src/itwinai/torch/monitoring/plotting.py | 11 +++++++
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/src/itwinai/cli.py b/src/itwinai/cli.py
index 1769fc42..2a04bc04 100644
--- a/src/itwinai/cli.py
+++ b/src/itwinai/cli.py
@@ -24,10 +24,13 @@ def generate_gpu_energy_plot(
     log_dir: str = "utilization_logs",
     pattern_str: str = r"dataframe_(?:\w+)_(?:\d+)\.csv$",
     output_file: str = "plots/gpu_energy_plot.png",
-    backup_dir: Optional[str] = "backup/"
+    do_backup: bool = True,
+    backup_dir: Optional[str] = "scalability_backups/",
+    experiment_name: Optional[str] = None,
+    run_name: Optional[str] = None,
 ) -> None:
     """Generate a GPU energy plot showing the expenditure for each combination of
-    strategy and number of GPUs in Watt hours. Backs up the data used to create the 
+    strategy and number of GPUs in Watt hours. Backs up the data used to create the
     plot if ``backup_dir`` is not None
 
     Args:
@@ -37,10 +40,11 @@ def generate_gpu_energy_plot(
             Defaults to ``dataframe_(?:\\w+)_(?:\\d+)\\.csv$``.
         output_file: The path to where the resulting plot should be saved. Defaults to
             ``plots/gpu_energy_plot.png``.
-        backup_dir: The path to where the data used to produce the plot should be 
-            saved. 
+        backup_dir: The path to where the data used to produce the plot should be
+            saved.
 
     """
+    import uuid
     import matplotlib.pyplot as plt
     from itwinai.torch.monitoring.plotting import gpu_energy_plot, read_energy_df
 
@@ -59,6 +63,20 @@ def generate_gpu_energy_plot(
     plt.savefig(output_path)
     print(f"\nSaved GPU energy plot at '{output_path.resolve()}'.")
 
+    if not do_backup or backup_dir is None:
+        return
+
+    # UUID objects are not sliceable; take a short prefix of the hex string instead.
+    if experiment_name is None:
+        experiment_name = "exp_" + uuid.uuid4().hex[:6]
+    if run_name is None:
+        run_name = "run_" + uuid.uuid4().hex[:6]
+
+    backup_path = Path(backup_dir) / experiment_name / run_name / "gpu_energy.csv"
+    backup_path.parent.mkdir(parents=True, exist_ok=True)
+    gpu_utilization_df.to_csv(backup_path, index=False)
+    print(f"Storing backup file at '{backup_path.resolve()}'.")
+
 
 @app.command()
 def generate_communication_plot(
@@ -148,8 +166,8 @@ def scalability_report(
     pattern: Annotated[
         str, typer.Option(help="Python pattern matching names of CSVs in sub-folders.")
     ],
-    log_dir: Annotated[ 
-        str, typer.Option(help="Directory location for the data files to read") 
+    log_dir: Annotated[
+        str, typer.Option(help="Directory location for the data files to read")
     ],
     plot_title: Annotated[Optional[str], typer.Option(help=("Plot name."))] = None,
     # skip_id: Annotated[Optional[int], typer.Option(help=("Skip epoch ID."))] = None,
@@ -170,12 +188,18 @@ def scalability_report(
     """
     # TODO: add max depth and path different from CWD
 
-    from itwinai.scalability import read_scalability_files, archive_data, create_relative_plot, create_absolute_plot
+    from itwinai.scalability import (
+        read_scalability_files,
+        archive_data,
+        create_relative_plot,
+        create_absolute_plot,
+    )
+
     log_dir_path = Path(log_dir)
 
     combined_df, csv_files = read_scalability_files(
         pattern=pattern, log_dir=log_dir_path
-    ) 
+    )
     print("Merged CSV:")
     print(combined_df)
 
diff --git a/src/itwinai/torch/monitoring/plotting.py b/src/itwinai/torch/monitoring/plotting.py
index 3d86a838..fc8ef512 100644
--- a/src/itwinai/torch/monitoring/plotting.py
+++ b/src/itwinai/torch/monitoring/plotting.py
@@ -127,3 +127,14 @@ def gpu_energy_plot(gpu_utilization_df: pd.DataFrame) -> Tuple[Figure, Axes]:
 
 def backup_data(file_paths: List): 
     pass
+
+
+
+
+
+
+
+
+
+
+