From 1362a3a0904c3035d69300fe8eb3e0b48576a4bd Mon Sep 17 00:00:00 2001
From: Max Nigmatulin
Date: Wed, 19 Jul 2023 01:47:20 +0500
Subject: [PATCH] Add saving of timed-out agents' weights

---
 VSharp.ML.AIAgent/.gitignore           |  2 +-
 VSharp.ML.AIAgent/config.py            | 19 +++++++++++++++
 .../learning/genetic_alorithm.py       |  3 ++-
 VSharp.ML.AIAgent/learning/r_learn.py  | 23 +++++++++++--------
 .../ml/model_wrappers/nnwrapper.py     |  1 +
 .../weights_dump/weights_dump.py       |  9 +++++---
 6 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/VSharp.ML.AIAgent/.gitignore b/VSharp.ML.AIAgent/.gitignore
index 9fc059d45..46a3eb297 100644
--- a/VSharp.ML.AIAgent/.gitignore
+++ b/VSharp.ML.AIAgent/.gitignore
@@ -1,6 +1,6 @@
 # python cache and venv
 .env
 __pycache__/
-epochs_best/
+report/
 *.pkl
 *.onnx
diff --git a/VSharp.ML.AIAgent/config.py b/VSharp.ML.AIAgent/config.py
index e93007a86..7c7bd5aaa 100644
--- a/VSharp.ML.AIAgent/config.py
+++ b/VSharp.ML.AIAgent/config.py
@@ -1,4 +1,7 @@
 import logging
+from dataclasses import dataclass
+from pathlib import Path
+from shutil import rmtree
 
 import ml.models
 
@@ -18,9 +21,25 @@ class ServerConfig:
     VSHARP_INSTANCES_START_PORT = 8100
 
 
+@dataclass(slots=True, frozen=True)
+class DumpByTimeoutFeature:
+    enabled: bool
+    timeout_seconds: int
+    save_path: Path
+
+    def create_save_path_if_not_exists(self):
+        if self.enabled:
+            if self.save_path.exists():
+                rmtree(self.save_path)
+            self.save_path.mkdir()
+
+
 class FeatureConfig:
     VERBOSE_TABLES = True
     SHOW_SUCCESSORS = True
     NAME_LEN = 7
     N_BEST_SAVED_EACH_GEN = 2
     DISABLE_MESSAGE_CHECKS = True
+    DUMP_BY_TIMEOUT = DumpByTimeoutFeature(
+        enabled=True, timeout_seconds=1200, save_path=Path("./report/timeouted_agents/")
+    )
diff --git a/VSharp.ML.AIAgent/learning/genetic_alorithm.py b/VSharp.ML.AIAgent/learning/genetic_alorithm.py
index cfe335470..3565602f8 100644
--- a/VSharp.ML.AIAgent/learning/genetic_alorithm.py
+++ b/VSharp.ML.AIAgent/learning/genetic_alorithm.py
@@ -8,7 +8,7 @@
 from torch.multiprocessing import set_start_method
 
 from common.constants import APP_LOG_FILE, BASE_REPORT_DIR
-from config import GeneralConfig
+from config import FeatureConfig, GeneralConfig
 from epochs_statistics.utils import (
     create_report_dir,
     init_epochs_best_dir,
@@ -59,6 +59,7 @@ def run(
     init_tables_file()
     init_log_file()
     init_epochs_best_dir()
+    FeatureConfig.DUMP_BY_TIMEOUT.create_save_path_if_not_exists()
 
     ga_instance = pygad.GA(
         num_generations=num_generations,
diff --git a/VSharp.ML.AIAgent/learning/r_learn.py b/VSharp.ML.AIAgent/learning/r_learn.py
index 52868914d..f06193493 100644
--- a/VSharp.ML.AIAgent/learning/r_learn.py
+++ b/VSharp.ML.AIAgent/learning/r_learn.py
@@ -27,7 +27,6 @@
     rewrite_best_tables_file,
 )
 from ml.model_wrappers.nnwrapper import NNWrapper
-from ml.model_wrappers.protocols import Predictor
 from selection.classes import AgentResultsOnGameMaps, GameResult, Map2Result
 from selection.scorer import straight_scorer
 from timer.resources_manager import manage_map_inference_times_array
@@ -44,7 +43,7 @@
 
 
 def play_map(
-    with_agent: NAgent, with_model: Predictor
+    with_agent: NAgent, with_model: NNWrapper
 ) -> tuple[GameResult, TimeDuration]:
     steps_count = 0
     game_state = None
@@ -90,6 +89,12 @@ def play_map(
 
     end_time = perf_counter()
 
+    if (
+        FeatureConfig.DUMP_BY_TIMEOUT.enabled
+        and end_time - start_time > FeatureConfig.DUMP_BY_TIMEOUT.timeout_seconds
+    ):
+        save_weights(with_model.weights, to=FeatureConfig.DUMP_BY_TIMEOUT.save_path)
+
     if actual_coverage != 100 and steps_count != steps:
         logging.error(
             f"<{with_model.name()}>: not all steps exshausted on {with_agent.map.MapName} with non-100% coverage"
@@ -211,35 +216,35 @@ def fitness_function(ga_inst, solution, solution_idx) -> float:
     model.load_state_dict(model_weights_dict)
     model.to(DEVICE)
     model.eval()
-    predictor = NNWrapper(model, weights_flat=solution)
+    nnwrapper = NNWrapper(model, weights_flat=solution)
 
     with game_server_socket_manager() as ws:
         maps = get_maps(websocket=ws, type=maps_type)
         with tqdm.tqdm(
             total=len(maps),
-            desc=f"{predictor.name():20}: {maps_type.value}",
+            desc=f"{nnwrapper.name():20}: {maps_type.value}",
             **TQDM_FORMAT_DICT,
         ) as pbar:
             rst: list[GameResult] = []
             list_of_map2result: list[Map2Result] = []
 
             for game_map in maps:
-                logging.info(f"<{predictor.name()}> is playing {game_map.MapName}")
+                logging.info(f"<{nnwrapper.name()}> is playing {game_map.MapName}")
                 game_result, time = play_map(
-                    with_agent=NAgent(ws, game_map, max_steps), with_model=predictor
+                    with_agent=NAgent(ws, game_map, max_steps), with_model=nnwrapper
                 )
                 rst.append(game_result)
                 list_of_map2result.append(Map2Result(game_map, game_result))
                 logging.info(
-                    f"<{predictor.name()}> finished map {game_map.MapName} "
+                    f"<{nnwrapper.name()}> finished map {game_map.MapName} "
                     f"in {game_result.steps_count} steps, {time} seconds, "
                     f"actual coverage: {game_result.actual_coverage_percent:.2f}"
                 )
                 pbar.update(1)
 
-            send_game_results(Agent2ResultsOnMaps(predictor, list_of_map2result))
+            send_game_results(Agent2ResultsOnMaps(nnwrapper, list_of_map2result))
 
             dump_and_reset_epoch_times(
-                f"{predictor.name()}_epoch{ga_inst.generations_completed}_pid{getpid()}"
+                f"{nnwrapper.name()}_epoch{ga_inst.generations_completed}_pid{getpid()}"
             )
             return straight_scorer(rst)
diff --git a/VSharp.ML.AIAgent/ml/model_wrappers/nnwrapper.py b/VSharp.ML.AIAgent/ml/model_wrappers/nnwrapper.py
index 48ca4934b..a4f569be3 100644
--- a/VSharp.ML.AIAgent/ml/model_wrappers/nnwrapper.py
+++ b/VSharp.ML.AIAgent/ml/model_wrappers/nnwrapper.py
@@ -11,6 +11,7 @@ class NNWrapper(Predictor):
 
     def __init__(self, model: torch.nn.Module, weights_flat: list[float]) -> None:
         self.model = model
+        self.weights = weights_flat
         self._name = str(sum(weights_flat))
         self._hash = tuple(weights_flat).__hash__()
 
diff --git a/VSharp.ML.AIAgent/weights_dump/weights_dump.py b/VSharp.ML.AIAgent/weights_dump/weights_dump.py
index 3b48aa51d..2731c0815 100644
--- a/VSharp.ML.AIAgent/weights_dump/weights_dump.py
+++ b/VSharp.ML.AIAgent/weights_dump/weights_dump.py
@@ -1,8 +1,11 @@
 import json
+from pathlib import Path
 
 from numpy import typing as npt
 
 
-def save_weights(w: npt.NDArray, to: str):
-    with open(to / f"{sum(w)}.txt", "w+") as weights_file:
-        json.dump(list(w), weights_file)
+def save_weights(w: npt.NDArray, to: Path):
+    file_to_create = to / f"{sum(w)}.txt"
+    if not file_to_create.exists():
+        with open(file_to_create, "w+") as weights_file:
+            json.dump(list(w), weights_file)
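
Reviewer note: below is a minimal, self-contained sketch of the dump-by-timeout flow this patch wires together, runnable outside the repo. The DumpByTimeoutFeature dataclass and save_weights mirror the patched code; the driver at the bottom is hypothetical (a 2-second timeout, a time.sleep stand-in for a slow play_map episode, and a made-up weight vector instead of the GA solution the real run uses with timeout_seconds=1200):

# Standalone sketch, not repo code: mirrors DumpByTimeoutFeature and save_weights
# from this patch; the timed "episode" below is a hypothetical stand-in for play_map.
import json
import time
from dataclasses import dataclass
from pathlib import Path
from shutil import rmtree

import numpy as np
from numpy import typing as npt


@dataclass(slots=True, frozen=True)
class DumpByTimeoutFeature:
    enabled: bool
    timeout_seconds: int
    save_path: Path

    def create_save_path_if_not_exists(self) -> None:
        # Recreate the dump directory from scratch at the start of a run.
        if self.enabled:
            if self.save_path.exists():
                rmtree(self.save_path)
            # parents=True keeps the sketch standalone; the patch itself calls plain mkdir().
            self.save_path.mkdir(parents=True)


def save_weights(w: npt.NDArray, to: Path) -> None:
    # One file per weight sum; an agent that was already dumped is skipped.
    file_to_create = to / f"{sum(w)}.txt"
    if not file_to_create.exists():
        with open(file_to_create, "w+") as weights_file:
            json.dump(list(w), weights_file)


if __name__ == "__main__":
    # Hypothetical config: 2-second timeout instead of the patch's 1200 seconds.
    dump_by_timeout = DumpByTimeoutFeature(
        enabled=True, timeout_seconds=2, save_path=Path("./report/timeouted_agents/")
    )
    dump_by_timeout.create_save_path_if_not_exists()

    # Stand-in for an agent's flattened weights (the real values come from the GA solution).
    weights = np.array([0.1, 0.2, 0.3])

    start_time = time.perf_counter()
    time.sleep(3)  # stand-in for a slow play_map episode
    end_time = time.perf_counter()

    if (
        dump_by_timeout.enabled
        and end_time - start_time > dump_by_timeout.timeout_seconds
    ):
        save_weights(weights, to=dump_by_timeout.save_path)
        print(f"dumped slow agent weights to {dump_by_timeout.save_path}")

Note that dumps are keyed on sum(w), the same sum NNWrapper uses for its _name, so a second timed-out agent with an identical weight sum maps to the existing file and is skipped.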