PySymGym · emnigma · Dec 5, 2023 · Aug 18, 2023 · Aug 21, 2023 · Sep 1, 2023
diff --git a/VSharp.ML.AIAgent/.gitignore b/VSharp.ML.AIAgent/.gitignore
@@ -1,5 +1,6 @@
 # python cache and venv
 .env
+nvidia_env
 __pycache__/
 report**/
 ml/pretrained_models/

diff --git a/VSharp.ML.AIAgent/learning/play_game.py b/VSharp.ML.AIAgent/learning/play_game.py
@@ -2,29 +2,30 @@
 from statistics import StatisticsError
 from time import perf_counter
 from typing import TypeAlias
-import random
 
 import tqdm
 from func_timeout import FunctionTimedOut, func_set_timeout
 
 from common.classes import GameResult, Map2Result
 from common.constants import TQDM_FORMAT_DICT
+from common.game import GameMap
 from common.utils import get_states
-from config import FeatureConfig
+from config import FeatureConfig, GeneralConfig
 from connection.broker_conn.socket_manager import game_server_socket_manager
 from connection.game_server_conn.connector import Connector
-from connection.game_server_conn.utils import MapsType, get_maps
+from connection.game_server_conn.utils import MapsType
 from learning.timer.resources_manager import manage_map_inference_times_array
 from learning.timer.stats import compute_statistics
 from learning.timer.utils import get_map_inference_times
+from ml.data_loader_compact import ServerDataloaderHeteroVector
 from ml.fileop import save_model
 from ml.model_wrappers.protocols import Predictor
 
 TimeDuration: TypeAlias = float
 
 
 def play_map(
-    with_connector: Connector, with_predictor: Predictor
+    with_connector: Connector, with_predictor: Predictor, with_dataset
 ) -> tuple[GameResult, TimeDuration]:
     steps_count = 0
     game_state = None
@@ -33,12 +34,23 @@ def play_map(
 
     start_time = perf_counter()
 
+    map_steps = []
+
+    def add_single_step(input, output):
+        """Record one game step in the enclosing ``map_steps`` list.
+
+        ``input`` is passed to
+        ``ServerDataloaderHeteroVector.convert_input_to_tensor`` and ``output``
+        is stored on the converted object under the ``"y_true"`` key.
+        """
+        # Only the first element of the conversion result is used.
+        hetero_input, _ = ServerDataloaderHeteroVector.convert_input_to_tensor(input)
+        hetero_input["y_true"] = output
+        # NOTE(review): the return value of .to() is discarded, so this relies on
+        # .to() moving the object in place — TODO confirm for the converted type.
+        hetero_input.to(GeneralConfig.DEVICE)
+        map_steps.append(hetero_input)
+
     try:
         for _ in range(steps):
             game_state = with_connector.recv_state_or_throw_gameover()
-            predicted_state_id = with_predictor.predict(
+            predicted_state_id, nn_output = with_predictor.predict(
                 game_state, with_connector.map.MapName
             )
+
+            add_single_step(game_state, nn_output)
+
             logging.debug(
                 f"<{with_predictor.name()}> step: {steps_count}, available states: {get_states(game_state)}, predicted: {predicted_state_id}"
             )
@@ -83,15 +95,21 @@ def play_map(
         errors_count=errors_count,
         actual_coverage_percent=actual_coverage,
     )
-
-    with_predictor.update(with_connector.map.MapName, model_result)
+    if with_dataset is not None:
+        map_result = (
+            model_result.actual_coverage_percent,
+            -model_result.tests_count,
+            model_result.errors_count,
+            -model_result.steps_count,
+        )
+        with_dataset.update(with_connector.map.MapName, map_result, map_steps)
     return model_result, end_time - start_time
 
 
 def play_map_with_stats(
-    with_connector: Connector, with_predictor: Predictor
+    with_connector: Connector, with_predictor: Predictor, with_dataset
 ) -> tuple[GameResult, TimeDuration]:
-    model_result, time_duration = play_map(with_connector, with_predictor)
+    model_result, time_duration = play_map(with_connector, with_predictor, with_dataset)
 
     with manage_map_inference_times_array():
         try:
@@ -110,15 +128,19 @@ def play_map_with_stats(
 
 @func_set_timeout(FeatureConfig.DUMP_BY_TIMEOUT.timeout_sec)
 def play_map_with_timeout(
-    with_connector: Connector, with_predictor: Predictor
+    with_connector: Connector, with_predictor: Predictor, with_dataset
 ) -> tuple[GameResult, TimeDuration]:
+    """Delegate to ``play_map_with_stats`` under the ``func_set_timeout`` decorator.
+
+    The limit passed to the decorator is
+    ``FeatureConfig.DUMP_BY_TIMEOUT.timeout_sec``; ``with_dataset`` is forwarded
+    unchanged.
+    NOTE(review): presumably raises ``FunctionTimedOut`` when the limit is
+    exceeded — confirm against the func_timeout documentation.
+    """
-    return play_map_with_stats(with_connector, with_predictor)
+    return play_map_with_stats(with_connector, with_predictor, with_dataset)
 
 
-def play_game(with_predictor: Predictor, max_steps: int, maps_type: MapsType):
-    with game_server_socket_manager() as ws:
-        maps = get_maps(websocket=ws, type=maps_type)
-    random.shuffle(maps)
+def play_game(
+    with_predictor: Predictor,
+    max_steps: int,
+    maps: list[GameMap],
+    maps_type: MapsType,
+    with_dataset=None,
+):
+    # random.shuffle(maps)
     with tqdm.tqdm(
         total=len(maps),
         desc=f"{with_predictor.name():20}: {maps_type.value}",
@@ -138,6 +160,7 @@ def play_game(with_predictor: Predictor, max_steps: int, maps_type: MapsType):
                     game_result, time = play_func(
                         with_connector=Connector(ws, game_map, max_steps),
                         with_predictor=with_predictor,
+                        with_dataset=with_dataset,
                     )
                 logging.info(
                     f"<{with_predictor.name()}> finished map {game_map.MapName} "
@@ -159,4 +182,4 @@ def play_game(with_predictor: Predictor, max_steps: int, maps_type: MapsType):
                 )
             list_of_map2result.append(Map2Result(game_map, game_result))
             pbar.update(1)
-    return list_of_map2result
+    return (list_of_map2result, with_dataset.maps_data)
diff --git a/VSharp.ML.AIAgent/ml/common_model/dataset.py b/VSharp.ML.AIAgent/ml/common_model/dataset.py
@@ -0,0 +1,136 @@
+from collections.abc import Sequence
+import torch
+
+import os
+import numpy as np
+
+import tqdm
+import logging
+from ml.common_model.utils import load_dataset_state_dict
+import csv
+from torch_geometric.data import HeteroData
+from typing import TypeAlias
+
+
+# Name of a game map.
+MapName: TypeAlias = str
+# Per-map result tuple; FullDataset.update compares two of these with `<`.
+# play_map (learning/play_game.py) builds it as
+# (actual_coverage_percent, -tests_count, errors_count, -steps_count).
+GameStatistics: TypeAlias = tuple[int, int, int, int]
+# One recorded game step.
+GameStepHeteroData: TypeAlias = HeteroData
+# Value type of FullDataset.maps_data: (result of the run, steps of that run).
+GameStepsOnMapInfo: TypeAlias = tuple[GameStatistics, Sequence[GameStepHeteroData]]
+
+
+class FullDataset:
+    """Keep, per map, the best game result seen and the steps that produced it.
+
+    ``maps_data`` maps a map name to ``(map_result, steps)``. Every step is
+    indexed by ``"y_true"`` and ``"game_vertex"`` and gets a ``use_for_train``
+    attribute assigned by :meth:`remove_similar_steps`.
+    """
+
+    def __init__(
+        self,
+        dataset_root_path,
+        dataset_map_results_file_name,
+        similar_steps_save_prob=0,
+    ):
+        """Create an empty dataset.
+
+        Args:
+            dataset_root_path: directory holding one ``<map_name>.pt`` file
+                per map (read by :meth:`load`, written by :meth:`save`).
+            dataset_map_results_file_name: path of the CSV file with the
+                per-map results.
+            similar_steps_save_prob: probability of keeping a step for
+                training when it repeats the step right before it.
+        """
+        self.dataset_map_results_file_name = dataset_map_results_file_name
+        self.dataset_root_path = dataset_root_path
+        self.maps_data: dict[str, GameStepsOnMapInfo] = dict()
+        self.similar_steps_save_prob = similar_steps_save_prob
+
+    def load(self):
+        """Fill ``maps_data`` from the files found in ``dataset_root_path``.
+
+        Every directory entry is read with ``torch.load`` onto the CPU. The
+        entry name without its extension is the map name and must be a key of
+        the mapping returned by ``load_dataset_state_dict``.
+        """
+        maps_results = load_dataset_state_dict(self.dataset_map_results_file_name)
+        for file_with_map_steps in tqdm.tqdm(
+            os.listdir(self.dataset_root_path), desc="data loading"
+        ):
+            map_steps = torch.load(
+                os.path.join(self.dataset_root_path, file_with_map_steps),
+                map_location="cpu",
+            )
+            # splitext rather than a fixed [:-3] slice: the slice silently
+            # mangles the map name for any extension that is not 3 chars long.
+            map_name, _ = os.path.splitext(file_with_map_steps)
+            kept_steps = self.remove_similar_steps(self.filter_map_steps(map_steps))
+            self.maps_data[map_name] = (maps_results[map_name], kept_steps)
+
+    @staticmethod
+    def _is_similar(step, previous):
+        """Tell whether ``step`` repeats ``previous``.
+
+        True when both ``"y_true"`` tensors have the same size, their cosine
+        distance is below 1e-7 and both steps have the same number of rows in
+        ``["game_vertex"]["x"]``.
+        """
+        y_true, previous_y_true = step["y_true"], previous["y_true"]
+        if y_true.size() != previous_y_true.size():
+            return False
+        cos_d = 1 - torch.sum(
+            (y_true / torch.linalg.vector_norm(y_true))
+            * (previous_y_true / torch.linalg.vector_norm(previous_y_true))
+        )
+        return bool(cos_d < 1e-7) and (
+            step["game_vertex"]["x"].size()[0]
+            == previous["game_vertex"]["x"].size()[0]
+        )
+
+    def remove_similar_steps(self, map_steps):
+        """Return every step of ``map_steps``, each flagged with ``use_for_train``.
+
+        No step is dropped from the returned list. A step that repeats the one
+        right before it (see :meth:`_is_similar`) is flagged for training with
+        probability ``similar_steps_save_prob``; every other step is flagged
+        ``True``.
+        """
+        filtered_map_steps = []
+        for step in map_steps:
+            if filtered_map_steps and self._is_similar(step, filtered_map_steps[-1]):
+                # bool(): store a plain Python bool instead of numpy.bool_.
+                step.use_for_train = bool(
+                    np.random.choice(
+                        [True, False],
+                        p=[
+                            self.similar_steps_save_prob,
+                            1 - self.similar_steps_save_prob,
+                        ],
+                    )
+                )
+            else:
+                step.use_for_train = True
+            filtered_map_steps.append(step)
+        return filtered_map_steps
+
+    def filter_map_steps(self, map_steps):
+        """Keep usable steps and turn their ``"y_true"`` into a one-hot tensor.
+
+        A step is dropped when the first dimension of ``"y_true"`` is 1 or
+        when ``"y_true"`` contains a NaN. Kept steps are modified in place:
+        ``"y_true"`` becomes all zeros except 1.0 at the argmax position.
+        """
+        filtered_map_steps = []
+        for step in map_steps:
+            if step["y_true"].size()[0] != 1 and not step["y_true"].isnan().any():
+                max_ind = torch.argmax(step["y_true"])
+                step["y_true"] = torch.zeros_like(step["y_true"])
+                step["y_true"][max_ind] = 1.0
+                filtered_map_steps.append(step)
+        return filtered_map_steps
+
+    def get_plain_data(self):
+        """Return one flat list with the ``use_for_train`` steps of all maps."""
+        return [
+            step
+            for _, map_steps in self.maps_data.values()
+            for step in map_steps
+            if step.use_for_train
+        ]
+
+    def save(self):
+        """Write each map's steps to ``<map_name>.pt`` and the results to the CSV.
+
+        The CSV gets one ``map_name,result`` row per map; no header row is
+        written.
+        """
+        values_for_csv = []
+        for map_name, (map_result, map_steps) in self.maps_data.items():
+            values_for_csv.append({"map_name": map_name, "result": map_result})
+            torch.save(
+                map_steps,
+                os.path.join(self.dataset_root_path, map_name + ".pt"),
+            )
+        # newline="" is required by the csv module; without it the writer's
+        # "\r\n" terminator is translated again on platforms that map "\n"
+        # to "\r\n".
+        with open(self.dataset_map_results_file_name, "w", newline="") as csv_file:
+            writer = csv.DictWriter(csv_file, fieldnames=["map_name", "result"])
+            writer.writerows(values_for_csv)
+
+    def update(
+        self,
+        map_name,
+        map_result: tuple[int, int, int, int],
+        map_steps,
+        move_to_cpu=False,
+    ):
+        """Store ``map_steps`` for ``map_name`` if it is new or strictly better.
+
+        Args:
+            map_name: key in ``maps_data``.
+            map_result: result of the run; compared with ``<`` against the
+                stored result.
+            map_steps: steps recorded during the run. They are passed through
+                :meth:`filter_map_steps` (which modifies them in place) even
+                when the run ends up being discarded.
+            move_to_cpu: call ``.to("cpu")`` on every step first.
+        """
+        if move_to_cpu:
+            for x in map_steps:
+                # NOTE(review): the result of .to() is ignored, so this relies
+                # on the step type moving itself in place — confirm.
+                x.to("cpu")
+        filtered_map_steps = self.filter_map_steps(map_steps)
+        stored = self.maps_data.get(map_name)
+        if stored is not None:
+            if not stored[0] < map_result:
+                return
+            logging.info(
+                f"The model with result = {stored[0]} was replaced with the model with "
+                f"result = {map_result} on the map {map_name}"
+            )
+        self.maps_data[map_name] = (
+            map_result,
+            self.remove_similar_steps(filtered_map_steps),
+        )
diff --git a/VSharp.ML.AIAgent/ml/common_model/models.py b/VSharp.ML.AIAgent/ml/common_model/models.py
@@ -14,10 +14,12 @@ def __init__(
         hidden_channels,
         num_gv_layers=2,
         num_sv_layers=2,
+        num_gv_hops=3,
+        num_sv_hops=3,
     ):
         super().__init__()
-        self.tag_conv1 = TAGConv(5, hidden_channels, 2)
-        self.tag_conv2 = TAGConv(6, hidden_channels, 3)
+        self.tag_conv1 = TAGConv(5, hidden_channels, num_gv_hops)
+        self.tag_conv2 = TAGConv(6, hidden_channels, num_sv_hops)
         self.gv_layers = nn.ModuleList()
         self.gv_layers.append(self.tag_conv1)
         self.gv_layers.append(SAGEConv(-1, hidden_channels))
@@ -44,46 +46,89 @@ def __init__(
 
         self.mlp = MLP(hidden_channels, [1])
 
-    def forward(self, x_dict, edge_index_dict, edge_attr_dict):
+    def forward(
+        self,
+        game_x,
+        state_x,
+        edge_index_v_v,
+        edge_type_v_v,
+        edge_index_history_v_s,
+        edge_attr_history_v_s,
+        edge_index_in_v_s,
+        edge_index_s_s,
+    ):
+        """Run the layer stack and return ``self.mlp`` applied to the state features.
+
+        Args:
+            game_x: input of ``gv_layers``.
+            state_x: input of ``sv_layers``.
+            edge_index_v_v: edge index given to every layer of ``gv_layers``.
+            edge_type_v_v: accepted but not read in this method.
+            edge_index_history_v_s: edge index given to ``history1``.
+            edge_attr_history_v_s: edge attributes given to ``history1``.
+            edge_index_in_v_s: edge index given to ``in1``.
+            edge_index_s_s: edge index given to every layer of ``sv_layers``
+                and ``sv_layers2``.
+
+        Every layer output is passed through ``relu``. ``history1`` consumes
+        ``(game_x, state_x)``, ``in1`` consumes ``(game_x, history_x)``, and
+        ``sv_layers2`` starts from the ``in1`` output.
+        """
         game_x = self.gv_layers[0](
-            x_dict["game_vertex"],
-            edge_index_dict[("game_vertex", "to", "game_vertex")],
+            game_x,
+            edge_index_v_v,
         ).relu()
         for layer in self.gv_layers[1:]:
             game_x = layer(
                 game_x,
-                edge_index_dict[("game_vertex", "to", "game_vertex")],
+                edge_index_v_v,
             ).relu()
 
         state_x = self.sv_layers[0](
-            x_dict["state_vertex"],
-            edge_index_dict[("state_vertex", "parent_of", "state_vertex")],
+            state_x,
+            edge_index_s_s,
         ).relu()
         for layer in self.sv_layers[1:]:
             state_x = layer(
                 state_x,
-                edge_index_dict[("state_vertex", "parent_of", "state_vertex")],
+                edge_index_s_s,
             ).relu()
 
         history_x = self.history1(
             (game_x, state_x),
-            edge_index_dict[("game_vertex", "history", "state_vertex")],
-            edge_attr_dict,
+            edge_index_history_v_s,
+            edge_attr_history_v_s,
             size=(game_x.size(0), state_x.size(0)),
         ).relu()
 
-        in_x = self.in1(
-            (game_x, history_x), edge_index_dict[("game_vertex", "in", "state_vertex")]
-        ).relu()
+        in_x = self.in1((game_x, history_x), edge_index_in_v_s).relu()
 
         state_x = self.sv_layers2[0](
             in_x,
-            edge_index_dict[("state_vertex", "parent_of", "state_vertex")],
+            edge_index_s_s,
         ).relu()
         for layer in self.sv_layers2[1:]:
             state_x = layer(
                 state_x,
-                edge_index_dict[("state_vertex", "parent_of", "state_vertex")],
+                edge_index_s_s,
             ).relu()
-        x = self.mlp(in_x)
+        x = self.mlp(state_x)
         return x
+
+
+class ParallelBlocks(torch.nn.Module):
+    """Run several models on the same inputs and concatenate their outputs."""
+
+    def __init__(self, models_list, mlp_list):
+        """Store the sub-models and build the MLP.
+
+        Args:
+            models_list: modules that accept the same arguments as
+                :meth:`forward`.
+            mlp_list: second argument of ``MLP``; the first one is the number
+                of sub-models.
+        """
+        super().__init__()
+        # A plain Python list hides the sub-models from parameters(), to(),
+        # state_dict() and train()/eval(); ModuleList registers them.
+        self.models_list = torch.nn.ModuleList(models_list)
+        self.mlp = MLP(len(self.models_list), mlp_list)
+
+    def forward(
+        self,
+        game_x,
+        state_x,
+        edge_index_v_v,
+        edge_type_v_v,
+        edge_index_history_v_s,
+        edge_attr_history_v_s,
+        edge_index_in_v_s,
+        edge_index_s_s,
+    ):
+        """Call every sub-model with the given arguments.
+
+        Returns:
+            The sub-model outputs concatenated along dimension 1, in the
+            order of ``models_list``.
+        """
+        # NOTE(review): self.mlp is built in __init__ but is not applied
+        # here — confirm whether the concatenated tensor should go through it.
+        results_list = [
+            model(
+                game_x,
+                state_x,
+                edge_index_v_v,
+                edge_type_v_v,
+                edge_index_history_v_s,
+                edge_attr_history_v_s,
+                edge_index_in_v_s,
+                edge_index_s_s,
+            )
+            for model in self.models_list
+        ]
+        return torch.cat(results_list, dim=1)