diff --git a/NeoML/Python/CMakeLists.txt b/NeoML/Python/CMakeLists.txt
index 7262abab2..994a715fd 100644
--- a/NeoML/Python/CMakeLists.txt
+++ b/NeoML/Python/CMakeLists.txt
@@ -17,7 +17,7 @@ find_package(Python3 ${Python3_FIND_VERSION} EXACT COMPONENTS Interpreter Develo
 FetchContent_Declare(
     pybind11
     GIT_REPOSITORY https://github.com/pybind/pybind11.git
-    GIT_TAG v2.11.1
+    GIT_TAG v2.13.5
 )
 
 FetchContent_GetProperties(pybind11)
diff --git a/NeoML/Python/neoml/AutoDiff.py b/NeoML/Python/neoml/AutoDiff.py
index 2ac593f27..32833cf12 100644
--- a/NeoML/Python/neoml/AutoDiff.py
+++ b/NeoML/Python/neoml/AutoDiff.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -18,7 +18,7 @@
 import neoml.PythonWrapper as PythonWrapper
 from neoml.MathEngine import MathEngine
 from neoml.Blob import Blob
-import numpy
+import numpy as np
 
 # ----------------------------------------------------------------------------------------------------------------------
 
@@ -28,15 +28,15 @@ def const(math_engine, shape, data):
     if not isinstance(math_engine, MathEngine):
         raise ValueError('The `math_engine` should be neoml.MathEngine.')
 
-    np_shape = numpy.array(shape, dtype=numpy.int32, copy=False)
+    np_shape = np.asarray(shape, dtype=np.int32)
 
     if len(np_shape) > 7:
         raise ValueError('The `shape` should have not more than 7 dimensions.')
 
-    if numpy.isscalar(data):
+    if np.isscalar(data):
         return Blob(PythonWrapper.blob_const(math_engine._internal, np_shape, float(data)))
 
-    np_data = numpy.array(data, dtype=numpy.float32, copy=False, order='C')
+    np_data = np.asarray(data, dtype=np.float32, order='C')
 
     if len(np_data.shape) > 7:
         raise ValueError('The `shape` should have not more than 7 dimensions.')
@@ -99,7 +99,7 @@ def sum(a, axes=None):
     if a.size == 0:
         raise ValueError("The blob shouldn't be empty.")
 
-    axes = numpy.array([] if axes is None else axes, dtype=numpy.int32)
+    axes = np.array([] if axes is None else axes, dtype=np.int32)
     if not neoml.Utils.check_axes(axes):
         raise ValueError("`axes` should be unique and in range [0, 6].")
 
@@ -129,7 +129,7 @@ def mean(a, axes=None):
     if a.size == 0:
         raise ValueError("The blob shouldn't be empty.")
 
-    axes = numpy.array([] if axes is None else axes, dtype=numpy.int32)
+    axes = np.array([] if axes is None else axes, dtype=np.int32)
     if not neoml.Utils.check_axes(axes):
         raise ValueError("`axes` should be unique and in range [0, 6].")
 
@@ -207,7 +207,7 @@ def broadcast(blob, shape):
     if blob.size == 0:
         raise ValueError("The blobs mustn't be empty.")
 
-    np_shape = numpy.array(shape, dtype=numpy.int32, copy=False)
+    np_shape = np.asarray(shape, dtype=np.int32)
 
     if len(np_shape) > 7:
         raise ValueError('The `shape` should have not more than 7 dimensions.')
@@ -227,12 +227,12 @@ def reshape(blob, shape):
     if blob.size == 0:
         raise ValueError("The blobs mustn't be empty.")
 
-    np_shape = numpy.array(shape, dtype=numpy.int32, copy=False)
+    np_shape = np.asarray(shape, dtype=np.int32)
 
     if len(np_shape) > 7:
         raise ValueError('The `shape` should have not more than 7 dimensions.')
 
-    if numpy.prod(np_shape) != blob.size:
+    if np.prod(np_shape) != blob.size:
         raise ValueError('`shape` is incompatible with current size.')
 
     PythonWrapper.blob_reshape(blob._internal, np_shape)
diff --git a/NeoML/Python/neoml/Blob.py b/NeoML/Python/neoml/Blob.py
index 680675eb2..b512717ed 100644
--- a/NeoML/Python/neoml/Blob.py
+++ b/NeoML/Python/neoml/Blob.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -319,12 +319,12 @@ def asblob(math_engine, data, shape=None, copy=False):
     if shape is None:
         shape = np.ones(7, np.int32)
     else:
-        shape = np.array(shape, dtype=np.int32, copy=False)
+        shape = np.asarray(shape, dtype=np.int32)
 
     if len(shape) != 7:
         raise ValueError('The `shape` must have 7 dimension sizes.')
 
-    np_data = np.array(data, copy=False, order='C')
+    np_data = np.asarray(data, order='C')
 
     if len(np_data.shape) > 7:
         raise ValueError('The `shape` must have not more then 7 dimensions.')
@@ -407,7 +407,7 @@ def tensor(math_engine, shape, dtype="float32"):
     if dtype != "float32" and dtype != "int32":
         raise ValueError('The `dtype` must be one of {`float32`, `int32`}.')
 
-    shape = np.array(shape, dtype=np.int32, copy=False)
+    shape = np.asarray(shape, dtype=np.int32)
 
     if shape.size != 7:
         raise ValueError('The `shape.size` must be == 7.')
@@ -449,7 +449,7 @@ def list_blob(math_engine, batch_len, batch_width, list_size, channels, dtype="f
     if channels < 1:
         raise ValueError('The `channels` must be > 0.')
 
-    shape = np.array((batch_len, batch_width, list_size, 1, 1, 1, channels), dtype=np.int32, copy=False)
+    shape = np.asarray((batch_len, batch_width, list_size, 1, 1, 1, channels), dtype=np.int32)
 
     return Blob(PythonWrapper.tensor(math_engine._internal, shape, dtype))
 
@@ -490,7 +490,7 @@ def image2d(math_engine, batch_len, batch_width, height, width, channels, dtype=
     if channels < 1:
         raise ValueError('The `channels` must be > 0.')
 
-    shape = np.array((batch_len, batch_width, 1, height, width, 1, channels), dtype=np.int32, copy=False)
+    shape = np.asarray((batch_len, batch_width, 1, height, width, 1, channels), dtype=np.int32)
 
     return Blob(PythonWrapper.tensor(math_engine._internal, shape, dtype))
 
@@ -536,6 +536,6 @@ def image3d(math_engine, batch_len, batch_width, height, width, depth, channels,
     if channels < 1:
         raise ValueError('The `channels` must be > 0.')
 
-    shape = np.array((batch_len, batch_width, 1, height, width, depth, channels), dtype=np.int32, copy=False)
+    shape = np.asarray((batch_len, batch_width, 1, height, width, depth, channels), dtype=np.int32)
 
     return Blob(PythonWrapper.tensor(math_engine._internal, shape, dtype))
diff --git a/NeoML/Python/neoml/Clustering.py b/NeoML/Python/neoml/Clustering.py
index fb1e9bb67..69b04a800 100644
--- a/NeoML/Python/neoml/Clustering.py
+++ b/NeoML/Python/neoml/Clustering.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2021 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,9 +16,9 @@
 
 import numpy
 from .Utils import convert_data, get_data
-from scipy.sparse import csr_matrix
 import neoml.PythonWrapper as PythonWrapper
 
+
 class FirstCome(PythonWrapper.FirstCome) :
     """First come clustering creates a new cluster for each new vector
     that is far enough from the clusters already existing.
diff --git a/NeoML/Python/neoml/CrossValidation.py b/NeoML/Python/neoml/CrossValidation.py
index 82db9ae53..4ba284323 100644
--- a/NeoML/Python/neoml/CrossValidation.py
+++ b/NeoML/Python/neoml/CrossValidation.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2021 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,7 +16,6 @@
 
 import numpy
 from .Utils import convert_data, get_data
-from scipy.sparse import csr_matrix, issparse
 import neoml.PythonWrapper as PythonWrapper
 
 
diff --git a/NeoML/Python/neoml/DecisionTree.py b/NeoML/Python/neoml/DecisionTree.py
index 42954bfbb..23d1ab645 100644
--- a/NeoML/Python/neoml/DecisionTree.py
+++ b/NeoML/Python/neoml/DecisionTree.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2021 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,9 +14,8 @@
 --------------------------------------------------------------------------------------------------------------
 """
 
-import numpy
+import numpy as np
 from .Utils import convert_data, get_data
-from scipy.sparse import csr_matrix, issparse
 import neoml.PythonWrapper as PythonWrapper
 
 
@@ -126,20 +125,20 @@ def train(self, X, Y, weight=None):
         """
 
         x = convert_data(X)
-        y = numpy.array(Y, dtype=numpy.int32, copy=False, order='C')
+        y = np.asarray(Y, dtype=np.int32, order='C')
 
         if x.shape[0] != y.size:
             raise ValueError('The `X` and `Y` inputs must be the same length.')
 
         if weight is None:
-            weight = numpy.ones(y.size, numpy.float32, order='C')
+            weight = np.ones(y.size, np.float32, order='C')
         else:
-            weight = numpy.array(weight, dtype=numpy.float32, copy=False, order='C')
+            weight = np.asarray(weight, dtype=np.float32, order='C')
 
-        if numpy.any(y < 0):
+        if np.any(y < 0):
             raise ValueError('All `Y` elements must be >= 0.')
 
-        if numpy.any(weight < 0):
+        if np.any(weight < 0):
             raise ValueError('All `weight` elements must be >= 0.')
 
         return DecisionTreeClassificationModel(super().train_classifier(*get_data(x), int(x.shape[1]), y, weight))
diff --git a/NeoML/Python/neoml/Dnn/DnnDistributed.py b/NeoML/Python/neoml/Dnn/DnnDistributed.py
index 3a532d29d..d99eec2e0 100644
--- a/NeoML/Python/neoml/Dnn/DnnDistributed.py
+++ b/NeoML/Python/neoml/Dnn/DnnDistributed.py
@@ -1,8 +1,11 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
     http://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -11,9 +14,6 @@
 --------------------------------------------------------------------------------------------------------------*/
 """
 
-import uuid
-import os
-from neoml.MathEngine import MathEngine
 import neoml.PythonWrapper as PythonWrapper
 from neoml.Blob import Blob
 from neoml.Dnn import Dnn
diff --git a/NeoML/Python/neoml/Dnn/MultichannelLookup.py b/NeoML/Python/neoml/Dnn/MultichannelLookup.py
index 71bd1bc80..87e5f1e46 100644
--- a/NeoML/Python/neoml/Dnn/MultichannelLookup.py
+++ b/NeoML/Python/neoml/Dnn/MultichannelLookup.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,7 +14,6 @@
 --------------------------------------------------------------------------------------------------------------
 """
 
-import numpy
 import neoml.PythonWrapper as PythonWrapper
 from .Dnn import Layer
 from .Initializer import Initializer
@@ -135,7 +134,9 @@ def initialize(self, initializer):
         than the one set for the whole network in general.
         """
         if initializer is None:
-            return self._internal.clear()
+            self._internal.clear()
+            # Stop here: falling through would reject None in the isinstance check below.
+            return
     
         if not isinstance(initializer, Initializer):
             raise ValueError('The `initializer` must be an Initializer.')
diff --git a/NeoML/Python/neoml/Dnn/SpaceAndDepth.py b/NeoML/Python/neoml/Dnn/SpaceAndDepth.py
index 7363659b7..0006a9878 100644
--- a/NeoML/Python/neoml/Dnn/SpaceAndDepth.py
+++ b/NeoML/Python/neoml/Dnn/SpaceAndDepth.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@
 import neoml.PythonWrapper as PythonWrapper
 from .Dnn import Layer
 from neoml.Utils import check_input_layers
-import neoml.Blob as Blob
 
 
 class SpaceToDepth(Layer):
diff --git a/NeoML/Python/neoml/Dnn/Split.py b/NeoML/Python/neoml/Dnn/Split.py
index b8a06acbf..f3a74886d 100644
--- a/NeoML/Python/neoml/Dnn/Split.py
+++ b/NeoML/Python/neoml/Dnn/Split.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 import neoml.PythonWrapper as PythonWrapper
 from .Dnn import Layer
 from neoml.Utils import check_input_layers
-import numpy
+import numpy as np
 
 
 class SplitChannels(Layer):
@@ -55,12 +55,12 @@ def __init__(self, input_layer, sizes, name=None):
 
         layers, outputs = check_input_layers(input_layer, 1)
 
-        s = numpy.array(sizes, dtype=numpy.int32, copy=False)
+        s = np.asarray(sizes, dtype=np.int32)
 
         if s.size > 3:
             raise ValueError('The `sizes` must contain not more than 3 elements.')
 
-        if numpy.any(s < 0):
+        if np.any(s < 0):
             raise ValueError('The `sizes` must contain only positive values.')
 
         internal = PythonWrapper.SplitChannels(str(name), layers[0], int(outputs[0]), s)
@@ -104,12 +104,12 @@ def __init__(self, input_layer, sizes, name=None):
 
         layers, outputs = check_input_layers(input_layer, 1)
 
-        s = numpy.array(sizes, dtype=numpy.int32, copy=False)
+        s = np.asarray(sizes, dtype=np.int32)
 
         if s.size > 3:
             raise ValueError('The `sizes` must contain not more than 3 elements.')
 
-        if numpy.any(s < 0):
+        if np.any(s < 0):
             raise ValueError('The `sizes` must contain only positive values.')
 
         internal = PythonWrapper.SplitDepth(str(name), layers[0], int(outputs[0]), s)
@@ -153,12 +153,12 @@ def __init__(self, input_layer, sizes, name=None):
 
         layers, outputs = check_input_layers(input_layer, 1)
 
-        s = numpy.array(sizes, dtype=numpy.int32, copy=False)
+        s = np.asarray(sizes, dtype=np.int32)
 
         if s.size > 3:
             raise ValueError('The `sizes` must contain not more than 3 elements.')
 
-        if numpy.any(s < 0):
+        if np.any(s < 0):
             raise ValueError('The `sizes` must contain only positive values.')
 
         internal = PythonWrapper.SplitWidth(str(name), layers[0], int(outputs[0]), s)
@@ -202,12 +202,12 @@ def __init__(self, input_layer, sizes, name=None):
 
         layers, outputs = check_input_layers(input_layer, 1)
 
-        s = numpy.array(sizes, dtype=numpy.int32, copy=False)
+        s = np.asarray(sizes, dtype=np.int32)
 
         if s.size > 3:
             raise ValueError('The `sizes` must contain not more than 3 elements.')
 
-        if numpy.any(s < 0):
+        if np.any(s < 0):
             raise ValueError('The `sizes` must contain only positive values.')
 
         internal = PythonWrapper.SplitHeight(str(name), layers[0], int(outputs[0]), s)
@@ -251,12 +251,12 @@ def __init__(self, input_layer, sizes, name=None):
 
         layers, outputs = check_input_layers(input_layer, 1)
 
-        s = numpy.array(sizes, dtype=numpy.int32, copy=False)
+        s = np.asarray(sizes, dtype=np.int32)
 
         if s.size > 3:
             raise ValueError('The `sizes` must contain not more than 3 elements.')
 
-        if numpy.any(s < 0):
+        if np.any(s < 0):
             raise ValueError('The `sizes` must contain only positive values.')
 
         internal = PythonWrapper.SplitListSize(str(name), layers[0], int(outputs[0]), s)
@@ -300,12 +300,12 @@ def __init__(self, input_layer, sizes, name=None):
 
         layers, outputs = check_input_layers(input_layer, 1)
 
-        s = numpy.array(sizes, dtype=numpy.int32, copy=False)
+        s = np.asarray(sizes, dtype=np.int32)
 
         if s.size > 3:
             raise ValueError('The `sizes` must contain not more than 3 elements.')
 
-        if numpy.any(s < 0):
+        if np.any(s < 0):
             raise ValueError('The `sizes` must contain only positive values.')
 
         internal = PythonWrapper.SplitBatchWidth(str(name), layers[0], int(outputs[0]), s)
@@ -349,12 +349,12 @@ def __init__(self, input_layer, sizes, name=None):
 
         layers, outputs = check_input_layers(input_layer, 1)
 
-        s = numpy.array(sizes, dtype=numpy.int32, copy=False)
+        s = np.asarray(sizes, dtype=np.int32)
 
         if s.size > 3:
             raise ValueError('The `sizes` must contain not more than 3 elements.')
 
-        if numpy.any(s < 0):
+        if np.any(s < 0):
             raise ValueError('The `sizes` must contain only positive values.')
 
         internal = PythonWrapper.SplitBatchLength(str(name), layers[0], int(outputs[0]), s)
diff --git a/NeoML/Python/neoml/Dnn/TiedEmbeddings.py b/NeoML/Python/neoml/Dnn/TiedEmbeddings.py
index 197e6ec18..f5b89b7e3 100644
--- a/NeoML/Python/neoml/Dnn/TiedEmbeddings.py
+++ b/NeoML/Python/neoml/Dnn/TiedEmbeddings.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,11 +14,9 @@
 --------------------------------------------------------------------------------------------------------------
 """
 
-import numpy
 import neoml.PythonWrapper as PythonWrapper
 from .Dnn import Layer
 from neoml.Utils import check_input_layers
-import neoml.Blob as Blob
 
 
 class TiedEmbeddings(Layer):
@@ -30,9 +28,9 @@ class TiedEmbeddings(Layer):
         The integer in each tuple specifies the number of the output.
         If not set, the first output will be used.
     :type input_layers: object, tuple(object, int) or list of them
-    :param embeddings_layer_name: The name of the layer used for embeddings. 
-        Needs to be a MultichannelLookup layer.   
-    :type embeddings_layer_name: str
+    :param embeddings_layer_path: The path (list of layer names) of the layer used for embeddings. 
+        Needs to be a MultichannelLookup layer.
+    :type embeddings_layer_path: list of str
     :param channel: The channel index in the embeddings layer.
     :type channel: int, >=0
     :param name: The layer name.
@@ -50,7 +48,7 @@ class TiedEmbeddings(Layer):
 
     For each input the layer has one output of the same dimensions.
     """
-    def __init__(self, input_layers, embeddings_layer_name, channel, name=None):
+    def __init__(self, input_layers, embeddings_layer_path, channel, name=None):
 
         if type(input_layers) is PythonWrapper.TiedEmbeddings:
             super().__init__(input_layers)
@@ -59,9 +57,14 @@ def __init__(self, input_layers, embeddings_layer_name, channel, name=None):
         if channel < 0:
             raise ValueError('`channel` must be >= 0.')
 
+        # Check the path is a non-empty list of strings
+        path = embeddings_layer_path
+        if not (path and isinstance(path, list) and all(isinstance(s, str) for s in path)):
+            raise ValueError('`embeddings_layer_path` arg must be a not-empty list of strings')
+
         layers, outputs = check_input_layers(input_layers, 0)
 
-        internal = PythonWrapper.TiedEmbeddings(str(name), layers, outputs, str(embeddings_layer_name), int(channel))
+        internal = PythonWrapper.TiedEmbeddings(str(name), layers, outputs, embeddings_layer_path, int(channel))
         super().__init__(internal)
 
     @property
@@ -76,6 +79,21 @@ def embeddings_layer_name(self, embeddings_layer_name):
         """
         self._internal.set_embeddings_layer_name(embeddings_layer_name)
 
+    @property
+    def embeddings_layer_path(self):
+        """Gets the path of the layer used for representation table.
+        """
+        return self._internal.get_embeddings_layer_path()
+
+    @embeddings_layer_path.setter
+    def embeddings_layer_path(self, path):
+        """Sets the path of the layer used for representation table.
+        """
+        # Check the path is a non-empty list of strings
+        if not (path and isinstance(path, list) and all(isinstance(s, str) for s in path)):
+            raise ValueError('`path` arg must be a not-empty list of strings')
+        self._internal.set_embeddings_layer_path(path)
+
     @property
     def channel(self):
         """Gets the channel index in the embeddings layer.
diff --git a/NeoML/Python/neoml/Dnn/Transformer.py b/NeoML/Python/neoml/Dnn/Transformer.py
index 3b4ad168b..fa2af226c 100644
--- a/NeoML/Python/neoml/Dnn/Transformer.py
+++ b/NeoML/Python/neoml/Dnn/Transformer.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -17,7 +17,6 @@
 import neoml.PythonWrapper as PythonWrapper
 from .Dnn import Layer
 from neoml.Utils import check_input_layers
-import numpy
 
 
 class TransformerEncoder(Layer):
@@ -26,14 +25,18 @@ class TransformerEncoder(Layer):
     :param input_layer: The input layer and the number of its output. If no number
         is specified, the first output will be connected. 
     :type input_layer: object, tuple(object, int)
-    :param head_count: The number of heads in self-attention layer
+    :param head_count: The number of heads in self-attention sub-layer
     :type head_count: int, default=1
     :param hidden_size: The hidden size of self-attention layer, must be a multiple of head_count
     :type hidden_size: int, default=1
-    :param dropout: The rate of dropouts
+    :param dropout: The rate of dropouts of transformer
     :type dropout: float, default=0.
+    :param sa_dropout: The rate of dropout of self-attention sub-layer
+    :type sa_dropout: float, default=0.
     :param feed_forward_size: The size of the first fully-connected layer in feed-forward
     :type feed_forward_size: int, default=1
+    :param pre_norm: The place of the normalization layer: right after input or before feedForward as usual
+    :type pre_norm: bool, default=False
     :param activation: activation used between fully-connected layers in feed-forward
     :type activation: str, {"linear", "elu", "relu", "leaky_relu", "abs", "sigmoid", "tanh",
         "hard_tanh", "hard_sigmoid", "power", "hswish", "gelu"}, default="relu"
@@ -65,7 +68,7 @@ class TransformerEncoder(Layer):
     """
     activations = ["linear", "elu", "relu", "leaky_relu", "abs", "sigmoid", "tanh", "hard_tanh", "hard_sigmoid", "power", "hswish", "gelu"]
 
-    def __init__(self, input_layers, head_count=1, hidden_size=1, dropout=0., feed_forward_size=1, activation='relu', name=None):
+    def __init__(self, input_layers, head_count=1, hidden_size=1, dropout=0., sa_dropout=0., feed_forward_size=1, activation='relu', pre_norm=False, name=None):
 
         if type(input_layers) is PythonWrapper.TransformerEncoder:
             super().__init__(input_layers)
@@ -81,7 +84,10 @@ def __init__(self, input_layers, head_count=1, hidden_size=1, dropout=0., feed_f
             raise ValueError('The `hidden_size` must be a multiple of `head_count`')
 
         if dropout >= 1.:
-            raise ValueError('The `dropout` must be < 1.')
+            raise ValueError('The `dropout` for transformer must be < 1.')
+
+        if sa_dropout >= 1.:
+            raise ValueError('The `sa_dropout` for self-attention must be < 1.')
 
         if feed_forward_size < 1:
             raise ValueError('The `feed_forward_size` must be > 0.')
@@ -94,7 +100,7 @@ def __init__(self, input_layers, head_count=1, hidden_size=1, dropout=0., feed_f
         layers, outputs = check_input_layers(input_layers, (1, 2))
 
         internal = PythonWrapper.TransformerEncoder(str(name), layers, outputs,
-            int(head_count), int(hidden_size), float(dropout), int(feed_forward_size), int(activation))
+            int(head_count), int(hidden_size), float(dropout), float(sa_dropout), int(feed_forward_size), int(activation), bool(pre_norm))
         super().__init__(internal)
 
     @property
@@ -128,19 +134,32 @@ def hidden_size(self, hidden_size):
 
     @property
     def dropout(self):
-        """Gets the dropout rate.
+        """Gets the dropout rate for transformer.
         """
         return self._internal.get_dropout()
 
     @dropout.setter
     def dropout(self, dropout):
-        """Sets the hidden size of the attention.
-        Must be a multiple of the head_count.
+        """Sets the dropout rate for transformer.
         """
         if dropout >= 1.0:
-            raise ValueError('The `dropout` must be < 1.')
+            raise ValueError('The `dropout` for transformer must be < 1.')
         self._internal.set_dropout(float(dropout))
 
+    @property
+    def sa_dropout(self):
+        """Gets the dropout rate of self-attention sub-layer.
+        """
+        return self._internal.get_sa_dropout()
+
+    @sa_dropout.setter
+    def sa_dropout(self, dropout):
+        """Sets the dropout rate of self-attention sub-layer; raises ValueError unless it is < 1.
+        """
+        if dropout >= 1.0:
+            raise ValueError('The `sa_dropout` for self-attention must be < 1.')
+        self._internal.set_sa_dropout(float(dropout))
+
     @property
     def feed_forward_size(self):
         """Gets the feed forward size.
@@ -155,3 +174,15 @@ def feed_forward_size(self, feed_forward_size):
             raise ValueError('The `feed_forward_size` must be > 0.')
         self._internal.set_feed_forward_size(int(feed_forward_size))
 
+    @property
+    def pre_norm(self):
+        """Gets the place of the normalization layer.
+        """
+        return self._internal.get_pre_norm()
+
+    @pre_norm.setter
+    def pre_norm(self, pre_norm):
+        """Sets the place of the normalization layer.
+        """
+        self._internal.set_pre_norm(bool(pre_norm))
+
diff --git a/NeoML/Python/neoml/GradientBoost.py b/NeoML/Python/neoml/GradientBoost.py
index 223474ac9..39b82567d 100644
--- a/NeoML/Python/neoml/GradientBoost.py
+++ b/NeoML/Python/neoml/GradientBoost.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2021 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
 --------------------------------------------------------------------------------------------------------------
 """
 
-import numpy
+import numpy as np
 from .Utils import convert_data, get_data
 import neoml.PythonWrapper as PythonWrapper
 
@@ -156,20 +156,20 @@ def train(self, X, Y, weight=None):
         :rtype: neoml.GradientBoost.GradientBoostClassificationModel
         """
         x = convert_data( X )
-        y = numpy.array( Y, dtype=numpy.int32, copy=False, order='C' )
+        y = np.asarray( Y, dtype=np.int32, order='C' )
 
         if x.shape[0] != y.size:
             raise ValueError('The `X` and `Y` inputs must be the same length.')
 
         if weight is None:
-            weight = numpy.ones(y.size, numpy.float32, order='C')
+            weight = np.ones(y.size, np.float32, order='C')
         else:
-            weight = numpy.array( weight, dtype=numpy.float32, copy=False, order='C' )
+            weight = np.asarray( weight, dtype=np.float32, order='C' )
 
-        if numpy.any(y < 0):
+        if np.any(y < 0):
             raise ValueError('All `Y` elements must be >= 0.')
 
-        if numpy.any(weight < 0):
+        if np.any(weight < 0):
             raise ValueError('All `weight` elements must be >= 0.')
 
         return GradientBoostClassificationModel(super().train_classifier(*get_data(x), int(x.shape[1]), y, weight)) 
@@ -310,17 +310,17 @@ def train(self, X, Y, weight=None):
         :rtype: neoml.GradientBoost.GradientBoostRegressionModel
         """
         x = convert_data( X )
-        y = numpy.array( Y, dtype=numpy.float32, copy=False, order='C' )
+        y = np.asarray( Y, dtype=np.float32, order='C' )
 
         if x.shape[0] != y.size:
             raise ValueError('The `X` and `Y` inputs must be the same length.')
 
         if weight is None:
-            weight = numpy.ones(y.size, numpy.float32, order='C')
+            weight = np.ones(y.size, np.float32, order='C')
         else:
-            weight = numpy.array( weight, dtype=numpy.float32, copy=False, order='C' )
+            weight = np.asarray( weight, dtype=np.float32, order='C' )
 
-        if numpy.any(weight < 0):
+        if np.any(weight < 0):
             raise ValueError('All `weight` elements must be >= 0.')
 
         return GradientBoostRegressionModel(super().train_regressor(*get_data(x), int(x.shape[1]), y, weight)) 
diff --git a/NeoML/Python/neoml/Linear.py b/NeoML/Python/neoml/Linear.py
index c8b3911ca..c79222c49 100644
--- a/NeoML/Python/neoml/Linear.py
+++ b/NeoML/Python/neoml/Linear.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2021 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,11 +14,11 @@
 --------------------------------------------------------------------------------------------------------------
 """
 
-import numpy
+import numpy as np
 from .Utils import convert_data, get_data
-from scipy.sparse import csr_matrix, issparse
 import neoml.PythonWrapper as PythonWrapper
 
+
 class LinearClassificationModel :
     """Linear binary classification model.
     """
@@ -112,20 +112,20 @@ def train(self, X, Y, weight=None):
         :rtype: neoml.Linear.LinearClassificationModel
         """
         x = convert_data( X )
-        y = numpy.array( Y, dtype=numpy.int32, copy=False, order='C' )
+        y = np.asarray( Y, dtype=np.int32, order='C' )
 
         if x.shape[0] != y.size:
             raise ValueError('The `X` and `Y` inputs must be the same length.')
 
         if weight is None:
-            weight = numpy.ones(y.size, numpy.float32, order='C')
+            weight = np.ones(y.size, np.float32, order='C')
         else:
-            weight = numpy.array( weight, dtype=numpy.float32, copy=False, order='C' )
+            weight = np.asarray( weight, dtype=np.float32, order='C' )
 
-        if numpy.any(y < 0):
+        if np.any(y < 0):
             raise ValueError('All `Y` elements must be >= 0.')
 
-        if numpy.any(weight < 0):
+        if np.any(weight < 0):
             raise ValueError('All `weight` elements must be >= 0.')
 
         return LinearClassificationModel(super().train_classifier(*get_data(x), int(x.shape[1]), y, weight))
@@ -217,17 +217,17 @@ def train(self, X, Y, weight=None):
         :rtype: neoml.Linear.LinearRegressionModel
         """
         x = convert_data( X )
-        y = numpy.array( Y, dtype=numpy.float32, copy=False, order='C' )
+        y = np.asarray( Y, dtype=np.float32, order='C' )
 
         if x.shape[0] != y.size:
             raise ValueError('The `X` and `Y` inputs must be the same length.')
 
         if weight is None:
-            weight = numpy.ones(y.size, numpy.float32, order='C')
+            weight = np.ones(y.size, np.float32, order='C')
         else:
-            weight = numpy.array( weight, dtype=numpy.float32, copy=False, order='C' )
+            weight = np.asarray( weight, dtype=np.float32, order='C' )
 
-        if numpy.any(weight < 0):
+        if np.any(weight < 0):
             raise ValueError('All `weight` elements must be >= 0.')
 
         return LinearRegressionModel(super().train_regressor(*get_data(x), int(x.shape[1]), y, weight))
diff --git a/NeoML/Python/neoml/PCA.py b/NeoML/Python/neoml/PCA.py
index c2b87207a..34f9007f6 100644
--- a/NeoML/Python/neoml/PCA.py
+++ b/NeoML/Python/neoml/PCA.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2021 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -14,11 +14,10 @@
 --------------------------------------------------------------------------------------------------------------
 """
 
-import numpy
 from .Utils import convert_data, get_data
-from scipy.sparse import csr_matrix
 import neoml.PythonWrapper as PythonWrapper
 
+
 """
 Singular Value Decomposition of a given matrix into matrices u, s, v.
 
@@ -51,6 +50,7 @@ def svd(matrix, compute_u = True, compute_v = False, algorithm = 'full', compone
     return PythonWrapper.singular_value_decomposition(*x.shape, *get_data(x),
         compute_u, compute_v, algorithm == 'full', components)
 
+
 class PCA(PythonWrapper.PCA) :
     """Principal components analysis (PCA) algorithm. 
     It uses singular value decomposition to project the data into
diff --git a/NeoML/Python/neoml/SVM.py b/NeoML/Python/neoml/SVM.py
index 4ff26df48..0563f7b88 100644
--- a/NeoML/Python/neoml/SVM.py
+++ b/NeoML/Python/neoml/SVM.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2021 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -16,9 +16,9 @@
 
 import numpy
 from .Utils import convert_data, get_data
-from scipy.sparse import csr_matrix, issparse
 import neoml.PythonWrapper as PythonWrapper
 
+
 class SvmClassificationModel :
     """Support-vector machine (SVM) classification model.
     """
@@ -39,6 +39,7 @@ def classify(self, X):
         x = convert_data( X )
         return self.internal.classify(*get_data(x))
 
+
 class SvmClassifier(PythonWrapper.Svm) :
     """Support-vector machine (SVM) classifier.
 
diff --git a/NeoML/Python/neoml/Utils.py b/NeoML/Python/neoml/Utils.py
index d5412f355..760e7ef30 100644
--- a/NeoML/Python/neoml/Utils.py
+++ b/NeoML/Python/neoml/Utils.py
@@ -1,4 +1,4 @@
-""" Copyright (c) 2017-2020 ABBYY Production LLC
+""" Copyright (c) 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -66,7 +66,7 @@ def convert_data(X):
     if issparse(X):
         return csr_matrix(X, dtype=np.float32)
 
-    data = np.array(X, dtype=np.float32, copy=False, order='C')
+    data = np.asarray(X, dtype=np.float32, order='C')
     if data.ndim != 2:
         raise ValueError('X must be of shape (n_samples, n_features)')
     return data
diff --git a/NeoML/Python/setup.py b/NeoML/Python/setup.py
index 0fc9a4ca6..b7526c588 100644
--- a/NeoML/Python/setup.py
+++ b/NeoML/Python/setup.py
@@ -1,4 +1,21 @@
 # -*- coding: utf-8 -*-
+
+""" Copyright (c) 2017-2024 ABBYY
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--------------------------------------------------------------------------------------------------------------
+"""
+
 import os
 import sys
 import subprocess
@@ -21,6 +38,7 @@
     "win-amd64": "x64"
 }
 
+
 class CMakeExtension(Extension):
     def __init__(self, name, sourcedir=""):
         Extension.__init__(self, name, sources=[])
@@ -66,6 +84,7 @@ def build_extension(self, ext):
             ["cmake", "--build", ".", "--target", "install"] + build_args, cwd=self.build_temp
         )
 
+
 # Get version from Build/Inc/ProductBuildNumber.h file
 def get_version():
     file_path = os.path.join(this_directory, "../../Build/Inc/ProductBuildNumber.h")
@@ -76,13 +95,14 @@ def get_version():
         if result:
             return "{}.{}.{}".format(result.group(1), result.group(2), result.group(3))
         raise Exception("Failed to parse {}".format(file_path))
-    return ""
+
 
 # Get the content of README.txt file
 def get_long_description():
     with open(os.path.join(this_directory, 'README.txt'), encoding='utf-8') as f:
         return f.read()
 
+
 setup(
     name='neoml',
     version=get_version(),
@@ -90,7 +110,7 @@ def get_long_description():
     long_description=get_long_description(),
     long_description_content_type='text/markdown',
     url='http://github.com/neoml-lib/neoml',
-    install_requires=['numpy>=1.19.1, <2.0.0', 'scipy>=1.5.2', 'onnx==1.14.1', 'protobuf==3.20.*'],
+    install_requires=['numpy>=2.0.2', 'scipy>=1.5.2', 'onnx==1.16.0', 'protobuf==3.20.*'],
     ext_modules=[CMakeExtension("neoml.PythonWrapper")],
     cmdclass={"build_ext": CMakeBuild},
     include_package_data=True,
@@ -99,5 +119,6 @@ def get_long_description():
     test_suite='tests'
 )
 
+
 if is_readthedocs:
     os.chdir(launch_dir)
diff --git a/NeoML/Python/src/PyClustering.cpp b/NeoML/Python/src/PyClustering.cpp
index fb5b8cff2..f511219bc 100644
--- a/NeoML/Python/src/PyClustering.cpp
+++ b/NeoML/Python/src/PyClustering.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2021 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -57,7 +57,7 @@ class CPyClustering {
 
 py::tuple CPyClustering::Clusterize( py::array indices, py::array data, py::array rowPtr, bool isSparse, int featureCount, py::array weight )
 {
-	CPtr<CPyClusteringData> problem = new CPyClusteringData( static_cast<int>( weight.size() ), featureCount,
+	CPtr<const CPyClusteringData> problem = new CPyClusteringData( static_cast<int>( weight.size() ), featureCount,
 		reinterpret_cast<const int*>( isSparse ? indices.data() : nullptr ), reinterpret_cast<const float*>( data.data() ),
 		reinterpret_cast<const int*>( rowPtr.data() ), reinterpret_cast<const float*>( weight.data() ) );
 
@@ -68,7 +68,8 @@ py::tuple CPyClustering::Clusterize( py::array indices, py::array data, py::arra
 		clustering->Clusterize( problem.Ptr(), result );
 	}
 
-	py::array_t<int, py::array::c_style> clusters( static_cast<int>( weight.size() ) );
+	py::array_t<int, py::array::c_style> clusters( py::ssize_t{ weight.size() } );
+	NeoAssert( weight.size() == clusters.size() );
 	auto tempClusters = clusters.mutable_unchecked<1>();
 	for( int i = 0; i < result.Data.Size(); i++ ) {
 		tempClusters(i) = result.Data[i];
diff --git a/NeoML/Python/src/PyDnnDistributed.cpp b/NeoML/Python/src/PyDnnDistributed.cpp
index 95d72f5fb..9c8554b3b 100644
--- a/NeoML/Python/src/PyDnnDistributed.cpp
+++ b/NeoML/Python/src/PyDnnDistributed.cpp
@@ -1,4 +1,5 @@
-/* Copyright © 2017-2023 ABBYY
+/* Copyright © 2017-2024 ABBYY
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
@@ -69,7 +70,8 @@ py::array CPyDistributedTraining::LastLosses( const std::string& layer )
 {
 	CArray<float> losses;
 	GetLastLoss( layer, losses );
-	py::array_t<float, py::array::c_style> lastLosses( ssize_t{ losses.Size() } );
+	py::array_t<float, py::array::c_style> lastLosses( py::ssize_t{ losses.Size() } );
+	NeoAssert( losses.Size() == lastLosses.size() );
 	memcpy( static_cast<float*>( lastLosses.request().ptr ), losses.GetPtr(), losses.Size() * sizeof( float ) );
 	return lastLosses;
 }
diff --git a/NeoML/Python/src/PyOnnxLayers.cpp b/NeoML/Python/src/PyOnnxLayers.cpp
index 171752e3f..edb64e175 100644
--- a/NeoML/Python/src/PyOnnxLayers.cpp
+++ b/NeoML/Python/src/PyOnnxLayers.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2023 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -58,7 +58,8 @@ class CPyOnnxTransformHelper : public CPyLayer {
 
 	py::array GetRules() const
 	{
-		py::array_t<int, py::array::c_style> result( 7 );
+		py::array_t<int, py::array::c_style> result( py::ssize_t{ 7 } );
+		NeoAssert( 7 == result.size() );
 		auto temp = result.mutable_unchecked();
 		for( int i = 0; i < 7; ++i ) {
 			temp[i] = static_cast<int>( Layer<COnnxTransformHelper>()->GetRule( static_cast<TBlobDim>( i ) ) ); 
diff --git a/NeoML/Python/src/PyPCA.cpp b/NeoML/Python/src/PyPCA.cpp
index 35fff0756..77b710f06 100644
--- a/NeoML/Python/src/PyPCA.cpp
+++ b/NeoML/Python/src/PyPCA.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2021 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,9 +21,10 @@ limitations under the License.
 
 static inline py::array getArray( const CArray<float>& matr )
 {
-	py::array_t<float, py::array::c_style> array( ssize_t{ matr.Size() } );
-	memcpy( static_cast<float*>( array.request().ptr ), matr.GetPtr(), matr.Size() * sizeof( float ) );
-	return array;
+	py::array_t<float, py::array::c_style> result( py::ssize_t{ matr.Size() } );
+	NeoAssert( matr.Size() == result.size() );
+	memcpy( static_cast<float*>( result.request().ptr ), matr.GetPtr(), matr.Size() * sizeof( float ) );
+	return result;
 }
 
 static CFloatMatrixDesc getMatrix( int height, int width, const int* columns, const float* values, const int* rowPtr )
@@ -217,7 +218,8 @@ void InitializePCA(py::module& m)
 			leftArray.resize( { height, components } );
 			memcpy( static_cast<float*>( leftArray.request().ptr ), leftVectors.GetPtr(), height * components * sizeof( float ) );
 		}
-		py::array_t<float, py::array::c_style> singularArray( ssize_t{ components } );
+		py::array_t<float, py::array::c_style> singularArray( py::ssize_t{ components } );
+		NeoAssert( components == singularArray.size() );
 		memcpy( static_cast<float*>( singularArray.request().ptr ), singularValues.GetPtr(), components * sizeof( float ) );
 		py::array_t<float, py::array::c_style> rightArray;
 		if( returnRightVectors ) {
@@ -226,4 +228,4 @@ void InitializePCA(py::module& m)
 		}
 		return py::make_tuple( leftArray, singularArray, rightArray );
 	}, py::return_value_policy::reference );
-}
\ No newline at end of file
+}
diff --git a/NeoML/Python/src/PyTiedEmbeddingsLayer.cpp b/NeoML/Python/src/PyTiedEmbeddingsLayer.cpp
index 2da9efff9..dbe1dbae3 100644
--- a/NeoML/Python/src/PyTiedEmbeddingsLayer.cpp
+++ b/NeoML/Python/src/PyTiedEmbeddingsLayer.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2021 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -25,6 +25,24 @@ class CPyTiedEmbeddingsLayer : public CPyLayer {
 	std::string GetEmbeddingsLayerName() const { return Layer<CTiedEmbeddingsLayer>()->GetEmbeddingsLayerName(); }
 	void SetEmbeddingsLayerName(const std::string& name) { Layer<CTiedEmbeddingsLayer>()->SetEmbeddingsLayerName(name.c_str()); }
 
+	py::list GetEmbeddingsLayerPath() const
+	{
+		py::list result; // receives a copy of every element of the layer's path, in order
+		const CArray<CString>& layerPath = Layer<CTiedEmbeddingsLayer>()->GetEmbeddingsLayerPath();
+		for( int i = 0; i < layerPath.Size(); ++i ) {
+			result.append( layerPath[i].Ptr() );
+		}
+		return result;
+	}
+	void SetEmbeddingsLayerPath( const py::list& embeddingsPath )
+	{
+		CArray<CString> path; // native copy of the python list, element order preserved
+		for( const auto& item : embeddingsPath ) {
+			path.Add( item.cast<std::string>().c_str() ); // std::string spelled out, as elsewhere in this class
+		}
+		Layer<CTiedEmbeddingsLayer>()->SetEmbeddingsLayerPath( path );
+	}
+
  	int GetChannel() const { return Layer<CTiedEmbeddingsLayer>()->GetChannelIndex(); }
 	void SetChannel(int value) { Layer<CTiedEmbeddingsLayer>()->SetChannelIndex(value); }
 
@@ -43,28 +61,30 @@ void InitializeTiedEmbeddingsLayer( py::module& m )
 		{
 			return new CPyTiedEmbeddingsLayer( *layer.Layer<CTiedEmbeddingsLayer>(), layer.MathEngineOwner() );
 		}))
-		.def( py::init([]( const std::string& name, const py::list& inputs, const py::list& input_outputs, const std::string& embeddingsName, int channel )
+		.def( py::init([]( const std::string& name, const py::list& inputs, const py::list& input_outputs, const py::list& embeddingsPath, int channel )
 		{
 			py::gil_scoped_release release;
 			CDnn& dnn = inputs[0].cast<CPyLayer>().Dnn();
 			IMathEngine& mathEngine = dnn.GetMathEngine();
-
 			CPtr<CTiedEmbeddingsLayer> tied = new CTiedEmbeddingsLayer( mathEngine );
 			tied->SetName( FindFreeLayerName( dnn, "TiedEmbeddings", name ).c_str() );
 			tied->SetChannelIndex( channel );
-			tied->SetEmbeddingsLayerName( embeddingsName.c_str() );
-
-			dnn.AddLayer( *tied );
-
+			CArray<CString> path;
+			for( auto& item : embeddingsPath ) {
+				path.Add( item.cast<string>().c_str() );
+			}
+			tied->SetEmbeddingsLayerPath( path );
 			for( int i = 0; i < inputs.size(); i++ ) {
 				tied->Connect( i, inputs[i].cast<CPyLayer>().BaseLayer(), input_outputs[i].cast<int>() );
 			}
-
+			dnn.AddLayer( *tied );
 			return new CPyTiedEmbeddingsLayer( *tied, inputs[0].cast<CPyLayer>().MathEngineOwner() );
 		}) )
 		.def( "get_channel", &CPyTiedEmbeddingsLayer::GetChannel, py::return_value_policy::reference )
 		.def( "set_channel", &CPyTiedEmbeddingsLayer::SetChannel, py::return_value_policy::reference )
 		.def( "get_embeddings_layer_name", &CPyTiedEmbeddingsLayer::GetEmbeddingsLayerName, py::return_value_policy::reference )
 		.def( "set_embeddings_layer_name", &CPyTiedEmbeddingsLayer::SetEmbeddingsLayerName, py::return_value_policy::reference )
+		.def( "get_embeddings_layer_path", &CPyTiedEmbeddingsLayer::GetEmbeddingsLayerPath, py::return_value_policy::reference )
+		.def( "set_embeddings_layer_path", &CPyTiedEmbeddingsLayer::SetEmbeddingsLayerPath, py::return_value_policy::reference )
 	;
 }
diff --git a/NeoML/Python/src/PyTiedEmbeddingsLayer.h b/NeoML/Python/src/PyTiedEmbeddingsLayer.h
index d05488ae3..02f2b2935 100644
--- a/NeoML/Python/src/PyTiedEmbeddingsLayer.h
+++ b/NeoML/Python/src/PyTiedEmbeddingsLayer.h
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2021 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/NeoML/Python/src/PyTrainingModel.cpp b/NeoML/Python/src/PyTrainingModel.cpp
index 6f0cab30c..d1e6d8881 100644
--- a/NeoML/Python/src/PyTrainingModel.cpp
+++ b/NeoML/Python/src/PyTrainingModel.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2021 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -219,7 +219,8 @@ py::array_t<double> CPyRegressionModel::Predict( py::array indices, py::array da
 		}
 	}
 
-	py::array_t<double, py::array::c_style> totalResult( rowCount );
+	py::array_t<double, py::array::c_style> totalResult( py::ssize_t{ rowCount } );
+	NeoAssert( rowCount == totalResult.size() );
 	auto r = totalResult.mutable_unchecked<1>();
 	for( int i = 0; i < rowCount; i++ ) {
 		r(i) = resultPredictions[i];
@@ -562,12 +563,12 @@ void InitializeTrainingModel(py::module& m)
 			TScore score = scoreName == "f1" ? F1Score : AccuracyScore;
 			crossValidation.Execute( parts, score, results, stratified );
 		}
-		py::array_t<double, py::array::c_style> scores( results.Success.Size() );
+		py::array_t<double, py::array::c_style> scores( py::ssize_t{ results.Success.Size() } );
+		NeoAssert( results.Success.Size() == scores.size() );
 		auto tempScores = scores.mutable_unchecked<1>();
 		for( int i = 0; i < results.Success.Size(); i++ ) {
 			tempScores(i) = results.Success[i];
 		}
-
 		return scores;
 	});
 
diff --git a/NeoML/Python/src/PyTransformLayer.cpp b/NeoML/Python/src/PyTransformLayer.cpp
index 52fef8be4..8bbec7d9f 100644
--- a/NeoML/Python/src/PyTransformLayer.cpp
+++ b/NeoML/Python/src/PyTransformLayer.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2021 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -34,7 +34,8 @@ class CPyTransformLayer : public CPyLayer {
 	}
 	py::array GetOperations() const
 	{
-		py::array_t<int, py::array::c_style> result( 7 );
+		py::array_t<int, py::array::c_style> result( py::ssize_t{ 7 } );
+		NeoAssert( 7 == result.size() );
 		auto temp = result.mutable_unchecked();
 		for( int i = 0; i < 7; i++ ) {
 			const CTransformLayer::CDimensionRule& rule = Layer<CTransformLayer>()->GetDimensionRule(static_cast<TBlobDim>(i));
@@ -44,7 +45,8 @@ class CPyTransformLayer : public CPyLayer {
 	}
 	py::array GetParameters() const
 	{
-		py::array_t<int, py::array::c_style> result( 7 );
+		py::array_t<int, py::array::c_style> result( py::ssize_t{ 7 } );
+		NeoAssert( 7 == result.size() );
 		auto temp = result.mutable_unchecked();
 		for( int i = 0; i < 7; i++ ) {
 			const CTransformLayer::CDimensionRule& rule = Layer<CTransformLayer>()->GetDimensionRule(static_cast<TBlobDim>(i));
diff --git a/NeoML/Python/src/PyTransformerLayer.cpp b/NeoML/Python/src/PyTransformerLayer.cpp
index cc6deba06..86864cc42 100644
--- a/NeoML/Python/src/PyTransformerLayer.cpp
+++ b/NeoML/Python/src/PyTransformerLayer.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2021 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -31,10 +31,17 @@ class CPyTransformerEncoderLayer : public CPyLayer {
 
 	float GetDropoutRate() const { return Layer<CTransformerEncoderLayer>()->GetDropoutRate(); }
 	void SetDropoutRate( float rate ) { Layer<CTransformerEncoderLayer>()->SetDropoutRate( rate ); }
+
+	float GetSelfAttentionDropoutRate() const { return Layer<CTransformerEncoderLayer>()->GetSelfAttentionDropoutRate(); }
+	void SetSelfAttentionDropoutRate( float rate ) { Layer<CTransformerEncoderLayer>()->SetSelfAttentionDropoutRate( rate ); }
 	
 	int GetFeedForwardSize() const { return Layer<CTransformerEncoderLayer>()->GetFeedForwardSize(); }
 	void SetFeedForwardSize( int size ) { Layer<CTransformerEncoderLayer>()->SetFeedForwardSize( size ); }
 
+	// Place of the normalization layer: right after input or before feedForward as usual
+	bool GetPreNorm() const { return Layer<CTransformerEncoderLayer>()->GetPreNorm(); }
+	void SetPreNorm( bool preNorm ) { Layer<CTransformerEncoderLayer>()->SetPreNorm( preNorm ); }
+
 	py::object CreatePythonObject() const
 	{
 		py::object pyModule = py::module::import( "neoml.Dnn" );
@@ -52,7 +59,7 @@ void InitializeTransformerLayer( py::module& m )
 			return new CPyTransformerEncoderLayer( *layer.Layer<CTransformerEncoderLayer>(), layer.MathEngineOwner() );
 		} ) )
 		.def( py::init( []( const std::string& name, const py::list& inputs, const py::list& input_outputs,
-			int headCount, int hiddenSize, float dropout, int feedForwardSize, int activationIndex )
+			int headCount, int hiddenSize, float dropout, float sa_dropout, int feedForwardSize, int activationIndex, bool pre_norm )
 		{
 			py::gil_scoped_release release;
 			CDnn& dnn = inputs[0].cast<CPyLayer>().Dnn();
@@ -62,8 +69,10 @@ void InitializeTransformerLayer( py::module& m )
 			transformer->SetHeadCount( headCount );
 			transformer->SetHiddenSize( hiddenSize );
 			transformer->SetDropoutRate( dropout );
+			transformer->SetSelfAttentionDropoutRate( sa_dropout );
 			transformer->SetFeedForwardSize( feedForwardSize );
 			transformer->SetActivation( static_cast<TActivationFunction>( activationIndex ) );
+			transformer->SetPreNorm( pre_norm );
 			for( int i = 0; i < inputs.size(); i++ ) {
 				transformer->Connect( i, inputs[i].cast<CPyLayer>().BaseLayer(), input_outputs[i].cast<int>() );
 			}
@@ -76,7 +85,11 @@ void InitializeTransformerLayer( py::module& m )
 		.def( "set_hidden_size", &CPyTransformerEncoderLayer::SetHiddenSize, py::return_value_policy::reference )
 		.def( "get_dropout", &CPyTransformerEncoderLayer::GetDropoutRate, py::return_value_policy::reference )
 		.def( "set_dropout", &CPyTransformerEncoderLayer::SetDropoutRate, py::return_value_policy::reference )
+		.def( "get_sa_dropout", &CPyTransformerEncoderLayer::GetSelfAttentionDropoutRate, py::return_value_policy::reference )
+		.def( "set_sa_dropout", &CPyTransformerEncoderLayer::SetSelfAttentionDropoutRate, py::return_value_policy::reference )
 		.def( "get_feed_forward_size", &CPyTransformerEncoderLayer::GetFeedForwardSize, py::return_value_policy::reference )
 		.def( "set_feed_forward_size", &CPyTransformerEncoderLayer::SetFeedForwardSize, py::return_value_policy::reference )
+		.def( "get_pre_norm", &CPyTransformerEncoderLayer::GetPreNorm, py::return_value_policy::reference )
+		.def( "set_pre_norm", &CPyTransformerEncoderLayer::SetPreNorm, py::return_value_policy::reference )
 	;
 }
diff --git a/NeoML/Python/src/PyTransformerLayer.h b/NeoML/Python/src/PyTransformerLayer.h
index 647926dd6..a7ea81124 100644
--- a/NeoML/Python/src/PyTransformerLayer.h
+++ b/NeoML/Python/src/PyTransformerLayer.h
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2021 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
diff --git a/NeoML/Python/tests.py b/NeoML/Python/tests.py
index b6b9874c1..75473ef46 100644
--- a/NeoML/Python/tests.py
+++ b/NeoML/Python/tests.py
@@ -1,4 +1,21 @@
-from unittest import TestCase, skipIf
+# -*- coding: utf-8 -*-
+
+""" Copyright (c) 2017-2024 ABBYY
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--------------------------------------------------------------------------------------------------------------
+"""
+from unittest import TestCase, skipIf, main
 import os
 import sys
 import tempfile
@@ -13,11 +30,12 @@
 
 class MultithreadedTestCase(TestCase):
     def _thread_function(self, target, kwargs):
-        print(f"python thread {threading.get_ident()} started")
+        print(f"\tpython thread {threading.get_ident()} started")
         target(**kwargs)
-        print(f"python thread {threading.get_ident()} finished")
+        print(f"\tpython thread {threading.get_ident()} finished")
 
     def _test_mt(self, target, result, enable_assert=False):
+        print(super().id()) # test name
         import time
         threads = []
         system_time, user_time = time.perf_counter(), time.process_time()
@@ -28,9 +46,8 @@ def _test_mt(self, target, result, enable_assert=False):
         for t in threads:
             t.join()
         system_time, user_time = time.perf_counter() - system_time, time.process_time() - user_time
-        print()
         print('System time {0:.6f} sec.'.format(system_time))
-        print('User time {0:.6f} sec.'.format(user_time))
+        print('User time {0:.6f} sec.\n'.format(user_time))
         if enable_assert:
             self.assertTrue(system_time < user_time)
 
@@ -692,7 +709,7 @@ def test_tied_embeddings(self):
         math_engine = neoml.MathEngine.CpuMathEngine()
         dnn = neoml.Dnn.Dnn(math_engine)
         source = neoml.Dnn.Source(dnn, "source")
-        tied = neoml.Dnn.TiedEmbeddings((source,), "embeddings", 0, "tied")
+        tied = neoml.Dnn.TiedEmbeddings((source,), [ "embeddings" ], 0, "tied")
         sink = neoml.Dnn.Sink(tied, "sink")
         layer = dnn.layers['tied']
         self.assertEqual(layer.name, 'tied')
@@ -705,6 +722,8 @@ def test_tied_embeddings(self):
         self.assertEqual(tied.embeddings_layer_name, "embeddings")
         tied.embeddings_layer_name = "embeddings2"
         self.assertEqual(tied.embeddings_layer_name, "embeddings2")
+        tied.embeddings_layer_path = [ "embeddings3" ]
+        self.assertEqual(tied.embeddings_layer_path, [ "embeddings3" ])
 
     def test_accuracy(self):
         math_engine = neoml.MathEngine.CpuMathEngine()
@@ -1657,7 +1676,7 @@ def test_transformer_encoder(self):
         dnn = neoml.Dnn.Dnn(math_engine)
         input_data = neoml.Dnn.Source(dnn, 'input_data')
         transformer_encoder = neoml.Dnn.TransformerEncoder(input_data, head_count=2, hidden_size=8,
-            dropout=0.2, feed_forward_size=3, activation='tanh', name='transformer_encoder')
+            dropout=0.2, sa_dropout=0.3, feed_forward_size=3, activation='tanh', pre_norm=False, name='transformer_encoder')
         sink = neoml.Dnn.Sink(transformer_encoder, name='sink')
         # getters/setters tests
         self.assertEqual(transformer_encoder.head_count, 2)
@@ -1669,9 +1688,13 @@ def test_transformer_encoder(self):
         self.assertAlmostEqual(transformer_encoder.dropout, 0.2, delta=1e-6)
         transformer_encoder.dropout = 0.1
         self.assertAlmostEqual(transformer_encoder.dropout, 0.1, delta=1e-6)
+        self.assertAlmostEqual(transformer_encoder.sa_dropout, 0.3, delta=1e-6)
+        transformer_encoder.sa_dropout = 0.15
+        self.assertAlmostEqual(transformer_encoder.sa_dropout, 0.15, delta=1e-6)
         self.assertEqual(transformer_encoder.feed_forward_size, 3)
         transformer_encoder.feed_forward_size = 15
         self.assertEqual(transformer_encoder.feed_forward_size, 15)
+        self.assertEqual(transformer_encoder.pre_norm, False)
         self.assertEqual(transformer_encoder.name, 'transformer_encoder')
         # run with different mask config
         for step in range(20):
@@ -1690,6 +1713,30 @@ def test_transformer_encoder(self):
                 outputs = dnn.run({'input_data': input_data_blob})
             self.assertEqual(outputs['sink'].shape, (1, batch_size, list_size_in, 1, 1, 1, obj_size_in))
 
+    def test_transformer_encoder_pre_norm(self):
+        # Build a TransformerEncoder with pre_norm=True, read its settings back, then run it 20 times.
+        batch_size, list_size_in, obj_size_in = 2, 13, 11
+        expected_shape = (1, batch_size, list_size_in, 1, 1, 1, obj_size_in)
+        math_engine = neoml.MathEngine.CpuMathEngine()
+        dnn = neoml.Dnn.Dnn(math_engine)
+        source = neoml.Dnn.Source(dnn, 'input_data')
+        encoder = neoml.Dnn.TransformerEncoder(source, head_count=5, hidden_size=25,
+            dropout=0.1, sa_dropout=0.15, feed_forward_size=15, activation='tanh', pre_norm=True, name='transformer_encoder_pre')
+        sink = neoml.Dnn.Sink(encoder, name='sink')
+        # every constructor argument must be readable back through the matching property
+        self.assertEqual(encoder.head_count, 5)
+        self.assertEqual(encoder.hidden_size, 25)
+        self.assertAlmostEqual(encoder.dropout, 0.1, delta=1e-6)
+        self.assertAlmostEqual(encoder.sa_dropout, 0.15, delta=1e-6)
+        self.assertEqual(encoder.feed_forward_size, 15)
+        self.assertEqual(encoder.pre_norm, True)
+        self.assertEqual(encoder.name, 'transformer_encoder_pre')
+        # only 'input_data' is fed (no mask); the sink shape is checked on every run
+        for step in range(20):
+            blob = self._transformer_test_data(math_engine, batch_size, list_size_in, obj_size_in, seed=123545+step*5)
+            result = dnn.run({'input_data': blob})
+            self.assertEqual(result['sink'].shape, expected_shape)
+
     def test_bert_conv(self):
         seq_len = 7
         batch_size = 16
@@ -2856,6 +2903,9 @@ def test_kmeans(self):
 
 
 class TestPca(TestCase):
+    def setUp(self):
+        print(super().id()) # print the id of the test that is about to run
+
     def test_full_svd(self):
         from neoml.PCA import svd
         x = np.array([[2, 1, 3, 2], [2, 4, 4, 1], [2, 4, 1, 1], [4, 4, 3, 4]], dtype=np.float32)
@@ -2953,6 +3003,9 @@ def test_load_store(self):
 
 @skipIf(sys.platform == 'darwin', 'Not supposed to work on MacOS')
 class DnnDistributedTestCase(TestCase):
+    def setUp(self):
+        print(super().id()) # print the id of the test that is about to run
+
     def test_distributed(self):
         def set_data(math_engine, thread):
             source = neoml.Blob.asblob(math_engine, np.ones((20,), dtype=np.float32), (1, 1, 1, 1, 1, 1, 20))
@@ -2999,6 +3052,9 @@ def set_data(math_engine, thread):
 
 
 class TestBPE(TestCase):
+    def setUp(self):
+        print(super().id()) # print the id of the test that is about to run
+
     def test_saveload(self):
         word_dictionary = [ "aa", "bb", "ab", "a", "b" ]
 
@@ -3044,3 +3100,7 @@ def test_train(self):
 
         bpe.cache_period = 10
         self.assertEqual(10, bpe.cache_period)
+
+
+if __name__ == "__main__":
+    main(module="tests")  # unittest.main: load the test cases from the module named "tests"
diff --git a/NeoML/docs/en/Python/conf.py b/NeoML/docs/en/Python/conf.py
index 32694feb1..9e285e377 100644
--- a/NeoML/docs/en/Python/conf.py
+++ b/NeoML/docs/en/Python/conf.py
@@ -22,7 +22,7 @@
 # -- Project information -----------------------------------------------------
 
 project = 'NeoML'
-copyright = '2023, ABBYY'
+copyright = '2024, ABBYY'
 author = 'ABBYY'
 
 
@@ -88,7 +88,7 @@
 
 # -- General configuration ---------------------------------------------------
 
-import sphinx_rtd_theme
+#import sphinx_rtd_theme
 from os import getenv
 
 # Add any Sphinx extension module names here, as strings. They can be
diff --git a/NeoML/include/NeoML/Dnn/Layers/TiedEmbeddingsLayer.h b/NeoML/include/NeoML/Dnn/Layers/TiedEmbeddingsLayer.h
index 4ee5ccffa..df20be3fd 100644
--- a/NeoML/include/NeoML/Dnn/Layers/TiedEmbeddingsLayer.h
+++ b/NeoML/include/NeoML/Dnn/Layers/TiedEmbeddingsLayer.h
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2024 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -22,8 +22,6 @@ namespace NeoML {
 
 class CMultichannelLookupLayer;
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
 // Tied embeddings layer.  https://arxiv.org/pdf/1608.05859.pdf
 // Uses matrix from CMultichannelLookupLayer.
 class NEOML_API CTiedEmbeddingsLayer : public CBaseLayer {
@@ -72,5 +70,4 @@ class NEOML_API CTiedEmbeddingsLayer : public CBaseLayer {
 // Tied embeddings.
 NEOML_API CLayerWrapper<CTiedEmbeddingsLayer> TiedEmbeddings( const char* name, int channel );
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
 } // namespace NeoML
diff --git a/NeoML/samples/python/boosting.py b/NeoML/samples/python/boosting.py
index 46856f347..4546d7247 100644
--- a/NeoML/samples/python/boosting.py
+++ b/NeoML/samples/python/boosting.py
@@ -4,7 +4,7 @@
 
 __copyright__ = """
 
-Copyright © 2017-2021 ABBYY Production LLC
+Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -24,7 +24,6 @@
 
 import neoml
 import numpy as np
-import itertools
 import time
 
 # Get data
@@ -57,6 +56,7 @@ def accuracy(model, X, y):
     'thread_count' : 1,
 }
 
+
 # Train and test boosting for every builder type
 for builder in ['full', 'hist', 'multi_full']:
     start = time.time()
diff --git a/NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp b/NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp
index 590856770..da6e2541f 100644
--- a/NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp
+++ b/NeoML/src/Dnn/Layers/TiedEmbeddingsLayer.cpp
@@ -1,4 +1,4 @@
-/* Copyright © 2017-2024 ABBYY Production LLC
+/* Copyright © 2017-2024 ABBYY
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,8 +21,6 @@ limitations under the License.
 
 namespace NeoML {
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
-
 CTiedEmbeddingsLayer::CTiedEmbeddingsLayer( IMathEngine& mathEngine ) :
 	CBaseLayer( mathEngine, "CTiedEmbeddingsLayer", true ),
 	channelIndex( 0 )
@@ -36,11 +34,11 @@ void CTiedEmbeddingsLayer::SetChannelIndex( int val )
 	channelIndex = val;
 }
 
-static const int CnnTiedEmbeddingsLayerVersion = 2001;
+constexpr int TiedEmbeddingsLayerVersion = 2001;
 
 void CTiedEmbeddingsLayer::Serialize( CArchive& archive )
 {
-	int version = archive.SerializeVersion(CnnTiedEmbeddingsLayerVersion, CDnn::ArchiveMinSupportedVersion);
+	const int version = archive.SerializeVersion( TiedEmbeddingsLayerVersion, CDnn::ArchiveMinSupportedVersion );
 	CBaseLayer::Serialize( archive );
 
 	if (version < 2001 && archive.IsLoading()) {
@@ -173,5 +171,4 @@ CLayerWrapper<CTiedEmbeddingsLayer> TiedEmbeddings( const char* name, int channe
 	} );
 }
 
-////////////////////////////////////////////////////////////////////////////////////////////////////
 } // namespace NeoML
diff --git a/NeoOnnx/test/Python/neoml_onnx_backend_test.py b/NeoOnnx/test/Python/neoml_onnx_backend_test.py
index d47038e42..c0e28d420 100644
--- a/NeoOnnx/test/Python/neoml_onnx_backend_test.py
+++ b/NeoOnnx/test/Python/neoml_onnx_backend_test.py
@@ -1,9 +1,27 @@
-"""Runs standard backend tests from ONNX on neoml.Onnx backend
+﻿# -*- coding: utf-8 -*-
+
+""" Copyright (c) 2017-2024 ABBYY
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+--------------------------------------------------------------------------------------------------------------
 """
 import neoml
 import unittest
 import onnx.backend.test
 
+
+""" Runs standard backend tests from ONNX on neoml.Onnx backend
+"""
 pytest_plugins = "onnx.backend.test.report"
 
 backend_test = onnx.backend.test.runner.Runner(neoml.Onnx, __name__)
@@ -262,6 +280,100 @@
 
 # TODO: ALARM!!! Run ALL the failing tests and fix all the asserts (it should be replaced with some exception)...
 
+# Non-float params: these tests fail on the blob data type assertion (see the error on each line)
+backend_test.exclude('test_operator_non_float_params_cpu')            # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_add_uint8_cpu')                            # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_clip_default_int8_inbounds_expanded_cpu')  # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_clip_default_int8_max_expanded_cpu')       # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_clip_default_int8_min_expanded_cpu')       # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_div_uint8_cpu')                            # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_mul_uint8_cpu')                            # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_sub_uint8_cpu')                            # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+
+backend_test.exclude('test_equal_bcast_cpu')                          # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_equal_cpu')                                # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_gather_0_cpu')                             # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_gather_1_cpu')                             # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_gather_2d_indices_cpu')                    # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_gather_negative_indices_cpu')              # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_not_2d_cpu')                               # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_not_3d_cpu')                               # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_not_4d_cpu')                               # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_scatternd_cpu')                            # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_where_example_cpu')                        # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_where_long_example_cpu')                   # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_Embedding_cpu')                            # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+backend_test.exclude('test_Embedding_sparse_cpu')                     # RuntimeError: Internal Program Error: (DnnBlob.h, 337) NeoAssert(GetDataType() == CBlobType<T>::GetType());
+
+# Tests that require opset versions NeoOnnx doesn't support (20 and 21)
+backend_test.exclude('test_affine_grid_2d_align_corners_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_affine_grid_2d_align_corners_expanded_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_affine_grid_2d_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_affine_grid_2d_expanded_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_affine_grid_3d_align_corners_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_affine_grid_3d_align_corners_expanded_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_affine_grid_3d_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_affine_grid_3d_expanded_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_constant_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_axis0_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_axis1_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_axis2_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_axis3_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_default_axis_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_negative_axis1_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_negative_axis2_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_negative_axis3_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_flatten_negative_axis4_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_gelu_default_1_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_gelu_default_1_expanded_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_gelu_default_2_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_gelu_default_2_expanded_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_gelu_tanh_1_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_gelu_tanh_1_expanded_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_gelu_tanh_2_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_gelu_tanh_2_expanded_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_identity_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_image_decoder_decode_bmp_rgb_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_image_decoder_decode_jpeg2k_rgb_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_image_decoder_decode_jpeg_bgr_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_image_decoder_decode_jpeg_grayscale_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_image_decoder_decode_jpeg_rgb_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_image_decoder_decode_png_rgb_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_image_decoder_decode_pnm_rgb_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_image_decoder_decode_tiff_rgb_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_image_decoder_decode_webp_rgb_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_regex_full_match_basic_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_regex_full_match_email_domain_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_regex_full_match_empty_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_shape_clip_end_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_clip_start_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_end_1_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_end_negative_1_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_example_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_start_1_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_start_1_end_2_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_start_1_end_negative_1_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_shape_start_negative_1_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_string_concat_broadcasting_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_concat_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_concat_empty_string_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_concat_utf8_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_concat_zero_dimensional_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_split_basic_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_split_consecutive_delimiters_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_split_empty_string_delimiter_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_split_empty_tensor_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_split_maxsplit_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_string_split_no_delimiter_cpu') # Unsupported opset version: 20
+backend_test.exclude('test_transpose_all_permutations_0_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_transpose_all_permutations_1_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_transpose_all_permutations_2_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_transpose_all_permutations_3_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_transpose_all_permutations_4_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_transpose_all_permutations_5_cpu') # Unsupported opset version: 21
+backend_test.exclude('test_transpose_default_cpu') # Unsupported opset version: 21
+
 globals().update(backend_test.enable_report().test_cases)
 
 if __name__ == '__main__':