Skip to content

Commit

Permalink
[CPU] Enable memory reuse for nested graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorDuplensky committed Jan 21, 2025
1 parent bad9b10 commit efb84d1
Show file tree
Hide file tree
Showing 89 changed files with 1,474 additions and 718 deletions.
26 changes: 26 additions & 0 deletions src/plugins/intel_cpu/src/allocation_context.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <unordered_map>
#include <utility>  // std::pair (used by GlobalExecutionIndex); included explicitly for self-containment
#include <vector>

namespace ov {
namespace intel_cpu {

class Node;
class Edge;

// Maps a node to a pair of indices in the global (cross-graph) execution order.
// NOTE(review): presumably the pair is the node's first/last execution position
// across nested graphs — confirm against the code that populates this index.
using GlobalExecutionIndex = std::unordered_map<std::shared_ptr<Node>, std::pair<int, int>>;

// Aggregated state needed to run memory allocation over a whole network,
// including edges collected from nested graphs, so memory can be reused across them.
struct AllocationContext {
    // Edges participating in memory allocation.
    std::vector<std::shared_ptr<Edge>> edges;
    // Global execution order positions for the nodes connected by 'edges'.
    GlobalExecutionIndex execIndex;
    // Positions in the execution order acting as synchronization points.
    // NOTE(review): assumed to bound memory-reuse intervals — confirm with the allocator.
    std::vector<size_t> syncPoints;
};

}  // namespace intel_cpu
}  // namespace ov
12 changes: 8 additions & 4 deletions src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
#include "async_infer_request.h"
#include "config.h"
#include "cpu/x64/cpu_isa_traits.hpp"
#include "graph.h"
#include "infer_request.h"
#include "itt.h"
#include "low_precision/low_precision.hpp"
#include "memory_control.hpp"
#include "memory_state.h"
#include "openvino/core/type/element_type.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
Expand Down Expand Up @@ -54,7 +56,8 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
m_cfg{std::move(cfg)},
m_name{model->get_name()},
m_loaded_from_cache(loaded_from_cache),
m_sub_memory_manager(std::move(sub_memory_manager)) {
m_sub_memory_manager(std::move(sub_memory_manager)),
m_networkMemoryControl(std::make_shared<NetworkMemoryControl>()) {
m_mutex = std::make_shared<std::mutex>();
const auto& core = m_plugin->get_core();
if (!core)
Expand Down Expand Up @@ -160,15 +163,16 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
std::lock_guard<std::mutex> lock{*m_mutex.get()};
auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);

ctx = std::make_shared<GraphContext>(m_cfg,
m_socketWeights[socketId],
isQuantizedFlag,
streamsExecutor,
m_sub_memory_manager);
}

const std::shared_ptr<const ov::Model> model = m_model;
graphLock._graph.CreateGraph(model, ctx);
graphLock._graph.Init(model, ctx);
graphLock._graph.Activate();
} catch (...) {
exception = std::current_exception();
}
Expand Down Expand Up @@ -349,7 +353,7 @@ void CompiledModel::release_memory() {
"Attempt to call release_memory() on a compiled model in a busy state. Please ensure that all "
"infer requests are completed before releasing memory.");
auto ctx = graph.getGraphContext();
ctx->getNetworkMemoryControl()->releaseMemory();
ctx->releaseMemory();
}
}

Expand Down
9 changes: 8 additions & 1 deletion src/plugins/intel_cpu/src/compiled_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#pragma once

#include <memory>
#include <string>
#include <vector>

Expand All @@ -13,12 +14,13 @@
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/iplugin.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/threading/thread_local.hpp"
#include "sub_memory_manager.hpp"

namespace ov {
namespace intel_cpu {

class NetworkMemoryControl;

class CompiledModel : public ov::ICompiledModel {
public:
struct GraphGuard : public Graph {
Expand Down Expand Up @@ -66,6 +68,10 @@ class CompiledModel : public ov::ICompiledModel {
return m_name;
}

// Returns the NetworkMemoryControl instance held by this compiled model
// (m_networkMemoryControl member); never reseated after construction.
std::shared_ptr<NetworkMemoryControl> get_network_memory_control() const {
return m_networkMemoryControl;
}

private:
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
friend class CompiledModelHolder;
Expand Down Expand Up @@ -99,6 +105,7 @@ class CompiledModel : public ov::ICompiledModel {

std::vector<std::shared_ptr<CompiledModel>> m_sub_compiled_models;
std::shared_ptr<SubMemoryManager> m_sub_memory_manager = nullptr;
std::shared_ptr<NetworkMemoryControl> m_networkMemoryControl = nullptr;
bool m_has_sub_compiled_models = false;
};

Expand Down
11 changes: 8 additions & 3 deletions src/plugins/intel_cpu/src/edge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ Edge::ReorderStatus Edge::needReorder() {
}

void Edge::reuse(MemoryPtr ptr) {
OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse initialized memory in ", *this);
OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse uninitialized memory in ", *this);
memoryPtr = std::move(ptr);
changeStatus(Status::Allocated);

Expand Down Expand Up @@ -433,11 +433,16 @@ const MemoryDesc& Edge::getOutputDesc() const {
}

const MemoryDesc& Edge::getDesc() const {
OPENVINO_ASSERT(!one_of(status, Status::Validated, Status::Allocated),
"Desc of an Allocated edge ",
*this,
" must be accessed through the memory object");

if (getInputDesc().getPrecision() == element::undefined)
return getInputDesc();

if (!getInputDesc().isCompatible(getOutputDesc()))
OPENVINO_THROW("Cannot get descriptor for edge: ", getParent()->getName(), "->", getChild()->getName());
OPENVINO_THROW("Cannot get descriptor for edge: ", *this);

return getInputDesc();
}
Expand Down Expand Up @@ -466,7 +471,7 @@ void Edge::validate() {
getChild();

if (status != Status::Allocated || !memoryPtr) {
OPENVINO_THROW("Error memory is not allocated!");
OPENVINO_THROW("Error memory is not allocated for edge: ", *this);
}
status = Status::Validated;
}
Expand Down
10 changes: 8 additions & 2 deletions src/plugins/intel_cpu/src/edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@ class Edge {
public:
Edge(const std::shared_ptr<Node>& parent, const std::shared_ptr<Node>& child, int pr_port = 0, int ch_port = 0);

enum class Status { Uninitialized, NeedAllocation, NotAllocated, Allocated, Validated };
enum class Status {
Uninitialized, // base edge is unknown yet
NeedAllocation, // edge is the base edge
NotAllocated, // edge references another edge
Allocated, // edge memory is allocated
Validated // edge is validated
};

enum class ReorderStatus { Regular = 0, Optimized = 1, No = 2 };

Expand Down Expand Up @@ -88,6 +94,7 @@ class Edge {
}

std::string hash() const;
const MemoryDesc& getDesc() const;

private:
std::weak_ptr<Node> parent;
Expand All @@ -105,7 +112,6 @@ class Edge {
PortDescBaseCPtr getInputPortDesc() const;
PortDescBaseCPtr getOutputPortDesc() const;

const MemoryDesc& getDesc() const;
bool enforceReorder();

void collectConsumers(std::vector<std::shared_ptr<Node>>& result) const;
Expand Down
Loading

0 comments on commit efb84d1

Please sign in to comment.