Skip to content

Commit

Permalink
[CPU] Enable memory reuse for nested graphs
Browse files Browse the repository at this point in the history
  • Loading branch information
EgorDuplensky committed Jan 21, 2025
1 parent bad9b10 commit efb84d1
Show file tree
Hide file tree
Showing 89 changed files with 1,474 additions and 718 deletions.
26 changes: 26 additions & 0 deletions src/plugins/intel_cpu/src/allocation_context.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <unordered_map>
#include <utility>  // std::pair (used by GlobalExecutionIndex); included explicitly for self-containment
#include <vector>

namespace ov {
namespace intel_cpu {

class Node;
class Edge;

// Maps a node to a pair of indices in the global (cross-graph) execution order.
// NOTE(review): presumably the pair is the node's first/last execution position
// across nested graphs — confirm against the code that populates this index.
using GlobalExecutionIndex = std::unordered_map<std::shared_ptr<Node>, std::pair<int, int>>;

// Aggregated state needed to run memory allocation over a whole network,
// including edges collected from nested graphs, so memory can be reused across them.
struct AllocationContext {
    // Edges participating in memory allocation.
    std::vector<std::shared_ptr<Edge>> edges;
    // Global execution order positions for the nodes connected by 'edges'.
    GlobalExecutionIndex execIndex;
    // Positions in the execution order acting as synchronization points.
    // NOTE(review): assumed to bound memory-reuse intervals — confirm with the allocator.
    std::vector<size_t> syncPoints;
};

}  // namespace intel_cpu
}  // namespace ov
12 changes: 8 additions & 4 deletions src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
#include "async_infer_request.h"
#include "config.h"
#include "cpu/x64/cpu_isa_traits.hpp"
#include "graph.h"
#include "infer_request.h"
#include "itt.h"
#include "low_precision/low_precision.hpp"
#include "memory_control.hpp"
#include "memory_state.h"
#include "openvino/core/type/element_type.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
Expand Down Expand Up @@ -54,7 +56,8 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
m_cfg{std::move(cfg)},
m_name{model->get_name()},
m_loaded_from_cache(loaded_from_cache),
m_sub_memory_manager(std::move(sub_memory_manager)) {
m_sub_memory_manager(std::move(sub_memory_manager)),
m_networkMemoryControl(std::make_shared<NetworkMemoryControl>()) {
m_mutex = std::make_shared<std::mutex>();
const auto& core = m_plugin->get_core();
if (!core)
Expand Down Expand Up @@ -160,15 +163,16 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
std::lock_guard<std::mutex> lock{*m_mutex.get()};
auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);

ctx = std::make_shared<GraphContext>(m_cfg,
m_socketWeights[socketId],
isQuantizedFlag,
streamsExecutor,
m_sub_memory_manager);
}

const std::shared_ptr<const ov::Model> model = m_model;
graphLock._graph.CreateGraph(model, ctx);
graphLock._graph.Init(model, ctx);
graphLock._graph.Activate();
} catch (...) {
exception = std::current_exception();
}
Expand Down Expand Up @@ -349,7 +353,7 @@ void CompiledModel::release_memory() {
"Attempt to call release_memory() on a compiled model in a busy state. Please ensure that all "
"infer requests are completed before releasing memory.");
auto ctx = graph.getGraphContext();
ctx->getNetworkMemoryControl()->releaseMemory();
ctx->releaseMemory();
}
}

Expand Down
9 changes: 8 additions & 1 deletion src/plugins/intel_cpu/src/compiled_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#pragma once

#include <memory>
#include <string>
#include <vector>

Expand All @@ -13,12 +14,13 @@
#include "openvino/runtime/iinfer_request.hpp"
#include "openvino/runtime/iplugin.hpp"
#include "openvino/runtime/isync_infer_request.hpp"
#include "openvino/runtime/threading/thread_local.hpp"
#include "sub_memory_manager.hpp"

namespace ov {
namespace intel_cpu {

class NetworkMemoryControl;

class CompiledModel : public ov::ICompiledModel {
public:
struct GraphGuard : public Graph {
Expand Down Expand Up @@ -66,6 +68,10 @@ class CompiledModel : public ov::ICompiledModel {
return m_name;
}

// Returns the NetworkMemoryControl instance held by this compiled model
// (m_networkMemoryControl member); never reseated after construction.
std::shared_ptr<NetworkMemoryControl> get_network_memory_control() const {
return m_networkMemoryControl;
}

private:
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
friend class CompiledModelHolder;
Expand Down Expand Up @@ -99,6 +105,7 @@ class CompiledModel : public ov::ICompiledModel {

std::vector<std::shared_ptr<CompiledModel>> m_sub_compiled_models;
std::shared_ptr<SubMemoryManager> m_sub_memory_manager = nullptr;
std::shared_ptr<NetworkMemoryControl> m_networkMemoryControl = nullptr;
bool m_has_sub_compiled_models = false;
};

Expand Down
11 changes: 8 additions & 3 deletions src/plugins/intel_cpu/src/edge.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ Edge::ReorderStatus Edge::needReorder() {
}

void Edge::reuse(MemoryPtr ptr) {
OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse initialized memory in ", *this);
OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse uninitialized memory in ", *this);
memoryPtr = std::move(ptr);
changeStatus(Status::Allocated);

Expand Down Expand Up @@ -433,11 +433,16 @@ const MemoryDesc& Edge::getOutputDesc() const {
}

const MemoryDesc& Edge::getDesc() const {
OPENVINO_ASSERT(!one_of(status, Status::Validated, Status::Allocated),
"Desc of an Allocated edge ",
*this,
" must be accessed through the memory object");

if (getInputDesc().getPrecision() == element::undefined)
return getInputDesc();

if (!getInputDesc().isCompatible(getOutputDesc()))
OPENVINO_THROW("Cannot get descriptor for edge: ", getParent()->getName(), "->", getChild()->getName());
OPENVINO_THROW("Cannot get descriptor for edge: ", *this);

return getInputDesc();
}
Expand Down Expand Up @@ -466,7 +471,7 @@ void Edge::validate() {
getChild();

if (status != Status::Allocated || !memoryPtr) {
OPENVINO_THROW("Error memory is not allocated!");
OPENVINO_THROW("Error memory is not allocated for edge: ", *this);
}
status = Status::Validated;
}
Expand Down
10 changes: 8 additions & 2 deletions src/plugins/intel_cpu/src/edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,13 @@ class Edge {
public:
Edge(const std::shared_ptr<Node>& parent, const std::shared_ptr<Node>& child, int pr_port = 0, int ch_port = 0);

enum class Status { Uninitialized, NeedAllocation, NotAllocated, Allocated, Validated };
enum class Status {
Uninitialized, // base edge is unknown yet
NeedAllocation, // edge is the base edge
NotAllocated, // edge references another edge
Allocated, // edge memory is allocated
Validated // edge is validated
};

enum class ReorderStatus { Regular = 0, Optimized = 1, No = 2 };

Expand Down Expand Up @@ -88,6 +94,7 @@ class Edge {
}

std::string hash() const;
const MemoryDesc& getDesc() const;

private:
std::weak_ptr<Node> parent;
Expand All @@ -105,7 +112,6 @@ class Edge {
PortDescBaseCPtr getInputPortDesc() const;
PortDescBaseCPtr getOutputPortDesc() const;

const MemoryDesc& getDesc() const;
bool enforceReorder();

void collectConsumers(std::vector<std::shared_ptr<Node>>& result) const;
Expand Down
Loading

0 comments on commit efb84d1

Please sign in to comment.