From c8f4abfafbe1ca8b3ee3bef224a992a382d985b3 Mon Sep 17 00:00:00 2001 From: MirceaDan99 Date: Wed, 27 Nov 2024 18:29:41 +0200 Subject: [PATCH] Add `BlobContainer` class and derivates for each `std::vector` and `std::shared_ptr` blob types --- .../include/intel_npu/common/igraph.hpp | 57 +++++++++++++++++++ .../include/driver_compiler_adapter.hpp | 2 +- .../compiler_adapter/include/driver_graph.hpp | 2 +- .../include/plugin_compiler_adapter.hpp | 2 +- .../compiler_adapter/include/plugin_graph.hpp | 2 +- .../src/driver_compiler_adapter.cpp | 6 +- .../src/compiler_adapter/src/driver_graph.cpp | 23 ++++++++ .../src/plugin_compiler_adapter.cpp | 17 +++--- .../src/compiler_adapter/src/plugin_graph.cpp | 6 +- .../intel_npu/src/plugin/include/plugin.hpp | 8 ++- .../intel_npu/src/plugin/src/plugin.cpp | 22 ++++--- 11 files changed, 114 insertions(+), 33 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp index 3f2373ed7f616b..677750c1b714dd 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -17,6 +17,63 @@ namespace intel_npu { +class BlobContainer { +public: + virtual void* get_ptr() { + OPENVINO_THROW("const BlobContainer::get_ptr() method is not implemented!"); + } + + virtual size_t size() const { + OPENVINO_THROW("BlobContainer::size() method is not implemented!"); + } + + virtual bool release_from_memory() { + OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!"); + } +}; + +class BlobContainerVector : public BlobContainer { +public: + BlobContainerVector(std::vector blob) : _ownershipBlob(std::move(blob)) {} + + void* get_ptr() override { + return reinterpret_cast(_ownershipBlob.data()); + } + + size_t size() const override { + return _ownershipBlob.size(); + } + + bool release_from_memory() override { + _ownershipBlob.clear(); + _ownershipBlob.shrink_to_fit(); + return true; + } + +private: + std::vector _ownershipBlob; +}; + +class BlobContainerAlignedBuffer : public BlobContainer { +public: + BlobContainerAlignedBuffer(const std::shared_ptr& blobSO) : _ownershipBlob(blobSO) {} + + void* get_ptr() override { + return _ownershipBlob->get_ptr(); + } + + size_t size() const override { + return _ownershipBlob->size(); + } + + bool release_from_memory() override { + return false; + } + +private: + std::shared_ptr _ownershipBlob; +}; + class IGraph : public std::enable_shared_from_this { public: IGraph(ze_graph_handle_t handle, diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp index bb916ae0e8a4e8..3a2af03df8cead 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp @@ -23,7 +23,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; + std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp index 9344e59d45a70e..b81b8b8679aca5 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph { ze_graph_handle_t graphHandle, NetworkMetadata metadata, const Config& config, - std::optional> blob); + std::optional> blob); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 3271c189bb8e72..c60b80bcfaa314 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -21,7 +21,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter { std::shared_ptr compile(const std::shared_ptr& model, const Config& config) const override; - std::shared_ptr parse(std::shared_ptr networkSOPtr, const Config& config) const override; + std::shared_ptr parse(std::unique_ptr blobPtr, const Config& config) const override; ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp index 1ddc4a81ec7267..61d4a6ed866529 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph { const std::shared_ptr& zeroInitStruct, ze_graph_handle_t graphHandle, NetworkMetadata metadata, - std::shared_ptr blobSOPtr, + std::unique_ptr blobPtr, const Config& config); size_t export_blob(std::ostream& stream) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index e3a03b3b2e1a75..9fd2e57d6e37c2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -203,11 +203,11 @@ std::shared_ptr DriverCompilerAdapter::compile(const std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { +std::shared_ptr DriverCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse"); _logger.debug("parse start"); - ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); + ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); _logger.debug("parse end"); OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta"); @@ -218,7 +218,7 @@ std::shared_ptr DriverCompilerAdapter::parse(std::shared_ptr>(std::move(networkSOPtr))); + std::optional>(std::move(blobPtr))); } ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp index 02a63316254d3e..73d26eb0ad851f 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp @@ -136,6 +136,29 @@ void DriverGraph::initialize(const Config& config) { } } +bool DriverGraph::release_blob(const Config& config) { + if (_blob == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 || + config.get()) { + return false; + } + + ze_graph_properties_2_t properties = {}; + properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; + _zeroInitStruct->getGraphDdiTable().pfnGetProperties2(_handle, &properties); + + if (~properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) { + return false; + } + + if(!_blob->release_from_memory()) { + return false; + } + + _logger.debug("Blob is released"); + + return true; +}; + DriverGraph::~DriverGraph() { if (_handle != nullptr) { auto result = _zeGraphExt->destroyGraph(_handle); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 95e7edab03cb84..d30fc613ecb4c6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -80,7 +80,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrcompile(model, config); - auto networkSO = std::make_shared>(std::move(networkDesc.compiledNetwork)); + auto blobPtr = std::make_unique(std::move(networkDesc.compiledNetwork)); _logger.debug("compile end"); ze_graph_handle_t graphHandle = nullptr; @@ -88,28 +88,27 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphHandle(networkSO->data(), networkSO->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } - auto networkSOPtr = std::make_shared>>>(reinterpret_cast(networkSO->data()), networkSO->size(), networkSO); return std::make_shared(_zeGraphExt, _compiler, _zeroInitStruct, graphHandle, std::move(networkDesc.metadata), - networkSOPtr, + std::move(blobPtr), config); } -std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr networkSOPtr, const Config& config) const { +std::shared_ptr PluginCompilerAdapter::parse(std::unique_ptr blobPtr, const Config& config) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); _logger.debug("parse start"); - std::vector network(networkSOPtr->size()); - network.assign(reinterpret_cast(networkSOPtr->get_ptr()), reinterpret_cast(networkSOPtr->get_ptr()) + networkSOPtr->size()); + std::vector network(blobPtr->size()); + network.assign(reinterpret_cast(blobPtr->get_ptr()), reinterpret_cast(blobPtr->get_ptr()) + blobPtr->size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); @@ -118,7 +117,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptrgetGraphHandle(reinterpret_cast(networkSOPtr->get_ptr()), networkSOPtr->size()); + graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast(blobPtr->get_ptr()), blobPtr->size()); } return std::make_shared(_zeGraphExt, @@ -126,7 +125,7 @@ std::shared_ptr PluginCompilerAdapter::parse(std::shared_ptr PluginGraph::process_profiling_output(const std::vector& profData, const Config& config) const { - - // Need to fix increased memory usage below, ov::SharedBuffer won't permit us to get underlying shared buffer as it is private - // Only if we work with std::vector blobs, but then IGraph needs to have 2 declarations for the same blob - // Maybe if we templatize blob in IGraph to be either std::vector or std::shared_ptr? std::vector blob(_blob->size()); - blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); + blob.assign(reinterpret_cast(_blob->get_ptr()), reinterpret_cast(_blob->get_ptr()) + _blob->size()); return _compiler->process_profiling_output(profData, blob, config); } diff --git a/src/plugins/intel_npu/src/plugin/include/plugin.hpp b/src/plugins/intel_npu/src/plugin/include/plugin.hpp index 0733e6e7f11fbd..74bf1a2966b6d2 100644 --- a/src/plugins/intel_npu/src/plugin/include/plugin.hpp +++ b/src/plugins/intel_npu/src/plugin/include/plugin.hpp @@ -44,14 +44,16 @@ class Plugin : public ov::IPlugin { std::shared_ptr import_model(std::istream& stream, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, - const ov::AnyMap& properties) const override; + std::shared_ptr import_model(std::istream& /* unusedStream */, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const override; std::shared_ptr import_model(std::istream& stream, const ov::SoPtr& context, const ov::AnyMap& properties) const override; - std::shared_ptr import_model(std::shared_ptr model_buffer, + std::shared_ptr import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const override; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 88db4cad62aeff..12dfd0fca5c9ee 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -784,15 +784,16 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig); auto graphSize = getFileSize(stream); - auto blobSO = std::make_shared>(graphSize); - stream.read(reinterpret_cast(blobSO->data()), graphSize); + + std::vector blob(graphSize); + stream.read(reinterpret_cast(blob.data()), graphSize); if (!stream) { OPENVINO_THROW("Failed to read data from stream!"); } _logger.debug("Successfully read %zu bytes into blob.", graphSize); - auto blobSOPtr = std::make_shared>>>(reinterpret_cast(blobSO->data()), graphSize, blobSO); - auto graph = compiler->parse(std::move(blobSOPtr), localConfig); + auto blobContainerPtr = std::make_unique(std::move(blob)); + auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -810,7 +811,9 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c return compiledModel; } -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, const ov::AnyMap& properties) const { +std::shared_ptr Plugin::import_model(std::istream& /* unusedStream */, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs"); @@ -835,8 +838,8 @@ std::shared_ptr Plugin::import_model(std::shared_ptrparse(model_buffer, localConfig); + auto blobContainerPtr = std::make_unique(model_buffer); + auto graph = compiler->parse(std::move(blobContainerPtr), localConfig); graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++)); const std::shared_ptr modelDummy = @@ -865,7 +868,8 @@ std::shared_ptr Plugin::import_model(std::istream& stream, return import_model(stream, context, properties); } -std::shared_ptr Plugin::import_model(std::shared_ptr model_buffer, +std::shared_ptr Plugin::import_model(std::istream& stream, + std::shared_ptr model_buffer, const ov::SoPtr& context, const ov::AnyMap& properties) const { auto casted = std::dynamic_pointer_cast(context._ptr); @@ -873,7 +877,7 @@ std::shared_ptr Plugin::import_model(std::shared_ptr& model,