From 2b6200e0ef90af325bf19680c933d5e2e526b806 Mon Sep 17 00:00:00 2001 From: Oleg Pipikin Date: Tue, 19 Nov 2024 16:29:34 +0000 Subject: [PATCH] Update plugin API to import model with mmap buffer --- .../dev_api/openvino/runtime/iplugin.hpp | 27 +++++++++++++++++++ src/inference/src/cache_manager.hpp | 11 +++++--- src/inference/src/dev/core_impl.cpp | 11 +++++--- src/inference/src/dev/iplugin.cpp | 13 +++++++++ src/inference/src/dev/plugin.cpp | 11 ++++++++ src/inference/src/dev/plugin.hpp | 7 +++++ src/plugins/intel_cpu/src/plugin.cpp | 8 ++++++ src/plugins/intel_cpu/src/plugin.h | 11 ++++++++ src/plugins/intel_cpu/src/utils/serialize.cpp | 13 +++++---- src/plugins/intel_cpu/src/utils/serialize.hpp | 7 ++++- 10 files changed, 106 insertions(+), 13 deletions(-) diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index 8165e658c206f0..e88c3e4a539d15 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -185,6 +185,33 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this& context, const ov::AnyMap& properties) const = 0; + /** + * @brief Creates a compiled model from a previously exported model using plugin implementation + * and removes OpenVINO Runtime magic and plugin name + * @param model Reference to model output stream + * @param model_buffer AlignedBuffer with cached model + * @param properties An ov::AnyMap of properties + * @return A compiled model + */ + virtual std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const; + + /** + * @brief Creates a compiled model from a previously exported model using plugin implementation + * and removes OpenVINO Runtime magic and plugin name + * @param model Reference to model output stream + * @param model_buffer AlignedBuffer with cached model + * @param context A pointer to plugin context derived 
from RemoteContext class used to + * execute the network + * @param properties An ov::AnyMap of properties + * @return A compiled model + */ + virtual std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& properties) const; + /** * @brief Queries a plugin about supported layers in model * @param model Model object to query. diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index c441811c3cfd02..0f2e1e27e2a950 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -69,7 +69,7 @@ class ICacheManager { /** * @brief Function passing created input stream */ - using StreamReader = std::function; + using StreamReader = std::function)>; /** * @brief Callback when OpenVINO intends to read model from cache @@ -141,13 +141,16 @@ class FileStorageCacheManager final : public ICacheManager { auto mmap = ov::load_mmap_object(blob_file_name); auto shared_buffer = std::make_shared>>(mmap->data(), mmap->size(), mmap); +#if 0 OwningSharedStreamBuffer buf(shared_buffer); std::istream stream(&buf); - reader(stream); +#else + std::ifstream stream(blob_file_name, std::ios_base::binary); +#endif + reader(stream, shared_buffer); } else { std::ifstream stream(blob_file_name, std::ios_base::binary); - reader(stream); - } + reader(stream, nullptr); } } } diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 244d27b5eebb67..5984b26c99b123 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1413,7 +1413,7 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( cacheContent.blobId, coreConfig.get_enable_mmap() && ov::util::contains(plugin.get_property(ov::internal::supported_properties), ov::internal::caching_with_mmap), - [&](std::istream& networkStream) { + [&](std::istream& networkStream, std::shared_ptr model_buffer) { OV_ITT_SCOPE(FIRST_INFERENCE, 
ov::itt::domains::LoadTime, "Core::load_model_from_cache::ReadStreamAndImport"); @@ -1459,8 +1459,13 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( update_config[ov::weights_path.name()] = weights_path; } } - compiled_model = context ? plugin.import_model(networkStream, context, update_config) - : plugin.import_model(networkStream, update_config); + if (model_buffer) { + compiled_model = context ? plugin.import_model(networkStream, model_buffer, context, update_config) + : plugin.import_model(networkStream, model_buffer, update_config); + } else { + compiled_model = context ? plugin.import_model(networkStream, context, update_config) + : plugin.import_model(networkStream, update_config); + } }); } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index 1049e39bee6f49..42d735baa0449a 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -57,6 +57,19 @@ const std::string& ov::IPlugin::get_device_name() const { return m_plugin_name; } +std::shared_ptr ov::IPlugin::import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const{ + OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); +} + +std::shared_ptr ov::IPlugin::import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& properties) const{ + OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented"); +} + void ov::IPlugin::set_core(const std::weak_ptr& core) { OPENVINO_ASSERT(!core.expired()); m_core = core; diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 40207bac9087fa..23e0e04bb6d0e0 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -79,6 +79,17 @@ ov::SoPtr ov::Plugin::import_model(std::istream& model, OV_PLUGIN_CALL_STATEMENT(return 
{m_ptr->import_model(model, context, config), m_so}); } +ov::SoPtr ov::Plugin::import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const { + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so}); +} + +ov::SoPtr ov::Plugin::import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& config) const { + OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so}); +} + ov::SoPtr ov::Plugin::create_context(const AnyMap& params) const { OV_PLUGIN_CALL_STATEMENT({ auto remote = m_ptr->create_context(params); diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 14a5adebbab3a4..004fcc04446c0a 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -59,6 +59,13 @@ class Plugin { const ov::SoPtr& context, const ov::AnyMap& config) const; + SoPtr import_model(std::istream& model, std::shared_ptr model_buffer, const ov::AnyMap& properties) const; + + SoPtr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& config) const; + ov::SoPtr create_context(const AnyMap& params) const; ov::SoPtr get_default_context(const AnyMap& params) const; diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index b74d4f7c8acbbb..f81e531698eb01 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -553,6 +553,13 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& std::shared_ptr Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const { + return import_model(model_stream, nullptr, config); +} + + +std::shared_ptr Plugin::import_model(std::istream& model_stream, + std::shared_ptr model_buffer, + const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, 
"import_model"); CacheDecrypt decrypt{ codec_xor }; @@ -565,6 +572,7 @@ std::shared_ptr Plugin::import_model(std::istream& model_str ModelDeserializer deserializer( model_stream, + model_buffer, [this](const std::shared_ptr& model, const std::shared_ptr& weights) { return get_core()->read_model(model, weights); }, diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 2548ba2c1cc8af..b33a6d922e0cae 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -34,6 +34,17 @@ class Plugin : public ov::IPlugin { "import_model with RemoteContext is not supported by CPU plugin!"); }; + std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::AnyMap& properties) const override; + std::shared_ptr import_model(std::istream& model, + std::shared_ptr model_buffer, + const ov::SoPtr& context, + const ov::AnyMap& properties) const override { + OPENVINO_THROW_NOT_IMPLEMENTED( + "import_model with RemoteContext is not supported by CPU plugin!"); + }; + ov::SupportedOpsMap query_model(const std::shared_ptr& model, const ov::AnyMap& properties) const override; ov::SoPtr create_context(const ov::AnyMap& remote_properties) const override { diff --git a/src/plugins/intel_cpu/src/utils/serialize.cpp b/src/plugins/intel_cpu/src/utils/serialize.cpp index 814e8d19311a8c..33d8140fbe4a84 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.cpp +++ b/src/plugins/intel_cpu/src/utils/serialize.cpp @@ -30,8 +30,12 @@ void ModelSerializer::operator<<(const std::shared_ptr& model) { ////////// ModelDeserializer ////////// -ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn, const CacheDecrypt& decrypt_fn, bool decript_from_string) - : m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string) { +ModelDeserializer::ModelDeserializer(std::istream& model_stream, + std::shared_ptr model_buffer, + ModelBuilder fn, + const 
CacheDecrypt& decrypt_fn, + bool decript_from_string) + : m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string), m_model_buffer(model_buffer) { if (m_decript_from_string) { m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str; } else { @@ -42,9 +46,8 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr& model) {} void ModelDeserializer::operator>>(std::shared_ptr& model) { - if (auto mmap_buffer = dynamic_cast(m_istream.rdbuf())) { - auto buffer = mmap_buffer->get_buffer(); - process_mmap(model, buffer); + if (m_model_buffer) { + process_mmap(model, m_model_buffer); } else { process_stream(model); } diff --git a/src/plugins/intel_cpu/src/utils/serialize.hpp b/src/plugins/intel_cpu/src/utils/serialize.hpp index 897a2c2e52f092..4dfdd6b22afbd4 100644 --- a/src/plugins/intel_cpu/src/utils/serialize.hpp +++ b/src/plugins/intel_cpu/src/utils/serialize.hpp @@ -31,7 +31,11 @@ class ModelDeserializer { public: typedef std::function(const std::shared_ptr&, const std::shared_ptr&)> ModelBuilder; - ModelDeserializer(std::istream& model, ModelBuilder fn, const CacheDecrypt& encrypt_fn, bool decript_from_string); + ModelDeserializer(std::istream& model, + std::shared_ptr model_buffer, + ModelBuilder fn, + const CacheDecrypt& encrypt_fn, + bool decript_from_string); virtual ~ModelDeserializer() = default; @@ -48,6 +52,7 @@ class ModelDeserializer { ModelBuilder m_model_builder; CacheDecrypt m_cache_decrypt; bool m_decript_from_string; + std::shared_ptr m_model_buffer; }; } // namespace intel_cpu