Skip to content

Commit

Permalink
Use alternative from PR openvinotoolkit#27981 instead for memory ma…
Browse files Browse the repository at this point in the history
…pped buffers
  • Loading branch information
MirceaDan99 committed Jan 22, 2025
1 parent a7113d7 commit c8d36c3
Show file tree
Hide file tree
Showing 19 changed files with 94 additions and 257 deletions.
20 changes: 0 additions & 20 deletions src/core/dev_api/openvino/runtime/shared_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,26 +81,6 @@ class OwningSharedStreamBuffer : public SharedStreamBuffer {
return m_shared_obj;
}

// Bulk read: delegate to the base buffer, then mirror the advanced read
// position into the shared buffer object so external observers stay in sync.
std::streamsize xsgetn(char* s, std::streamsize count) override {
    const std::streamsize bytes_read = SharedStreamBuffer::xsgetn(s, count);
    m_shared_obj->updateOffset(m_offset);
    return bytes_read;
}

// Consume one character through the base implementation and keep the
// shared object's offset synchronized with this buffer's position.
int_type uflow() override {
    const int_type ch = SharedStreamBuffer::uflow();
    m_shared_obj->updateOffset(m_offset);
    return ch;
}

// Reposition via the base buffer, then propagate the resulting offset to
// the shared object. Default `which` preserved from the base interface.
pos_type seekoff(off_type off,
                 std::ios_base::seekdir dir,
                 std::ios_base::openmode which = std::ios_base::in) override {
    const pos_type new_pos = SharedStreamBuffer::seekoff(off, dir, which);
    m_shared_obj->updateOffset(m_offset);
    return new_pos;
}

protected:
std::shared_ptr<ov::AlignedBuffer> m_shared_obj;
};
Expand Down
27 changes: 0 additions & 27 deletions src/inference/dev_api/openvino/runtime/iplugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,33 +185,6 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const = 0;

/**
* @brief Creates a compiled model from a previously exported model using the plugin implementation
* and removes the OpenVINO Runtime magic and plugin name
* @param model Reference to the model input stream
* @param model_buffer AlignedBuffer with the cached model
* @param properties An ov::AnyMap of properties
* @return A compiled model
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const;

/**
* @brief Creates a compiled model from a previously exported model using the plugin implementation
* and removes the OpenVINO Runtime magic and plugin name
* @param model Reference to the model input stream
* @param model_buffer AlignedBuffer with the cached model
* @param context A pointer to the plugin context derived from the RemoteContext class, used to
* execute the network
* @param properties An ov::AnyMap of properties
* @return A compiled model
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const;

/**
* @brief Queries a plugin about supported layers in model
* @param model Model object to query.
Expand Down
1 change: 0 additions & 1 deletion src/inference/src/cache_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ class FileStorageCacheManager final : public ICacheManager {
auto mmap = ov::load_mmap_object(blob_file_name);
auto shared_buffer =
std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
#if 0
OwningSharedStreamBuffer buf(shared_buffer);
std::istream stream(&buf);
reader(stream, shared_buffer);
Expand Down
3 changes: 1 addition & 2 deletions src/inference/src/dev/compilation_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ std::string ModelCache::compute_hash(const std::string& modelStr,

//////////////////////////////////////////////////

// NOTE(review): this span is diff residue — it shows both the removed
// buffer-holding constructor and the added default constructor side by side.
CompiledBlobHeader::CompiledBlobHeader(std::shared_ptr<ov::AlignedBuffer> model_buffer)
: m_model_buffer(model_buffer) {}
// Default constructor: header fields are populated later (e.g. via stream read).
CompiledBlobHeader::CompiledBlobHeader() {}

CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion,
const std::string& fileInfo,
Expand Down
13 changes: 0 additions & 13 deletions src/inference/src/dev/iplugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,6 @@ const std::string& ov::IPlugin::get_device_name() const {
return m_plugin_name;
}

// Base-class stub for the AlignedBuffer-based import overload.
// Plugins that support importing from a pre-loaded (e.g. memory-mapped)
// buffer are expected to override this; the default only reports that the
// capability is not implemented.
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const {
OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
}

// Base-class stub for the AlignedBuffer-based import overload that also
// takes a remote context. Overriding plugins provide the real
// implementation; the default unconditionally throws NOT_IMPLEMENTED.
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const {
OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
}

void ov::IPlugin::set_core(const std::weak_ptr<ov::ICore>& core) {
OPENVINO_ASSERT(!core.expired());
m_core = core;
Expand Down
13 changes: 0 additions & 13 deletions src/inference/src/dev/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,19 +79,6 @@ ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so});
}

// Thin wrapper: forwards the buffer-based import to the underlying plugin
// implementation and pairs the result with the shared-object handle (m_so)
// so the plugin library outlives the returned compiled model.
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so});
}

// Thin wrapper for the context-aware buffer import: forwards to the
// underlying plugin and bundles the result with the plugin's shared-object
// handle (m_so) to keep the library loaded while the model is alive.
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so});
}

ov::SoPtr<ov::IRemoteContext> ov::Plugin::create_context(const AnyMap& params) const {
OV_PLUGIN_CALL_STATEMENT({
auto remote = m_ptr->create_context(params);
Expand Down
10 changes: 1 addition & 9 deletions src/inference/src/dev/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,6 @@ class Plugin {
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

SoPtr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const;

SoPtr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

ov::SoPtr<ov::IRemoteContext> create_context(const AnyMap& params) const;

ov::SoPtr<ov::IRemoteContext> get_default_context(const AnyMap& params) const;
Expand All @@ -87,3 +78,4 @@ class Plugin {
};

} // namespace ov

23 changes: 10 additions & 13 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "cpu_streams_calculation.hpp"
#include "internal_properties.hpp"
#include "itt.h"
#include "openvino/op/paged_attention.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
#include "openvino/runtime/internal_properties.hpp"
#include "openvino/runtime/properties.hpp"
Expand All @@ -20,6 +19,7 @@
#include "utils/precision_support.h"
#include "utils/serialize.hpp"
#include "weights_cache.hpp"
#include "openvino/op/paged_attention.hpp"

#if defined(__linux__)
# include <signal.h>
Expand Down Expand Up @@ -200,7 +200,7 @@ static Config::ModelType getModelType(const std::shared_ptr<const Model>& model)
return Config::ModelType::CNN;

if ((op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) && model->get_variables().size() > 0) ||
op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
return Config::ModelType::LLM;

return Config::ModelType::Unknown;
Expand Down Expand Up @@ -445,17 +445,15 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio

return decltype(ov::supported_properties)::value_type(std::move(supportedProperties));
} else if (ov::internal::supported_properties == name) {
return decltype(ov::internal::supported_properties)::value_type {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
#if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX))
ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO},
#endif
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName {
ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO
}
};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(),
ov::PropertyMutability::RO}};
} else if (name == ov::device::full_name) {
return decltype(ov::device::full_name)::value_type(deviceFullName);
} else if (name == ov::available_devices) {
Expand Down Expand Up @@ -557,7 +555,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");

CacheDecrypt decrypt{codec_xor};
CacheDecrypt decrypt{ codec_xor };
bool decript_from_string = false;
if (config.count(ov::cache_encryption_callbacks.name())) {
const auto& encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as<EncryptionCallbacks>();
Expand All @@ -578,8 +576,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
[this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
return get_core()->read_model(model, weights);
},
decrypt,
decript_from_string);
decrypt, decript_from_string);

std::shared_ptr<ov::Model> model;
deserializer >> model;
Expand Down
16 changes: 4 additions & 12 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class Plugin : public ov::IPlugin {
std::shared_ptr<ov::ICompiledModel> compile_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties,
const ov::SoPtr<ov::IRemoteContext>& context) const override {
OPENVINO_THROW_NOT_IMPLEMENTED("compile_model with RemoteContext is not supported by CPU plugin!");
OPENVINO_THROW_NOT_IMPLEMENTED(
"compile_model with RemoteContext is not supported by CPU plugin!");
};

void set_property(const ov::AnyMap& properties) override;
Expand All @@ -29,17 +30,8 @@ class Plugin : public ov::IPlugin {
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override {
OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!");
};

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override {
OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!");
OPENVINO_THROW_NOT_IMPLEMENTED(
"import_model with RemoteContext is not supported by CPU plugin!");
};

ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
Expand Down
56 changes: 23 additions & 33 deletions src/plugins/intel_cpu/src/utils/serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ namespace intel_cpu {
////////// ModelSerializer //////////

ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn)
: m_ostream(ostream),
m_cache_encrypt(std::move(encrypt_fn)) {}
: m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {}

void ModelSerializer::operator<<(const std::shared_ptr<ov::Model>& model) {
auto serialize_info = [&](std::ostream& stream) {
Expand Down Expand Up @@ -47,16 +46,15 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream,
} else {
m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char;
}
}

// Intentionally a no-op: this deserializer applies no extra XML-derived
// info to the model. (The line appears twice in this diff view — before
// and after reformatting.)
void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model) {}
void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model) {}

void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
if (m_model_buffer) {
process_mmap(model, m_model_buffer);
} else {
process_stream(model);
}
// Deserialize a model: when a pre-loaded (memory-mapped) buffer was
// supplied, parse from it; otherwise fall back to reading the input stream.
void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
    if (!m_model_buffer) {
        process_stream(model);
    } else {
        process_mmap(model, m_model_buffer);
    }
}

void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
Expand All @@ -83,10 +81,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
// Read model input/output precisions.
pugi::xml_document xml_in_out_doc;
if (hdr.custom_data_size > 0lu) {
auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset,
hdr.custom_data_size,
pugi::parse_default,
pugi::encoding_utf8);
auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8);
if (res.status != pugi::status_ok) {
OPENVINO_THROW("[CPU] Could to deserialize custom data.");
}
Expand All @@ -95,10 +90,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
// Map blob content
std::shared_ptr<ov::AlignedBuffer> weights_buf;
if (hdr.consts_size) {
weights_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(buffer_base + hdr.consts_offset,
hdr.consts_size,
mmemory);
weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory);
}

// XML content
Expand All @@ -115,7 +107,9 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size);
}
std::shared_ptr<ov::AlignedBuffer> model_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(&((*xml_buff)[0]), hdr.model_size, xml_buff);
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(&((*xml_buff)[0]),
hdr.model_size,
xml_buff);

model = m_model_builder(model_buf, weights_buf);

Expand Down Expand Up @@ -160,7 +154,7 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
auto data_blob = std::make_shared<ov::Tensor>(ov::element::u8, ov::Shape({hdr.consts_size}));
m_istream.seekg(hdr.consts_offset);
if (hdr.consts_size) {
m_istream.read(static_cast<char*>(data_blob->data(ov::element::u8)), hdr.consts_size);
m_istream.read(static_cast<char *>(data_blob->data(ov::element::u8)), hdr.consts_size);
}

// read XML content
Expand All @@ -172,20 +166,16 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
if (m_decript_from_string) {
*xml_string = m_cache_decrypt.m_decrypt_str(*xml_string);
} else {
m_cache_decrypt.m_decrypt_char(const_cast<char*>(xml_string->data()),
xml_string->data(),
xml_string->size());
m_cache_decrypt.m_decrypt_char(const_cast<char*>(xml_string->data()), xml_string->data(), xml_string->size());
}
}

auto model_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(const_cast<char*>(xml_string->data()),
xml_string->size(),
xml_string);
auto weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::Tensor>>>(
reinterpret_cast<char*>(data_blob->data(ov::element::u8)),
hdr.consts_size,
data_blob);
auto model_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(const_cast<char*>(xml_string->data()),
xml_string->size(),
xml_string);
auto weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::Tensor>>>(reinterpret_cast<char*>(data_blob->data(ov::element::u8)),
hdr.consts_size,
data_blob);

model = m_model_builder(model_buf, weights_buf);

Expand All @@ -194,5 +184,5 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
set_info(root, model);
}

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
8 changes: 3 additions & 5 deletions src/plugins/intel_cpu/src/utils/serialize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ class ModelSerializer {

class ModelDeserializer {
public:
typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&,
const std::shared_ptr<ov::AlignedBuffer>&)>
ModelBuilder;
typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&, const std::shared_ptr<ov::AlignedBuffer>&)> ModelBuilder;

ModelDeserializer(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
Expand All @@ -57,5 +55,5 @@ class ModelDeserializer {
std::shared_ptr<ov::AlignedBuffer> m_model_buffer;
};

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
Loading

0 comments on commit c8d36c3

Please sign in to comment.