Skip to content

Commit

Permalink
Use alternative from PR openvinotoolkit#27981 instead for memory ma…
Browse files Browse the repository at this point in the history
…pped buffers
  • Loading branch information
MirceaDan99 committed Jan 22, 2025
1 parent a7113d7 commit c8d36c3
Show file tree
Hide file tree
Showing 19 changed files with 94 additions and 257 deletions.
20 changes: 0 additions & 20 deletions src/core/dev_api/openvino/runtime/shared_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,26 +81,6 @@ class OwningSharedStreamBuffer : public SharedStreamBuffer {
return m_shared_obj;
}

// Bulk read: delegate to the base buffer, then mirror the advanced read
// position into the shared buffer object so external observers stay in sync.
std::streamsize xsgetn(char* s, std::streamsize count) override {
    const std::streamsize bytes_read = SharedStreamBuffer::xsgetn(s, count);
    m_shared_obj->updateOffset(m_offset);
    return bytes_read;
}

// Consume one character through the base implementation and keep the
// shared object's offset synchronized with this buffer's position.
int_type uflow() override {
    const int_type ch = SharedStreamBuffer::uflow();
    m_shared_obj->updateOffset(m_offset);
    return ch;
}

// Reposition via the base buffer, then propagate the resulting offset to
// the shared object. Default `which` preserved from the base interface.
pos_type seekoff(off_type off,
                 std::ios_base::seekdir dir,
                 std::ios_base::openmode which = std::ios_base::in) override {
    const pos_type new_pos = SharedStreamBuffer::seekoff(off, dir, which);
    m_shared_obj->updateOffset(m_offset);
    return new_pos;
}

protected:
std::shared_ptr<ov::AlignedBuffer> m_shared_obj;
};
Expand Down
27 changes: 0 additions & 27 deletions src/inference/dev_api/openvino/runtime/iplugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,33 +185,6 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const = 0;

/**
* @brief Creates a compiled model from a previously exported model using the plugin implementation
* and removes the OpenVINO Runtime magic and plugin name
* @param model Reference to the model input stream
* @param model_buffer AlignedBuffer with the cached model
* @param properties An ov::AnyMap of properties
* @return A compiled model
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const;

/**
* @brief Creates a compiled model from a previously exported model using the plugin implementation
* and removes the OpenVINO Runtime magic and plugin name
* @param model Reference to the model input stream
* @param model_buffer AlignedBuffer with the cached model
* @param context A pointer to the plugin context derived from the RemoteContext class, used to
* execute the network
* @param properties An ov::AnyMap of properties
* @return A compiled model
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const;

/**
* @brief Queries a plugin about supported layers in model
* @param model Model object to query.
Expand Down
1 change: 0 additions & 1 deletion src/inference/src/cache_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,6 @@ class FileStorageCacheManager final : public ICacheManager {
auto mmap = ov::load_mmap_object(blob_file_name);
auto shared_buffer =
std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
#if 0
OwningSharedStreamBuffer buf(shared_buffer);
std::istream stream(&buf);
reader(stream, shared_buffer);
Expand Down
3 changes: 1 addition & 2 deletions src/inference/src/dev/compilation_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,8 +156,7 @@ std::string ModelCache::compute_hash(const std::string& modelStr,

//////////////////////////////////////////////////

// NOTE(review): this span is diff residue — it shows both the removed
// buffer-holding constructor and the added default constructor side by side.
CompiledBlobHeader::CompiledBlobHeader(std::shared_ptr<ov::AlignedBuffer> model_buffer)
: m_model_buffer(model_buffer) {}
// Default constructor: header fields are populated later (e.g. via stream read).
CompiledBlobHeader::CompiledBlobHeader() {}

CompiledBlobHeader::CompiledBlobHeader(const std::string& ieVersion,
const std::string& fileInfo,
Expand Down
13 changes: 0 additions & 13 deletions src/inference/src/dev/iplugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,6 @@ const std::string& ov::IPlugin::get_device_name() const {
return m_plugin_name;
}

// Base-class stub for the AlignedBuffer-based import overload.
// Plugins that support importing from a pre-loaded (e.g. memory-mapped)
// buffer are expected to override this; the default only reports that the
// capability is not implemented.
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const {
OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
}

// Base-class stub for the AlignedBuffer-based import overload that also
// takes a remote context. Overriding plugins provide the real
// implementation; the default unconditionally throws NOT_IMPLEMENTED.
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const {
OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
}

void ov::IPlugin::set_core(const std::weak_ptr<ov::ICore>& core) {
OPENVINO_ASSERT(!core.expired());
m_core = core;
Expand Down
13 changes: 0 additions & 13 deletions src/inference/src/dev/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,19 +79,6 @@ ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so});
}

// Thin wrapper: forwards the buffer-based import to the underlying plugin
// implementation and pairs the result with the shared-object handle (m_so)
// so the plugin library outlives the returned compiled model.
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so});
}

// Thin wrapper for the context-aware buffer import: forwards to the
// underlying plugin and bundles the result with the plugin's shared-object
// handle (m_so) to keep the library loaded while the model is alive.
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const {
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, context, config), m_so});
}

ov::SoPtr<ov::IRemoteContext> ov::Plugin::create_context(const AnyMap& params) const {
OV_PLUGIN_CALL_STATEMENT({
auto remote = m_ptr->create_context(params);
Expand Down
10 changes: 1 addition & 9 deletions src/inference/src/dev/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,15 +59,6 @@ class Plugin {
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

SoPtr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const;

SoPtr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

ov::SoPtr<ov::IRemoteContext> create_context(const AnyMap& params) const;

ov::SoPtr<ov::IRemoteContext> get_default_context(const AnyMap& params) const;
Expand All @@ -87,3 +78,4 @@ class Plugin {
};

} // namespace ov

23 changes: 10 additions & 13 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include "cpu_streams_calculation.hpp"
#include "internal_properties.hpp"
#include "itt.h"
#include "openvino/op/paged_attention.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
#include "openvino/runtime/internal_properties.hpp"
#include "openvino/runtime/properties.hpp"
Expand All @@ -20,6 +19,7 @@
#include "utils/precision_support.h"
#include "utils/serialize.hpp"
#include "weights_cache.hpp"
#include "openvino/op/paged_attention.hpp"

#if defined(__linux__)
# include <signal.h>
Expand Down Expand Up @@ -200,7 +200,7 @@ static Config::ModelType getModelType(const std::shared_ptr<const Model>& model)
return Config::ModelType::CNN;

if ((op::util::has_op_with_type<op::v13::ScaledDotProductAttention>(model) && model->get_variables().size() > 0) ||
op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
op::util::has_op_with_type<ov::op::PagedAttentionExtension>(model))
return Config::ModelType::LLM;

return Config::ModelType::Unknown;
Expand Down Expand Up @@ -445,17 +445,15 @@ ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& optio

return decltype(ov::supported_properties)::value_type(std::move(supportedProperties));
} else if (ov::internal::supported_properties == name) {
return decltype(ov::internal::supported_properties)::value_type {
return decltype(ov::internal::supported_properties)::value_type{
ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO},
#if !defined(OPENVINO_ARCH_ARM) && !(defined(__APPLE__) || defined(__MACOSX))
ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::caching_with_mmap.name(), ov::PropertyMutability::RO},
#endif
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName {
ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO
}
};
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW},
ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO},
ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(),
ov::PropertyMutability::RO}};
} else if (name == ov::device::full_name) {
return decltype(ov::device::full_name)::value_type(deviceFullName);
} else if (name == ov::available_devices) {
Expand Down Expand Up @@ -557,7 +555,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream, const ov::AnyMap& config) const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");

CacheDecrypt decrypt{codec_xor};
CacheDecrypt decrypt{ codec_xor };
bool decript_from_string = false;
if (config.count(ov::cache_encryption_callbacks.name())) {
const auto& encryption_callbacks = config.at(ov::cache_encryption_callbacks.name()).as<EncryptionCallbacks>();
Expand All @@ -578,8 +576,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str
[this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
return get_core()->read_model(model, weights);
},
decrypt,
decript_from_string);
decrypt, decript_from_string);

std::shared_ptr<ov::Model> model;
deserializer >> model;
Expand Down
16 changes: 4 additions & 12 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ class Plugin : public ov::IPlugin {
std::shared_ptr<ov::ICompiledModel> compile_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties,
const ov::SoPtr<ov::IRemoteContext>& context) const override {
OPENVINO_THROW_NOT_IMPLEMENTED("compile_model with RemoteContext is not supported by CPU plugin!");
OPENVINO_THROW_NOT_IMPLEMENTED(
"compile_model with RemoteContext is not supported by CPU plugin!");
};

void set_property(const ov::AnyMap& properties) override;
Expand All @@ -29,17 +30,8 @@ class Plugin : public ov::IPlugin {
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override {
OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!");
};

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override {
OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!");
OPENVINO_THROW_NOT_IMPLEMENTED(
"import_model with RemoteContext is not supported by CPU plugin!");
};

ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
Expand Down
56 changes: 23 additions & 33 deletions src/plugins/intel_cpu/src/utils/serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@ namespace intel_cpu {
////////// ModelSerializer //////////

ModelSerializer::ModelSerializer(std::ostream& ostream, CacheEncrypt encrypt_fn)
: m_ostream(ostream),
m_cache_encrypt(std::move(encrypt_fn)) {}
: m_ostream(ostream), m_cache_encrypt(std::move(encrypt_fn)) {}

void ModelSerializer::operator<<(const std::shared_ptr<ov::Model>& model) {
auto serialize_info = [&](std::ostream& stream) {
Expand Down Expand Up @@ -47,16 +46,15 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream,
} else {
m_cache_decrypt.m_decrypt_char = decrypt_fn.m_decrypt_char;
}
}

// Intentionally a no-op: this deserializer applies no extra XML-derived
// info to the model. (The line appears twice in this diff view — before
// and after reformatting.)
void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model) {}
void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model) {}

void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
if (m_model_buffer) {
process_mmap(model, m_model_buffer);
} else {
process_stream(model);
}
// Deserialize a model: when a pre-loaded (memory-mapped) buffer was
// supplied, parse from it; otherwise fall back to reading the input stream.
void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
    if (!m_model_buffer) {
        process_stream(model);
    } else {
        process_mmap(model, m_model_buffer);
    }
}

void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
Expand All @@ -83,10 +81,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
// Read model input/output precisions.
pugi::xml_document xml_in_out_doc;
if (hdr.custom_data_size > 0lu) {
auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset,
hdr.custom_data_size,
pugi::parse_default,
pugi::encoding_utf8);
auto res = xml_in_out_doc.load_buffer(buffer_base + hdr.custom_data_offset, hdr.custom_data_size, pugi::parse_default, pugi::encoding_utf8);
if (res.status != pugi::status_ok) {
OPENVINO_THROW("[CPU] Could to deserialize custom data.");
}
Expand All @@ -95,10 +90,7 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
// Map blob content
std::shared_ptr<ov::AlignedBuffer> weights_buf;
if (hdr.consts_size) {
weights_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(buffer_base + hdr.consts_offset,
hdr.consts_size,
mmemory);
weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::AlignedBuffer>>>(buffer_base + hdr.consts_offset, hdr.consts_size, mmemory);
}

// XML content
Expand All @@ -115,7 +107,9 @@ void ModelDeserializer::process_mmap(std::shared_ptr<ov::Model>& model,
xml_buff->assign(buffer_base + hdr.model_offset, hdr.model_size);
}
std::shared_ptr<ov::AlignedBuffer> model_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(&((*xml_buff)[0]), hdr.model_size, xml_buff);
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(&((*xml_buff)[0]),
hdr.model_size,
xml_buff);

model = m_model_builder(model_buf, weights_buf);

Expand Down Expand Up @@ -160,7 +154,7 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
auto data_blob = std::make_shared<ov::Tensor>(ov::element::u8, ov::Shape({hdr.consts_size}));
m_istream.seekg(hdr.consts_offset);
if (hdr.consts_size) {
m_istream.read(static_cast<char*>(data_blob->data(ov::element::u8)), hdr.consts_size);
m_istream.read(static_cast<char *>(data_blob->data(ov::element::u8)), hdr.consts_size);
}

// read XML content
Expand All @@ -172,20 +166,16 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
if (m_decript_from_string) {
*xml_string = m_cache_decrypt.m_decrypt_str(*xml_string);
} else {
m_cache_decrypt.m_decrypt_char(const_cast<char*>(xml_string->data()),
xml_string->data(),
xml_string->size());
m_cache_decrypt.m_decrypt_char(const_cast<char*>(xml_string->data()), xml_string->data(), xml_string->size());
}
}

auto model_buf =
std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(const_cast<char*>(xml_string->data()),
xml_string->size(),
xml_string);
auto weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::Tensor>>>(
reinterpret_cast<char*>(data_blob->data(ov::element::u8)),
hdr.consts_size,
data_blob);
auto model_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::string>>>(const_cast<char*>(xml_string->data()),
xml_string->size(),
xml_string);
auto weights_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::Tensor>>>(reinterpret_cast<char*>(data_blob->data(ov::element::u8)),
hdr.consts_size,
data_blob);

model = m_model_builder(model_buf, weights_buf);

Expand All @@ -194,5 +184,5 @@ void ModelDeserializer::process_stream(std::shared_ptr<ov::Model>& model) {
set_info(root, model);
}

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
8 changes: 3 additions & 5 deletions src/plugins/intel_cpu/src/utils/serialize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ class ModelSerializer {

class ModelDeserializer {
public:
typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&,
const std::shared_ptr<ov::AlignedBuffer>&)>
ModelBuilder;
typedef std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&, const std::shared_ptr<ov::AlignedBuffer>&)> ModelBuilder;

ModelDeserializer(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
Expand All @@ -57,5 +55,5 @@ class ModelDeserializer {
std::shared_ptr<ov::AlignedBuffer> m_model_buffer;
};

} // namespace intel_cpu
} // namespace ov
} // namespace intel_cpu
} // namespace ov
Loading

0 comments on commit c8d36c3

Please sign in to comment.