Skip to content

Commit

Permalink
Update plugin API to import model with mmap buffer
Browse files Browse the repository at this point in the history
  • Loading branch information
olpipi committed Nov 20, 2024
1 parent 4cd1512 commit 2b6200e
Show file tree
Hide file tree
Showing 10 changed files with 106 additions and 13 deletions.
27 changes: 27 additions & 0 deletions src/inference/dev_api/openvino/runtime/iplugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,33 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this<IPlugin
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const = 0;

/**
* @brief Creates a compiled model from a previously exported model using the plugin implementation
* and removes OpenVINO Runtime magic and plugin name
* @param model Reference to the input stream the exported model is read from
* @param model_buffer AlignedBuffer with the cached model (the core's cache-loading path passes the
* memory-mapped cache blob here)
* @param properties An ov::AnyMap of properties
* @return A compiled model
* @note Not pure virtual: the base-class definition throws via OPENVINO_THROW_NOT_IMPLEMENTED.
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const;

/**
* @brief Creates a compiled model from a previously exported model using the plugin implementation
* and removes OpenVINO Runtime magic and plugin name
* @param model Reference to the input stream the exported model is read from
* @param model_buffer AlignedBuffer with the cached model (the core's cache-loading path passes the
* memory-mapped cache blob here)
* @param context A pointer to plugin context derived from RemoteContext class used to
* execute the network
* @param properties An ov::AnyMap of properties
* @return A compiled model
* @note Not pure virtual: the base-class definition throws via OPENVINO_THROW_NOT_IMPLEMENTED.
*/
virtual std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const;

/**
* @brief Queries a plugin about supported layers in model
* @param model Model object to query.
Expand Down
11 changes: 7 additions & 4 deletions src/inference/src/cache_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ class ICacheManager {
/**
* @brief Function passing created input stream.
* In the two-argument form the callback also receives an ov::AlignedBuffer: FileStorageCacheManager
* passes the memory-mapped blob when it maps the file and nullptr when it only opens a file stream.
*/
using StreamReader = std::function<void(std::istream&)>;
using StreamReader = std::function<void(std::istream&, std::shared_ptr<ov::AlignedBuffer>)>;

/**
* @brief Callback when OpenVINO intends to read model from cache
Expand Down Expand Up @@ -141,13 +141,16 @@ class FileStorageCacheManager final : public ICacheManager {
auto mmap = ov::load_mmap_object(blob_file_name);
auto shared_buffer =
std::make_shared<ov::SharedBuffer<std::shared_ptr<MappedMemory>>>(mmap->data(), mmap->size(), mmap);
#if 0
OwningSharedStreamBuffer buf(shared_buffer);
std::istream stream(&buf);
reader(stream);
#else
std::ifstream stream(blob_file_name, std::ios_base::binary);
#endif
reader(stream, shared_buffer);
} else {
std::ifstream stream(blob_file_name, std::ios_base::binary);
reader(stream);
}
reader(stream, nullptr); }
}
}

Expand Down
11 changes: 8 additions & 3 deletions src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1413,7 +1413,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
cacheContent.blobId,
coreConfig.get_enable_mmap() && ov::util::contains(plugin.get_property(ov::internal::supported_properties),
ov::internal::caching_with_mmap),
[&](std::istream& networkStream) {
[&](std::istream& networkStream, std::shared_ptr<ov::AlignedBuffer> model_buffer) {
OV_ITT_SCOPE(FIRST_INFERENCE,
ov::itt::domains::LoadTime,
"Core::load_model_from_cache::ReadStreamAndImport");
Expand Down Expand Up @@ -1459,8 +1459,13 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
update_config[ov::weights_path.name()] = weights_path;
}
}
compiled_model = context ? plugin.import_model(networkStream, context, update_config)
: plugin.import_model(networkStream, update_config);
if (model_buffer) {
compiled_model = context ? plugin.import_model(networkStream, model_buffer, context, update_config)
: plugin.import_model(networkStream, model_buffer, update_config);
} else {
compiled_model = context ? plugin.import_model(networkStream, context, update_config)
: plugin.import_model(networkStream, update_config);
}
});
} catch (const HeaderException&) {
// For these exceptions just remove old cache and set that import didn't work
Expand Down
13 changes: 13 additions & 0 deletions src/inference/src/dev/iplugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,19 @@ const std::string& ov::IPlugin::get_device_name() const {
return m_plugin_name;
}

// Base-class behaviour of the buffer-aware import: unconditionally reports "not implemented".
// The method is virtual in IPlugin, so a plugin can replace it with a real implementation.
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
                                                              std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                              const ov::AnyMap& properties) const {
    OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
}

// Base-class behaviour of the buffer-aware import with a remote context: unconditionally reports
// "not implemented". The method is virtual in IPlugin, so a plugin can replace it.
std::shared_ptr<ov::ICompiledModel> ov::IPlugin::import_model(std::istream& model,
                                                              std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                              const ov::SoPtr<ov::IRemoteContext>& context,
                                                              const ov::AnyMap& properties) const {
    OPENVINO_THROW_NOT_IMPLEMENTED("This method is not implemented");
}

void ov::IPlugin::set_core(const std::weak_ptr<ov::ICore>& core) {
OPENVINO_ASSERT(!core.expired());
m_core = core;
Expand Down
11 changes: 11 additions & 0 deletions src/inference/src/dev/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, context, config), m_so});
}

// Forwards the buffer-aware import to the wrapped plugin (m_ptr) and returns the result
// together with m_so.
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
                                                       std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                       const ov::AnyMap& properties) const {
    OV_PLUGIN_CALL_STATEMENT(return {m_ptr->import_model(model, model_buffer, properties), m_so});
}

// Forwards the buffer-aware import with a remote context to the wrapped plugin (m_ptr) and
// returns the result together with m_so.
ov::SoPtr<ov::ICompiledModel> ov::Plugin::import_model(std::istream& model,
                                                       std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                       const ov::SoPtr<ov::IRemoteContext>& context,
                                                       const ov::AnyMap& config) const {
    OV_PLUGIN_CALL_STATEMENT({
        return {m_ptr->import_model(model, model_buffer, context, config), m_so};
    });
}

ov::SoPtr<ov::IRemoteContext> ov::Plugin::create_context(const AnyMap& params) const {
OV_PLUGIN_CALL_STATEMENT({
auto remote = m_ptr->create_context(params);
Expand Down
7 changes: 7 additions & 0 deletions src/inference/src/dev/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,13 @@ class Plugin {
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

// Imports a model from a stream plus a model buffer, without a remote context.
SoPtr<ov::ICompiledModel> import_model(std::istream& model,
                                       std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                       const ov::AnyMap& properties) const;

// Imports a model from a stream plus a model buffer, using the given remote context.
SoPtr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& config) const;

ov::SoPtr<ov::IRemoteContext> create_context(const AnyMap& params) const;

ov::SoPtr<ov::IRemoteContext> get_default_context(const AnyMap& params) const;
Expand Down
8 changes: 8 additions & 0 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,6 +553,13 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>&

// Stream-only import: delegates to the buffer-aware overload with an empty (null) model buffer.
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream,
                                                         const ov::AnyMap& config) const {
    // Spelling out the pointer type makes the selected overload explicit.
    const std::shared_ptr<ov::AlignedBuffer> no_buffer;
    return import_model(model_stream, no_buffer, config);
}


std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& config) const {
OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model");

CacheDecrypt decrypt{ codec_xor };
Expand All @@ -565,6 +572,7 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& model_str

ModelDeserializer deserializer(
model_stream,
model_buffer,
[this](const std::shared_ptr<ov::AlignedBuffer>& model, const std::shared_ptr<ov::AlignedBuffer>& weights) {
return get_core()->read_model(model, weights);
},
Expand Down
11 changes: 11 additions & 0 deletions src/plugins/intel_cpu/src/plugin.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,17 @@ class Plugin : public ov::IPlugin {
"import_model with RemoteContext is not supported by CPU plugin!");
};

// Override of the IPlugin import that takes a model buffer in addition to the stream;
// declared here and defined out of line.
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;
// Buffer-aware import with a remote context: this override always throws "not implemented".
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& model,
                                                 std::shared_ptr<ov::AlignedBuffer> model_buffer,
                                                 const ov::SoPtr<ov::IRemoteContext>& context,
                                                 const ov::AnyMap& properties) const override {
    OPENVINO_THROW_NOT_IMPLEMENTED("import_model with RemoteContext is not supported by CPU plugin!");
}

ov::SupportedOpsMap query_model(const std::shared_ptr<const ov::Model>& model,
const ov::AnyMap& properties) const override;
ov::SoPtr<ov::IRemoteContext> create_context(const ov::AnyMap& remote_properties) const override {
Expand Down
13 changes: 8 additions & 5 deletions src/plugins/intel_cpu/src/utils/serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,12 @@ void ModelSerializer::operator<<(const std::shared_ptr<ov::Model>& model) {

////////// ModelDeserializer //////////

ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn, const CacheDecrypt& decrypt_fn, bool decript_from_string)
: m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string) {
ModelDeserializer::ModelDeserializer(std::istream& model_stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
ModelBuilder fn,
const CacheDecrypt& decrypt_fn,
bool decript_from_string)
: m_istream(model_stream), m_model_builder(std::move(fn)), m_decript_from_string(decript_from_string), m_model_buffer(model_buffer) {
if (m_decript_from_string) {
m_cache_decrypt.m_decrypt_str = decrypt_fn.m_decrypt_str;
} else {
Expand All @@ -42,9 +46,8 @@ ModelDeserializer::ModelDeserializer(std::istream& model_stream, ModelBuilder fn
// No-op: the body is empty, so neither `root` nor `model` is read or modified.
void ModelDeserializer::set_info(pugi::xml_node& root, std::shared_ptr<ov::Model>& model) {}

void ModelDeserializer::operator>>(std::shared_ptr<ov::Model>& model) {
if (auto mmap_buffer = dynamic_cast<OwningSharedStreamBuffer*>(m_istream.rdbuf())) {
auto buffer = mmap_buffer->get_buffer();
process_mmap(model, buffer);
if (m_model_buffer) {
process_mmap(model, m_model_buffer);
} else {
process_stream(model);
}
Expand Down
7 changes: 6 additions & 1 deletion src/plugins/intel_cpu/src/utils/serialize.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,11 @@ class ModelDeserializer {
public:
// Callback that produces an ov::Model from two aligned buffers.
using ModelBuilder = std::function<std::shared_ptr<ov::Model>(const std::shared_ptr<ov::AlignedBuffer>&,
                                                              const std::shared_ptr<ov::AlignedBuffer>&)>;

ModelDeserializer(std::istream& model, ModelBuilder fn, const CacheDecrypt& encrypt_fn, bool decript_from_string);
/**
 * @brief Constructs a deserializer over a model stream and an optional model buffer.
 * @param model Input stream with the serialized model
 * @param model_buffer Buffer with the serialized model; may be null
 * @param fn Callback used to build the ov::Model
 * @param encrypt_fn CacheDecrypt callbacks (the out-of-line definition names this parameter decrypt_fn)
 * @param decript_from_string Selects which CacheDecrypt callback is stored (the string-based one when true)
 */
ModelDeserializer(std::istream& model,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
ModelBuilder fn,
const CacheDecrypt& encrypt_fn,
bool decript_from_string);

virtual ~ModelDeserializer() = default;

Expand All @@ -48,6 +52,7 @@ class ModelDeserializer {
ModelBuilder m_model_builder;
CacheDecrypt m_cache_decrypt;
bool m_decript_from_string;
// Model buffer handed to the constructor; when non-null, operator>> takes the process_mmap path
// instead of process_stream.
std::shared_ptr<ov::AlignedBuffer> m_model_buffer;
};

} // namespace intel_cpu
Expand Down

0 comments on commit 2b6200e

Please sign in to comment.