Skip to content

Commit

Permalink
Add BlobContainer class and derived classes for each `std::vector<uint8_t…
Browse files Browse the repository at this point in the history
…>` and `std::shared_ptr<ov::AlignedBuffer>` blob types
  • Loading branch information
MirceaDan99 committed Jan 15, 2025
1 parent 2f404c1 commit c8f4abf
Show file tree
Hide file tree
Showing 11 changed files with 114 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,63 @@

namespace intel_npu {

/**
 * @brief Polymorphic base for objects that hold a compiled blob.
 *
 * Every accessor in this base throws via OPENVINO_THROW; concrete containers
 * are expected to override the ones they support. Instances are handed around
 * as `std::unique_ptr<BlobContainer>`, so the destructor must be virtual for
 * the derived object to be destroyed correctly through a base pointer.
 */
class BlobContainer {
public:
    /// Virtual so that deleting a derived container through a base pointer is well-defined.
    virtual ~BlobContainer() = default;

    /**
     * @brief Returns a pointer to the first byte of the held blob.
     * @throws Always throws in the base class (not implemented).
     */
    virtual void* get_ptr() {
        OPENVINO_THROW("BlobContainer::get_ptr() method is not implemented!");
    }

    /**
     * @brief Returns the size of the held blob.
     * @throws Always throws in the base class (not implemented).
     */
    virtual size_t size() const {
        OPENVINO_THROW("BlobContainer::size() method is not implemented!");
    }

    /**
     * @brief Asks the container to drop its blob storage.
     * @return In overrides: whether the storage was actually released.
     * @throws Always throws in the base class (not implemented).
     */
    virtual bool release_from_memory() {
        OPENVINO_THROW("BlobContainer::release_from_memory() method is not implemented!");
    }
};

/**
 * @brief BlobContainer backed by a `std::vector<uint8_t>` that it owns.
 *
 * The vector is taken by value and moved into the container, so the container
 * is the one holding the bytes and is able to free them on request.
 */
class BlobContainerVector : public BlobContainer {
public:
    BlobContainerVector(std::vector<uint8_t> blob) : _ownershipBlob(std::move(blob)) {}

    /// Pointer to the vector's underlying storage.
    void* get_ptr() override {
        uint8_t* const firstByte = _ownershipBlob.data();
        return reinterpret_cast<void*>(firstByte);
    }

    /// Number of bytes currently stored in the vector.
    size_t size() const override {
        const size_t byteCount = _ownershipBlob.size();
        return byteCount;
    }

    /// Empties the vector and requests that its capacity be given back; always reports success.
    bool release_from_memory() override {
        _ownershipBlob.clear();          // drop the elements
        _ownershipBlob.shrink_to_fit();  // ask for the now-unused capacity to be freed
        return true;
    }

private:
    std::vector<uint8_t> _ownershipBlob;
};

class BlobContainerAlignedBuffer : public BlobContainer {
public:
BlobContainerAlignedBuffer(const std::shared_ptr<ov::AlignedBuffer>& blobSO) : _ownershipBlob(blobSO) {}

void* get_ptr() override {
return _ownershipBlob->get_ptr();
}

size_t size() const override {
return _ownershipBlob->size();
}

bool release_from_memory() override {
return false;
}

private:
std::shared_ptr<ov::AlignedBuffer> _ownershipBlob;
};

class IGraph : public std::enable_shared_from_this<IGraph> {
public:
IGraph(ze_graph_handle_t handle,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class DriverCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class DriverGraph final : public IGraph {
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
const Config& config,
std::optional<std::shared_ptr<ov::AlignedBuffer>> blob);
std::optional<std::unique_ptr<BlobContainer>> blob);

size_t export_blob(std::ostream& stream) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class PluginCompilerAdapter final : public ICompilerAdapter {

std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

std::shared_ptr<IGraph> parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const override;
std::shared_ptr<IGraph> parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const override;

ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ class PluginGraph final : public IGraph {
const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct,
ze_graph_handle_t graphHandle,
NetworkMetadata metadata,
std::shared_ptr<ov::AlignedBuffer> blobSOPtr,
std::unique_ptr<BlobContainer> blobPtr,
const Config& config);

size_t export_blob(std::ostream& stream) const override;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,11 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<con
std::nullopt);
}

std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const {
std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");

_logger.debug("parse start");
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSOPtr->get_ptr()), networkSOPtr->size());
ze_graph_handle_t graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
_logger.debug("parse end");

OV_ITT_TASK_NEXT(PARSE_BLOB, "getNetworkMeta");
Expand All @@ -218,7 +218,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(std::shared_ptr<ov::Aligned
graphHandle,
std::move(networkMeta),
config,
std::optional<std::shared_ptr<ov::AlignedBuffer>>(std::move(networkSOPtr)));
std::optional<std::unique_ptr<BlobContainer>>(std::move(blobPtr)));
}

ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
Expand Down
23 changes: 23 additions & 0 deletions src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,29 @@ void DriverGraph::initialize(const Config& config) {
}
}

bool DriverGraph::release_blob(const Config& config) {
if (_blob == nullptr || _zeroInitStruct->getGraphDdiTable().version() < ZE_GRAPH_EXT_VERSION_1_8 ||
config.get<PERF_COUNT>()) {
return false;
}

ze_graph_properties_2_t properties = {};
properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
_zeroInitStruct->getGraphDdiTable().pfnGetProperties2(_handle, &properties);

if (~properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) {
return false;
}

if(!_blob->release_from_memory()) {
return false;
}

_logger.debug("Blob is released");

return true;
};

DriverGraph::~DriverGraph() {
if (_handle != nullptr) {
auto result = _zeGraphExt->destroyGraph(_handle);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,36 +80,35 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<con

_logger.debug("compile start");
auto networkDesc = _compiler->compile(model, config);
auto networkSO = std::make_shared<std::vector<uint8_t>>(std::move(networkDesc.compiledNetwork));
auto blobPtr = std::make_unique<BlobContainerVector>(std::move(networkDesc.compiledNetwork));
_logger.debug("compile end");

ze_graph_handle_t graphHandle = nullptr;

if (_zeGraphExt) {
// Depending on the config, we may get an error when trying to get the graph handle from the compiled network
try {
graphHandle = _zeGraphExt->getGraphHandle(networkSO->data(), networkSO->size());
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
} catch (...) {
_logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not "
"allowed. Only exports are available");
}
}
auto networkSOPtr = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(networkSO->data()), networkSO->size(), networkSO);
return std::make_shared<PluginGraph>(_zeGraphExt,
_compiler,
_zeroInitStruct,
graphHandle,
std::move(networkDesc.metadata),
networkSOPtr,
std::move(blobPtr),
config);
}

std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::shared_ptr<ov::AlignedBuffer> networkSOPtr, const Config& config) const {
std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::unique_ptr<BlobContainer> blobPtr, const Config& config) const {
OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");

_logger.debug("parse start");
std::vector<uint8_t> network(networkSOPtr->size());
network.assign(reinterpret_cast<uint8_t*>(networkSOPtr->get_ptr()), reinterpret_cast<uint8_t*>(networkSOPtr->get_ptr()) + networkSOPtr->size());
std::vector<uint8_t> network(blobPtr->size());
network.assign(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()) + blobPtr->size());
auto networkMeta = _compiler->parse(network, config);
network.clear();
network.shrink_to_fit();
Expand All @@ -118,15 +117,15 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(std::shared_ptr<ov::Aligned
ze_graph_handle_t graphHandle = nullptr;

if (_zeGraphExt) {
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(networkSOPtr->get_ptr()), networkSOPtr->size());
graphHandle = _zeGraphExt->getGraphHandle(reinterpret_cast<const uint8_t*>(blobPtr->get_ptr()), blobPtr->size());
}

return std::make_shared<PluginGraph>(_zeGraphExt,
_compiler,
_zeroInitStruct,
graphHandle,
std::move(networkMeta),
std::move(networkSOPtr),
std::move(blobPtr),
config);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,8 @@ size_t PluginGraph::export_blob(std::ostream& stream) {

std::vector<ov::ProfilingInfo> PluginGraph::process_profiling_output(const std::vector<uint8_t>& profData,
const Config& config) const {

// Need to fix increased memory usage below, ov::SharedBuffer won't permit us to get underlying shared buffer as it is private
// Only if we work with std::vector<uint8_t> blobs, but then IGraph needs to have 2 declarations for the same blob
// Maybe if we templatize blob in IGraph to be either std::vector<uint8_t> or std::shared_ptr<ov::AlignedBuffer>?
std::vector<uint8_t> blob(_blob->size());
blob.assign(reinterpret_cast<uint8_t*>(_blob->get_ptr()), reinterpret_cast<uint8_t*>(_blob->get_ptr()) + _blob->size());
blob.assign(reinterpret_cast<const uint8_t*>(_blob->get_ptr()), reinterpret_cast<const uint8_t*>(_blob->get_ptr()) + _blob->size());
return _compiler->process_profiling_output(profData, blob, config);
}

Expand Down
8 changes: 5 additions & 3 deletions src/plugins/intel_npu/src/plugin/include/plugin.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,16 @@ class Plugin : public ov::IPlugin {

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream, const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& /* unusedStream */,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override;

std::shared_ptr<ov::ICompiledModel> import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const override;

Expand Down
22 changes: 13 additions & 9 deletions src/plugins/intel_npu/src/plugin/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,15 +784,16 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
auto compiler = compilerAdapterFactory.getCompiler(_backends->getIEngineBackend(), localConfig);

auto graphSize = getFileSize(stream);
auto blobSO = std::make_shared<std::vector<uint8_t>>(graphSize);
stream.read(reinterpret_cast<char*>(blobSO->data()), graphSize);

std::vector<uint8_t> blob(graphSize);
stream.read(reinterpret_cast<char*>(blob.data()), graphSize);
if (!stream) {
OPENVINO_THROW("Failed to read data from stream!");
}
_logger.debug("Successfully read %zu bytes into blob.", graphSize);

auto blobSOPtr = std::make_shared<ov::SharedBuffer<std::shared_ptr<std::vector<uint8_t>>>>(reinterpret_cast<char*>(blobSO->data()), graphSize, blobSO);
auto graph = compiler->parse(std::move(blobSOPtr), localConfig);
auto blobContainerPtr = std::make_unique<BlobContainerVector>(std::move(blob));
auto graph = compiler->parse(std::move(blobContainerPtr), localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand All @@ -810,7 +811,9 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream, c
return compiledModel;
}

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer, const ov::AnyMap& properties) const {
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& /* unusedStream */,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::AnyMap& properties) const {
OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model");
OV_ITT_TASK_CHAIN(PLUGIN_IMPORT_MODEL, itt::domains::NPUPlugin, "Plugin::import_model", "merge_configs");

Expand All @@ -835,8 +838,8 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::Ali

try {
auto compiler = getCompiler(localConfig);

auto graph = compiler->parse(model_buffer, localConfig);
auto blobContainerPtr = std::make_unique<BlobContainerAlignedBuffer>(model_buffer);
auto graph = compiler->parse(std::move(blobContainerPtr), localConfig);
graph->update_network_name("net" + std::to_string(_compiledModelLoadCounter++));

const std::shared_ptr<ov::Model> modelDummy =
Expand Down Expand Up @@ -865,15 +868,16 @@ std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
return import_model(stream, context, properties);
}

std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::shared_ptr<ov::AlignedBuffer> model_buffer,
std::shared_ptr<ov::ICompiledModel> Plugin::import_model(std::istream& stream,
std::shared_ptr<ov::AlignedBuffer> model_buffer,
const ov::SoPtr<ov::IRemoteContext>& context,
const ov::AnyMap& properties) const {
auto casted = std::dynamic_pointer_cast<RemoteContextImpl>(context._ptr);
if (casted == nullptr) {
OPENVINO_THROW("Invalid remote context type. Can't cast to ov::intel_npu::RemoteContext type");
}

return import_model(model_buffer, properties);
return import_model(stream, model_buffer, properties);
}

ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr<const ov::Model>& model,
Expand Down

0 comments on commit c8f4abf

Please sign in to comment.