Skip to content

Commit

Permalink
Use update mutable command list for states
Browse files Browse the repository at this point in the history
Signed-off-by: Bogdan Pereanu <[email protected]>
  • Loading branch information
pereanub committed Jan 17, 2025
1 parent 0e8fda2 commit fa93b35
Show file tree
Hide file tree
Showing 10 changed files with 739 additions and 71 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,19 @@ class ZeroInferRequest final : public SyncInferRequest {
const ov::Shape& shape,
const ov::Allocator& allocator = {}) const override;

void add_state(const IODescriptor& descriptor, size_t index) const override;

void update_pipeline_if_memory_changed();
void update_states_if_memory_changed();

const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
const std::shared_ptr<IGraph> _graph;
const Config _config;
Logger _logger;

const std::vector<ArgumentDescriptor>& _executorInputDescriptors;
const std::vector<ArgumentDescriptor>& _executorOutputDescriptors;

// A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
// memory area for the tensor.
mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
Expand Down
49 changes: 49 additions & 0 deletions src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright (C) 2018-2025 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "intel_npu/common/variable_state.hpp"
#include "intel_npu/config/config.hpp"
#include "intel_npu/network_metadata.hpp"
#include "intel_npu/utils/logger/logger.hpp"
#include "intel_npu/utils/zero/zero_init.hpp"

namespace intel_npu {

/// Variable (read/write) state of an NPU inference request, backed by Level Zero
/// memory. Extends the plugin-agnostic VariableState with bookkeeping flags that
/// tell the infer request whether the user supplied a new state tensor and
/// whether the Level Zero command-list arguments must be rebound to it.
class ZeroVariableState final : public VariableState {
public:
    /// @param init_structs Holder of the Level Zero driver/context structures.
    /// @param descriptor   Compiler-provided metadata describing this state I/O.
    /// @param tensor       Tensor initially backing the state.
    /// @param index        Position of the state within the input descriptors.
    /// @param config       Plugin configuration (used at least for log level).
    explicit ZeroVariableState(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
                               const IODescriptor& descriptor,
                               const ov::SoPtr<ov::ITensor>& tensor,
                               size_t index,
                               const Config& config);

    /// Replaces the tensor backing this state with a user-provided one.
    /// NOTE(review): presumably also raises the "updated" flags below — confirm
    /// against the implementation in zero_variable_state.cpp.
    void set_state(const ov::SoPtr<ov::ITensor>& new_state) override;

    /// Resets the state content (standard ov::IVariableState contract).
    void reset() override;

    /// Index of this state within the executor's input descriptors.
    size_t get_index() const;
    /// Compiler metadata for this state (holds e.g. relatedDescriptorIndex
    /// linking the state input to its paired output).
    const IODescriptor& get_descriptor() const;

    /// True while a tensor provided through set_state() has not yet been
    /// consumed by the infer request; cleared via reset_tensor_updated_flag().
    bool tensor_was_updated() const;
    void reset_tensor_updated_flag();

    /// True when the new state tensor additionally requires updating the
    /// Level Zero command-list arguments; cleared via
    /// reset_zero_tensor_updated_flag().
    bool zero_tensor_should_be_updated() const;
    void reset_zero_tensor_updated_flag();

    ~ZeroVariableState() override = default;

private:
    std::shared_ptr<ZeroInitStructsHolder> _init_structs;
    IODescriptor _descriptor;
    size_t _index;  // position within the executor's input descriptors

    bool _tensor_updated = false;       // a new tensor arrived via set_state()
    bool _zero_tensor_updated = false;  // command-list rebinding still pending

    Logger _logger;
};

} // namespace intel_npu
215 changes: 152 additions & 63 deletions src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "openvino/op/util/op_types.hpp"
#include "openvino/runtime/intel_npu/remote_properties.hpp"
#include "zero_memory.hpp"
#include "zero_variable_state.hpp"

using namespace intel_npu;

Expand Down Expand Up @@ -102,13 +103,13 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
_graph(compiledModel->get_graph()),
_config(config),
_logger("ZeroInferRequest", config.get<LOG_LEVEL>()),
_executorInputDescriptors(_graph->get_input_descriptors()),
_executorOutputDescriptors(_graph->get_output_descriptors()),
_levelZeroInputTensors(_metadata.inputs.size(), std::vector<std::shared_ptr<ov::ITensor>>(1, nullptr)),
_levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
_profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
_profilingQuery(_initStructs, 0) {
_logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
const std::vector<ArgumentDescriptor>& executorInputDescriptors = _graph->get_input_descriptors();
const std::vector<ArgumentDescriptor>& executorOutputDescriptors = _graph->get_output_descriptors();

auto proftype = config.get<PROFILING_TYPE>();
if (proftype == ov::intel_npu::ProfilingType::INFER) {
Expand All @@ -128,7 +129,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

size_t ioIndex = 0;
for (const IODescriptor& inputDescriptor : _metadata.inputs) {
check_level_zero_attributes_match(inputDescriptor, executorInputDescriptors.at(ioIndex));
check_level_zero_attributes_match(inputDescriptor, _executorInputDescriptors.at(ioIndex));

if (!(inputDescriptor.isStateInput || inputDescriptor.isShapeTensor)) {
++ioIndex;
Expand All @@ -143,7 +144,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&

ioIndex = 0;
for (const IODescriptor& outputDescriptor : _metadata.outputs) {
check_level_zero_attributes_match(outputDescriptor, executorOutputDescriptors.at(ioIndex));
check_level_zero_attributes_match(outputDescriptor, _executorOutputDescriptors.at(ioIndex));

if (!(outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor)) {
++ioIndex;
Expand Down Expand Up @@ -204,6 +205,34 @@ void ZeroInferRequest::create_pipeline() {
auto groupOrdinal = zeroUtils::findGroupOrdinal(_initStructs->getDevice(), _properties);
_logger.debug("ZeroInferRequest::create_pipeline - init completed");

// Set new tensors and reset variable state flag if memory updated before creating the pipeline
_logger.debug("ZeroInferRequest::create_pipeline - set new tensors and reset variable state flag if memory updated "
"before creating the pipeline");
for (const auto& variableState : _variableStates) {
auto zeroState = std::dynamic_pointer_cast<ZeroVariableState>(variableState._ptr);

OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin");

if (zeroState->tensor_was_updated()) {
OPENVINO_ASSERT(zeroState->get_descriptor().relatedDescriptorIndex.has_value(),
"The link between state descriptors is missing, state name: ",
zeroState->get_descriptor().nameFromCompiler);

get_user_input(zeroState->get_index()) = zeroState->get_state();
_userOutputTensors.at(*zeroState->get_descriptor().relatedDescriptorIndex) = zeroState->get_state();

zeroState->reset_tensor_updated_flag();

if (zeroState->zero_tensor_should_be_updated()) {
zeroState->reset_zero_tensor_updated_flag();

get_level_zero_input(zeroState->get_index()) = zeroState->get_state()._ptr;
_levelZeroOutputTensors.at(*zeroState->get_descriptor().relatedDescriptorIndex) =
zeroState->get_state()._ptr;
}
}
}

_logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline");

// Construct pipeline
Expand Down Expand Up @@ -454,6 +483,118 @@ ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::N
return userTensors;
}

void ZeroInferRequest::update_pipeline_if_memory_changed() {
bool closePipeline = false;
size_t ioIndex = 0;

for (const auto& levelZeroTensor : _levelZeroInputTensors) {
const auto inputDescriptor = _metadata.inputs.at(ioIndex);
auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor.at(SINGLE_TENSOR));

if (is_batched_input(ioIndex) || inputDescriptor.isShapeTensor ||
is_remote_tensor(levelZeroTensor.at(SINGLE_TENSOR)) || zeroTensor == nullptr) {
++ioIndex;
continue;
}

if (zeroTensor->memory_address_changed()) {
_logger.debug("Update input graph descriptor with the new tensor");
OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");

_pipeline->updateCommandList(_graph->get_input_descriptors().at(ioIndex).idx,
zeroTensor->data(),
zeroTensor->get_byte_size());
closePipeline = true;

if (!inputDescriptor.isStateInput) {
zeroTensor->reset_memory_flag();
}
}

++ioIndex;
}

ioIndex = 0;

for (const auto& levelZeroTensor : _levelZeroOutputTensors) {
const auto outputDescriptor = _metadata.outputs.at(ioIndex);
auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor);

if (outputDescriptor.isShapeTensor || is_remote_tensor(levelZeroTensor) || zeroTensor == nullptr) {
++ioIndex;
continue;
}

if (zeroTensor->memory_address_changed()) {
_logger.debug("Update output graph descriptor with the new tensor");
OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");

_pipeline->updateCommandList(_graph->get_output_descriptors().at(ioIndex).idx,
zeroTensor->data(),
zeroTensor->get_byte_size());
closePipeline = true;

zeroTensor->reset_memory_flag();
}

++ioIndex;
}

if (closePipeline) {
_pipeline->closeCommandList();
}
}

void ZeroInferRequest::update_states_if_memory_changed() {
bool closePipeline = false;

for (const auto& variableState : _variableStates) {
auto zeroState = std::dynamic_pointer_cast<ZeroVariableState>(variableState._ptr);

OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin");

if (zeroState->tensor_was_updated()) {
OPENVINO_ASSERT(zeroState->get_descriptor().relatedDescriptorIndex.has_value(),
"The link between state descriptors is missing, state name: ",
zeroState->get_descriptor().nameFromCompiler);

get_user_input(zeroState->get_index()) = zeroState->get_state();
_userOutputTensors.at(*zeroState->get_descriptor().relatedDescriptorIndex) = zeroState->get_state();

zeroState->reset_tensor_updated_flag();

if (zeroState->zero_tensor_should_be_updated()) {
auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(zeroState->get_state()._ptr);

void* userBuffer = !remoteTensor
? zeroState->get_state()->data()
: extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);

_pipeline->updateCommandList(_executorInputDescriptors.at(zeroState->get_index()).idx,
userBuffer,
zeroState->get_state()->get_byte_size());

_pipeline->updateCommandList(
_executorOutputDescriptors.at(*zeroState->get_descriptor().relatedDescriptorIndex).idx,
userBuffer,
zeroState->get_state()->get_byte_size());

zeroState->reset_zero_tensor_updated_flag();

get_level_zero_input(zeroState->get_index()) = zeroState->get_state()._ptr;
_levelZeroOutputTensors.at(*zeroState->get_descriptor().relatedDescriptorIndex) =
zeroState->get_state()._ptr;

closePipeline = true;
}
}
}

if (closePipeline) {
_pipeline->closeCommandList();
}
}

void ZeroInferRequest::infer() {
if (_config.get<RUN_INFERENCES_SEQUENTIALLY>()) {
OPENVINO_THROW("Only start async is supported when RUN_INFERENCES_SEQUENTIALLY is enabled!");
Expand All @@ -477,65 +618,8 @@ void ZeroInferRequest::infer_async() {
_pipelineIsCreated = true;
} else {
if (_initStructs->getMutableCommandListVersion()) {
bool closePipeline = false;
size_t ioIndex = 0;

for (const auto& levelZeroTensor : _levelZeroInputTensors) {
const auto inputDescriptor = _metadata.inputs.at(ioIndex);
auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor.at(SINGLE_TENSOR));

if (is_batched_input(ioIndex) || inputDescriptor.isShapeTensor ||
is_remote_tensor(levelZeroTensor.at(SINGLE_TENSOR)) || zeroTensor == nullptr) {
++ioIndex;
continue;
}

if (zeroTensor->memory_address_changed()) {
_logger.debug("Update input graph descriptor with the new tensor");
OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");

_pipeline->updateCommandList(_graph->get_input_descriptors().at(ioIndex).idx,
zeroTensor->data(),
zeroTensor->get_byte_size());
closePipeline = true;

if (!inputDescriptor.isStateInput) {
zeroTensor->reset_memory_flag();
}
}

++ioIndex;
}

ioIndex = 0;

for (const auto& levelZeroTensor : _levelZeroOutputTensors) {
const auto outputDescriptor = _metadata.outputs.at(ioIndex);
auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor);

if (outputDescriptor.isShapeTensor || is_remote_tensor(levelZeroTensor) || zeroTensor == nullptr) {
++ioIndex;
continue;
}

if (zeroTensor->memory_address_changed()) {
_logger.debug("Update output graph descriptor with the new tensor");
OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");

_pipeline->updateCommandList(_graph->get_output_descriptors().at(ioIndex).idx,
zeroTensor->data(),
zeroTensor->get_byte_size());
closePipeline = true;

zeroTensor->reset_memory_flag();
}

++ioIndex;
}

if (closePipeline) {
_pipeline->closeCommandList();
}
update_pipeline_if_memory_changed();
update_states_if_memory_changed();
}
}
}
Expand Down Expand Up @@ -753,6 +837,11 @@ std::shared_ptr<ov::ITensor> ZeroInferRequest::create_tensor(ov::element::Type t
return std::make_shared<ZeroTensor>(_initStructs, type, shape, allocator);
}

/// Registers a Level Zero specific variable state for the given state
/// descriptor, seeded with the current user input tensor at @p index.
void ZeroInferRequest::add_state(const IODescriptor& descriptor, size_t index) const {
    auto state = std::make_shared<ZeroVariableState>(_initStructs, descriptor, get_user_input(index), index, _config);
    _variableStates.push_back(state);
}

// Returns the raw byte buffer collected by the Level Zero profiling query.
std::vector<uint8_t> ZeroInferRequest::get_raw_profiling_data() const {
    return _profilingQuery.getData<uint8_t>();
}
Expand Down
Loading

0 comments on commit fa93b35

Please sign in to comment.