diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
index aaaa128518b34f..e6edc46b61eeb4 100644
--- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
+++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp
@@ -67,11 +67,19 @@ class ZeroInferRequest final : public SyncInferRequest {
                                               const ov::Shape& shape,
                                               const ov::Allocator& allocator = {}) const override;
 
+    void add_state(const IODescriptor& descriptor, size_t index) const override;
+
+    void update_pipeline_if_memory_changed();
+    void update_states_if_memory_changed();
+
     const std::shared_ptr<ZeroInitStructsHolder> _initStructs;
     const std::shared_ptr<IGraph> _graph;
     const Config _config;
     Logger _logger;
 
+    const std::vector<ArgumentDescriptor>& _executorInputDescriptors;
+    const std::vector<ArgumentDescriptor>& _executorOutputDescriptors;
+
     // A copy of each tensor is needed to maintain the original L0 memory allocation in case the user provides another
     // memory area for the tensor.
     mutable std::vector<std::vector<std::shared_ptr<ov::ITensor>>> _levelZeroInputTensors;
diff --git a/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp b/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp
new file mode 100644
index 00000000000000..1ec6b3df08078e
--- /dev/null
+++ b/src/plugins/intel_npu/src/backend/include/zero_variable_state.hpp
@@ -0,0 +1,49 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "intel_npu/common/variable_state.hpp"
+#include "intel_npu/config/config.hpp"
+#include "intel_npu/network_metadata.hpp"
+#include "intel_npu/utils/logger/logger.hpp"
+#include "intel_npu/utils/zero/zero_init.hpp"
+
+namespace intel_npu {
+
+class ZeroVariableState final : public VariableState {
+public:
+    explicit ZeroVariableState(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
+                               const IODescriptor& descriptor,
+                               const ov::SoPtr<ov::ITensor>& tensor,
+                               size_t index,
+                               const Config& config);
+
+    void set_state(const ov::SoPtr<ov::ITensor>& new_state) override;
+
+    void reset() override;
+
+    size_t get_index() const;
+    const IODescriptor& get_descriptor() const;
+
+    bool tensor_was_updated() const;
+    void reset_tensor_updated_flag();
+
+    bool zero_tensor_should_be_updated() const;
+    void reset_zero_tensor_updated_flag();
+
+    ~ZeroVariableState() override = default;
+
+private:
+    std::shared_ptr<ZeroInitStructsHolder> _init_structs;
+    IODescriptor _descriptor;
+    size_t _index;
+
+    bool _tensor_updated = false;
+    bool _zero_tensor_updated = false;
+
+    Logger _logger;
+};
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index 0e911419bf6b2d..3b0585374f7652 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -13,6 +13,7 @@
 #include "openvino/op/util/op_types.hpp"
 #include "openvino/runtime/intel_npu/remote_properties.hpp"
 #include "zero_memory.hpp"
+#include "zero_variable_state.hpp"
 
 using namespace intel_npu;
 
@@ -102,13 +103,13 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
       _graph(compiledModel->get_graph()),
       _config(config),
       _logger("ZeroInferRequest", config.get<LOG_LEVEL>()),
+      _executorInputDescriptors(_graph->get_input_descriptors()),
+      _executorOutputDescriptors(_graph->get_output_descriptors()),
       _levelZeroInputTensors(_metadata.inputs.size(), std::vector<std::shared_ptr<ov::ITensor>>(1, nullptr)),
       _levelZeroOutputTensors(_metadata.outputs.size(), nullptr),
       _profilingPool(_initStructs, _graph, zeroProfiling::POOL_SIZE),
       _profilingQuery(_initStructs, 0) {
     _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest");
-    const std::vector<ArgumentDescriptor>& executorInputDescriptors = _graph->get_input_descriptors();
-    const std::vector<ArgumentDescriptor>& executorOutputDescriptors = _graph->get_output_descriptors();
 
     auto proftype = config.get<PROFILING_TYPE>();
     if (proftype == ov::intel_npu::ProfilingType::INFER) {
@@ -128,7 +129,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
     size_t ioIndex = 0;
     for (const IODescriptor& inputDescriptor : _metadata.inputs) {
-        check_level_zero_attributes_match(inputDescriptor, executorInputDescriptors.at(ioIndex));
+        check_level_zero_attributes_match(inputDescriptor, _executorInputDescriptors.at(ioIndex));
 
         if (!(inputDescriptor.isStateInput || inputDescriptor.isShapeTensor)) {
             ++ioIndex;
@@ -143,7 +144,7 @@ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr<ZeroInitStructsHolder>&
     ioIndex = 0;
     for (const IODescriptor& outputDescriptor : _metadata.outputs) {
-        check_level_zero_attributes_match(outputDescriptor, executorOutputDescriptors.at(ioIndex));
+        check_level_zero_attributes_match(outputDescriptor, _executorOutputDescriptors.at(ioIndex));
 
         if (!(outputDescriptor.isStateOutput || outputDescriptor.isShapeTensor)) {
             ++ioIndex;
@@ -204,6 +205,34 @@ void ZeroInferRequest::create_pipeline() {
     auto groupOrdinal = zeroUtils::findGroupOrdinal(_initStructs->getDevice(), _properties);
     _logger.debug("ZeroInferRequest::create_pipeline - init completed");
 
+    // Set new tensors and reset variable state flag if memory updated before creating the pipeline
+    _logger.debug("ZeroInferRequest::create_pipeline - set new tensors and reset variable state flag if memory updated "
+                  "before creating the pipeline");
+    for (const auto& variableState : _variableStates) {
+        auto zeroState = std::dynamic_pointer_cast<ZeroVariableState>(variableState._ptr);
+
+        OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin");
+
+        if (zeroState->tensor_was_updated()) {
+            OPENVINO_ASSERT(zeroState->get_descriptor().relatedDescriptorIndex.has_value(),
+                            "The link between state descriptors is missing, state name: ",
+                            zeroState->get_descriptor().nameFromCompiler);
+
+            get_user_input(zeroState->get_index()) = zeroState->get_state();
+            _userOutputTensors.at(*zeroState->get_descriptor().relatedDescriptorIndex) = zeroState->get_state();
+
+            zeroState->reset_tensor_updated_flag();
+
+            if (zeroState->zero_tensor_should_be_updated()) {
+                zeroState->reset_zero_tensor_updated_flag();
+
+                get_level_zero_input(zeroState->get_index()) = zeroState->get_state()._ptr;
+                _levelZeroOutputTensors.at(*zeroState->get_descriptor().relatedDescriptorIndex) =
+                    zeroState->get_state()._ptr;
+            }
+        }
+    }
+
     _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline");
 
     // Construct pipeline
@@ -454,6 +483,118 @@ ov::SoPtr<ov::ITensor> ZeroInferRequest::get_tensor(const ov::Output<const ov::Node>& port) const {
+void ZeroInferRequest::update_pipeline_if_memory_changed() {
+    bool closePipeline = false;
+    size_t ioIndex = 0;
+
+    for (const auto& levelZeroTensor : _levelZeroInputTensors) {
+        const auto inputDescriptor = _metadata.inputs.at(ioIndex);
+        auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor.at(SINGLE_TENSOR));
+
+        if (is_batched_input(ioIndex) || inputDescriptor.isShapeTensor ||
+            is_remote_tensor(levelZeroTensor.at(SINGLE_TENSOR)) || zeroTensor == nullptr) {
+            ++ioIndex;
+            continue;
+        }
+
+        if (zeroTensor->memory_address_changed()) {
+            _logger.debug("Update input graph descriptor with the new tensor");
+            OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");
+
+            _pipeline->updateCommandList(_graph->get_input_descriptors().at(ioIndex).idx,
+                                         zeroTensor->data(),
+                                         zeroTensor->get_byte_size());
+            closePipeline = true;
+
+            if (!inputDescriptor.isStateInput) {
+                zeroTensor->reset_memory_flag();
+            }
+        }
+
+        ++ioIndex;
+    }
+
+    ioIndex = 0;
+
+    for (const auto& levelZeroTensor : _levelZeroOutputTensors) {
+        const auto outputDescriptor = _metadata.outputs.at(ioIndex);
+        auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor);
+
+        if (outputDescriptor.isShapeTensor || is_remote_tensor(levelZeroTensor) || zeroTensor == nullptr) {
+            ++ioIndex;
+            continue;
+        }
+
+        if (zeroTensor->memory_address_changed()) {
+            _logger.debug("Update output graph descriptor with the new tensor");
+            OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");
+
+            _pipeline->updateCommandList(_graph->get_output_descriptors().at(ioIndex).idx,
+                                         zeroTensor->data(),
+                                         zeroTensor->get_byte_size());
+            closePipeline = true;
+
+            zeroTensor->reset_memory_flag();
+        }
+
+        ++ioIndex;
+    }
+
+    if (closePipeline) {
+        _pipeline->closeCommandList();
+    }
+}
+
+void ZeroInferRequest::update_states_if_memory_changed() {
+    bool closePipeline = false;
+
+    for (const auto& variableState : _variableStates) {
+        auto zeroState = std::dynamic_pointer_cast<ZeroVariableState>(variableState._ptr);
+
+        OPENVINO_ASSERT(zeroState != nullptr, "State is not compatible with NPU plugin");
+
+        if (zeroState->tensor_was_updated()) {
+            OPENVINO_ASSERT(zeroState->get_descriptor().relatedDescriptorIndex.has_value(),
+                            "The link between state descriptors is missing, state name: ",
+                            zeroState->get_descriptor().nameFromCompiler);
+
+            get_user_input(zeroState->get_index()) = zeroState->get_state();
+            _userOutputTensors.at(*zeroState->get_descriptor().relatedDescriptorIndex) = zeroState->get_state();
+
+            zeroState->reset_tensor_updated_flag();
+
+            if (zeroState->zero_tensor_should_be_updated()) {
+                auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(zeroState->get_state()._ptr);
+
+                void* userBuffer = !remoteTensor
+                                       ? zeroState->get_state()->data()
+                                       : extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
+
+                _pipeline->updateCommandList(_executorInputDescriptors.at(zeroState->get_index()).idx,
+                                             userBuffer,
+                                             zeroState->get_state()->get_byte_size());
+
+                _pipeline->updateCommandList(
+                    _executorOutputDescriptors.at(*zeroState->get_descriptor().relatedDescriptorIndex).idx,
+                    userBuffer,
+                    zeroState->get_state()->get_byte_size());
+
+                zeroState->reset_zero_tensor_updated_flag();
+
+                get_level_zero_input(zeroState->get_index()) = zeroState->get_state()._ptr;
+                _levelZeroOutputTensors.at(*zeroState->get_descriptor().relatedDescriptorIndex) =
+                    zeroState->get_state()._ptr;
+
+                closePipeline = true;
+            }
+        }
+    }
+
+    if (closePipeline) {
+        _pipeline->closeCommandList();
+    }
+}
+
 void ZeroInferRequest::infer() {
     if (_config.get<RUN_INFERENCES_SEQUENTIALLY>()) {
         OPENVINO_THROW("Only start async is supported when RUN_INFERENCES_SEQUENTIALLY is enabled!");
@@ -477,65 +618,8 @@ void ZeroInferRequest::infer_async() {
         _pipelineIsCreated = true;
     } else {
        if (_initStructs->getMutableCommandListVersion()) {
-            bool closePipeline = false;
-            size_t ioIndex = 0;
-
-            for (const auto& levelZeroTensor : _levelZeroInputTensors) {
-                const auto inputDescriptor = _metadata.inputs.at(ioIndex);
-                auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor.at(SINGLE_TENSOR));
-
-                if (is_batched_input(ioIndex) || inputDescriptor.isShapeTensor ||
-                    is_remote_tensor(levelZeroTensor.at(SINGLE_TENSOR)) || zeroTensor == nullptr) {
-                    ++ioIndex;
-                    continue;
-                }
-
-                if (zeroTensor->memory_address_changed()) {
-                    _logger.debug("Update input graph descriptor with the new tensor");
-                    OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");
-
-                    _pipeline->updateCommandList(_graph->get_input_descriptors().at(ioIndex).idx,
-                                                 zeroTensor->data(),
-                                                 zeroTensor->get_byte_size());
-                    closePipeline = true;
-
-                    if (!inputDescriptor.isStateInput) {
-                        zeroTensor->reset_memory_flag();
-                    }
-                }
-
-                ++ioIndex;
-            }
-
-            ioIndex = 0;
-
-            for (const auto& levelZeroTensor : _levelZeroOutputTensors) {
-                const auto outputDescriptor = _metadata.outputs.at(ioIndex);
-                auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor);
-
-                if (outputDescriptor.isShapeTensor || is_remote_tensor(levelZeroTensor) || zeroTensor == nullptr) {
-                    ++ioIndex;
-                    continue;
-                }
-
-                if (zeroTensor->memory_address_changed()) {
-                    _logger.debug("Update output graph descriptor with the new tensor");
-                    OPENVINO_ASSERT(zeroTensor->data(), "Empty buffer");
-
-                    _pipeline->updateCommandList(_graph->get_output_descriptors().at(ioIndex).idx,
-                                                 zeroTensor->data(),
-                                                 zeroTensor->get_byte_size());
-                    closePipeline = true;
-
-                    zeroTensor->reset_memory_flag();
-                }
-
-                ++ioIndex;
-            }
-
-            if (closePipeline) {
-                _pipeline->closeCommandList();
-            }
+            update_pipeline_if_memory_changed();
+            update_states_if_memory_changed();
         }
     }
 }
@@ -753,6 +837,11 @@ std::shared_ptr<ov::ITensor> ZeroInferRequest::create_tensor(ov::element::Type t
     return std::make_shared<ZeroTensor>(_initStructs, type, shape, allocator);
 }
 
+void ZeroInferRequest::add_state(const IODescriptor& descriptor, size_t index) const {
+    _variableStates.push_back(
+        std::make_shared<ZeroVariableState>(_initStructs, descriptor, get_user_input(index), index, _config));
+}
+
 std::vector<uint8_t> ZeroInferRequest::get_raw_profiling_data() const {
     return _profilingQuery.getData<uint8_t>();
 }
diff --git a/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp
new file mode 100644
index 00000000000000..957f14a18e02d0
--- /dev/null
+++ b/src/plugins/intel_npu/src/backend/src/zero_variable_state.cpp
@@ -0,0 +1,110 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "zero_variable_state.hpp"
+
+#include "intel_npu/config/common.hpp"
+#include "intel_npu/utils/zero/zero_api.hpp"
+#include "zero_remote_tensor.hpp"
+
+namespace {
+
+template <typename Type>
+Type extract_object(const ov::AnyMap& params, const ov::Property<Type>& p) {
+    auto itrHandle = params.find(p.name());
+    ov::Any res = nullptr;
+    if (itrHandle == params.end()) {
+        OPENVINO_THROW("No parameter ", p.name(), " found in parameters map");
+    }
+    res = itrHandle->second;
+    return res.as<Type>();
+}
+
+bool memory_was_allocated_in_the_same_l0_context(ze_context_handle_t hContext, const void* ptr) {
+    ze_memory_allocation_properties_t desc = {};
+    desc.stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES;
+    auto res = intel_npu::zeMemGetAllocProperties(hContext, ptr, &desc, nullptr);
+    if (res == ZE_RESULT_SUCCESS) {
+        if (desc.id) {
+            if ((desc.type & ZE_MEMORY_TYPE_HOST) || (desc.type & ZE_MEMORY_TYPE_DEVICE) ||
+                (desc.type & ZE_MEMORY_TYPE_SHARED)) {
+                return true;
+            }
+        }
+    }
+
+    return false;
+}
+
+}  // namespace
+
+namespace intel_npu {
+
+ZeroVariableState::ZeroVariableState(const std::shared_ptr<ZeroInitStructsHolder>& init_structs,
+                                     const IODescriptor& descriptor,
+                                     const ov::SoPtr<ov::ITensor>& tensor,
+                                     size_t index,
+                                     const Config& config)
+    : VariableState(descriptor.nameFromCompiler, tensor),
+      _init_structs(init_structs),
+      _descriptor(descriptor),
+      _index(index),
+      _logger("ZeroVariableState", config.get<LOG_LEVEL>()) {}
+
+void ZeroVariableState::set_state(const ov::SoPtr<ov::ITensor>& new_state) {
+    if (new_state->get_byte_size() != m_state->get_byte_size()) {
+        OPENVINO_THROW("Byte size mismatch");
+    }
+
+    m_state = new_state;
+    _tensor_updated = true;
+
+    if (_init_structs->getMutableCommandListVersion()) {
+        if (!is_remote_tensor(new_state._ptr)) {
+            if (memory_was_allocated_in_the_same_l0_context(_init_structs->getContext(), new_state->data())) {
+                _logger.debug("ZeroInferRequest::set_tensor_data - tensor was created in the same L0 context");
+                _zero_tensor_updated = true;
+            }
+
+            return;
+        }
+
+        _zero_tensor_updated = true;
+    }
+}
+
+void ZeroVariableState::reset() {
+    auto remoteTensor = std::dynamic_pointer_cast<ZeroRemoteTensor>(m_state._ptr);
+
+    void* userBuffer =
+        !remoteTensor ? m_state->data() : extract_object(remoteTensor->get_properties(), ov::intel_npu::mem_handle);
+
+    std::memset(userBuffer, 0, m_state->get_byte_size());
+}
+
+size_t ZeroVariableState::get_index() const {
+    return _index;
+}
+
+const IODescriptor& ZeroVariableState::get_descriptor() const {
+    return _descriptor;
+}
+
+bool ZeroVariableState::tensor_was_updated() const {
+    return _tensor_updated;
+}
+
+void ZeroVariableState::reset_tensor_updated_flag() {
+    _tensor_updated = false;
+}
+
+bool ZeroVariableState::zero_tensor_should_be_updated() const {
+    return _zero_tensor_updated;
+}
+
+void ZeroVariableState::reset_zero_tensor_updated_flag() {
+    _zero_tensor_updated = false;
+}
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
index 3c772168c0c93f..b3b47eb5aa59f6 100644
--- a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
+++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp
@@ -167,6 +167,8 @@ class SyncInferRequest : public ov::IInferRequest {
                                                      const ov::Shape& shape,
                                                      const ov::Allocator& allocator = {}) const;
 
+    virtual void add_state(const IODescriptor& descriptor, const size_t index) const;
+
     bool is_batched_input(size_t idx) const;
 
     ov::SoPtr<ov::ITensor>& get_user_input(size_t index) const;
diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp
index acb83d5b718033..bd6576e14b6053 100644
--- a/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp
+++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/variable_state.hpp
@@ -9,14 +9,13 @@
 
 namespace intel_npu {
 
-class VariableState final : public ov::IVariableState {
+class VariableState : public ov::IVariableState {
 public:
-    explicit VariableState(const std::string& name, const std::shared_ptr<ov::ITensor>& tensor)
-        : ov::IVariableState(name) {
+    explicit VariableState(const std::string& name, const ov::SoPtr<ov::ITensor>& tensor) : ov::IVariableState(name) {
         m_state = tensor;
     }
 
-    void set_state(const ov::SoPtr<ov::ITensor>& newState) override {
+    virtual void set_state(const ov::SoPtr<ov::ITensor>& newState) override {
         if (newState->get_byte_size() != m_state->get_byte_size()) {
             OPENVINO_THROW("Byte size mismatch");
         }
@@ -24,7 +23,7 @@ class VariableState final : public ov::IVariableState {
         std::memcpy(m_state->data(), newState->data(), newState->get_byte_size());
     }
 
-    void reset() override {
+    virtual void reset() override {
         std::memset(m_state->data(), 0, m_state->get_byte_size());
     }
 
diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
index 17dc6391761e5c..86730fc582be2f 100644
--- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
+++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp
@@ -326,7 +326,7 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::allocate_tensor(const IODescripto
         }
 
         if (descriptor.isStateInput) {
-            _variableStates.push_back(std::make_shared<VariableState>(descriptor.nameFromCompiler, tensor));
+            add_state(descriptor, index);
         }
     } else if (_userOutputTensors.at(index) == nullptr) {
         _userOutputTensors.at(index) = tensor;
@@ -341,6 +341,10 @@ std::shared_ptr<ov::ITensor> SyncInferRequest::create_tensor(ov::element::Type t
     return ov::make_tensor(type, shape, allocator);
 }
 
+void SyncInferRequest::add_state(const IODescriptor& descriptor, const size_t index) const {
+    _variableStates.push_back(std::make_shared<VariableState>(descriptor.nameFromCompiler, get_user_input(index)));
+}
+
 bool SyncInferRequest::is_batched_input(size_t idx) const {
     return _userInputTensors.at(idx).size() > 1;
 }
diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp
index f45e30bb109849..f30fa2bb1416a3 100644
--- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp
+++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.cpp
@@ -11,7 +11,7 @@
 
 using namespace ov::test::behavior;
 
-const std::vector<ov::AnyMap> configsInferRequestRunTests = {{ov::log::level(ov::log::Level::ERR)}};
+const std::vector<ov::AnyMap> configsInferRequestRunTests = {{}};
 
 INSTANTIATE_TEST_SUITE_P(compatibility_smoke_BehaviorTest,
                          InferRequestRunTests,
diff --git a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.cpp b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.cpp
index 870f6596dca9ce..d3e537863227e4 100644
--- a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.cpp
+++ b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.cpp
@@ -10,7 +10,7 @@
 
 using namespace ov::test::behavior;
 
-const std::vector<ov::AnyMap> remoteConfigs = {{ov::log::level(ov::log::Level::ERR)}};
+const std::vector<ov::AnyMap> remoteConfigs = {{}};
 
 INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTest,
                          RemoteRunTests,
diff --git a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp
index fa58d4270889ad..dbbfc7ac4e92b1 100644
--- a/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp
+++ b/src/plugins/intel_npu/tests/functional/behavior/remote_tensor_tests/remote_run.hpp
@@ -13,6 +13,10 @@
 #include "common/utils.hpp"
 #include "openvino/core/any.hpp"
 #include "openvino/core/type/element_iterator.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/op.hpp"
+#include "openvino/op/sigmoid.hpp"
 #include "openvino/runtime/compiled_model.hpp"
 #include "openvino/runtime/core.hpp"
 #include "openvino/runtime/intel_npu/level_zero/level_zero.hpp"
@@ -76,6 +80,35 @@ class RemoteRunTests : public ov::test::behavior::OVPluginTestBase,
         return ss.str();
     }
 
+    std::shared_ptr<ov::Model> createModelWithStates(element::Type type, const Shape& shape) {
+        auto input = std::make_shared<ov::op::v0::Parameter>(type, shape);
+        auto mem_i1 = std::make_shared<ov::op::v0::Constant>(type, shape, 0);
+        auto mem_r1 = std::make_shared<ov::op::v3::ReadValue>(mem_i1, "r_1-3");
+        auto mul1 = std::make_shared<ov::op::v1::Multiply>(mem_r1, input);
+
+        auto mem_i2 = std::make_shared<ov::op::v0::Constant>(type, shape, 0);
+        auto mem_r2 = std::make_shared<ov::op::v3::ReadValue>(mem_i2, "c_1-3");
+        auto mul2 = std::make_shared<ov::op::v1::Multiply>(mem_r2, mul1);
+        auto mem_w2 = std::make_shared<ov::op::v3::Assign>(mul2, "c_1-3");
+
+        auto mem_w1 = std::make_shared<ov::op::v3::Assign>(mul2, "r_1-3");
+        auto sigm = std::make_shared<ov::op::v0::Sigmoid>(mul2);
+        sigm->set_friendly_name("sigmod_state");
+        sigm->get_output_tensor(0).set_names({"sigmod_state"});
+        mem_r1->set_friendly_name("Memory_1");
+        mem_r1->get_output_tensor(0).set_names({"Memory_1"});
+        mem_w1->add_control_dependency(mem_r1);
+        sigm->add_control_dependency(mem_w1);
+
+        mem_r2->set_friendly_name("Memory_2");
+        mem_r2->get_output_tensor(0).set_names({"Memory_2"});
+        mem_w2->add_control_dependency(mem_r2);
+        sigm->add_control_dependency(mem_w2);
+
+        auto function = std::make_shared<ov::Model>(ov::NodeVector{sigm}, ov::ParameterVector{input}, "add_output");
+        return function;
+    }
+
     void TearDown() override {
         if (!m_cache_dir.empty()) {
             core->set_property({ov::cache_dir()});
@@ -434,6 +467,380 @@ TEST_P(RemoteRunTests, CheckOutputDataFromTwoRunsInOutRemoteTensorsHostTensor2)
                     0);
 }
 
+TEST_P(RemoteRunTests, checkResultsAfterChangingStateTensors) {
+    // Skip test according to plugin specific disabledTestPatterns() (if any)
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    testing::internal::Random random(1);
+    ov::Tensor input_tensor;
+
+    auto original_shape = Shape{1, 10, 10, 10};
+    auto shape_size = ov::shape_size(original_shape);
+    auto model = createModelWithStates(element::f32, original_shape);
+
+    auto context = core->get_default_context(target_device);
+
+    compiled_model = core->compile_model(model, target_device, configuration);
+    ov::InferRequest inference_request;
+    inference_request = compiled_model.create_infer_request();
+
+    auto input = compiled_model.input();
+    OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input));
+    auto* input_data = input_tensor.data<float>();
+    for (size_t i = 0; i < shape_size; ++i) {
+        input_data[i] = static_cast<float>(random.Generate(10));
+    }
+
+    auto states = inference_request.query_state();
+
+    auto tensor_state = states[0].get_state();
+    auto tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor0 = context.create_host_tensor(ov::element::f32, tensor_state_shape);
+
+    tensor_state = states[1].get_state();
+    tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor1 = context.create_host_tensor(ov::element::f32, tensor_state_shape);
+
+    states[0].set_state(l0_host_tensor0);
+    states[0].reset();
+    states[1].set_state(l0_host_tensor1);
+    states[1].reset();
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    auto output_tensor = inference_request.get_tensor("sigmod_state");
+    auto output_data = output_tensor.data<float>();
+    for (size_t i = 0; i < output_tensor.get_size(); i++) {
+        EXPECT_NEAR(0.5f, output_data[i], 1e-5);
+    }
+
+    auto tensor_size = l0_host_tensor0.get_size();
+    auto state_data = static_cast<float*>(l0_host_tensor0.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(0.0, state_data[i], 1e-5);
+    }
+
+    tensor_size = l0_host_tensor1.get_size();
+    state_data = static_cast<float*>(l0_host_tensor1.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(0.0, state_data[i], 1e-5);
+    }
+
+    tensor_state = states[0].get_state();
+    tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor2 = context.create_host_tensor(ov::element::f32, tensor_state_shape);
+
+    tensor_state = states[1].get_state();
+    tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor3 = context.create_host_tensor(ov::element::f32, tensor_state_shape);
+
+    states[0].set_state(l0_host_tensor2);
+    states[1].set_state(l0_host_tensor3);
+
+    tensor_size = l0_host_tensor2.get_size();
+    state_data = static_cast<float*>(l0_host_tensor2.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        state_data[i] = 1.0f;
+    }
+
+    tensor_size = l0_host_tensor3.get_size();
+    state_data = static_cast<float*>(l0_host_tensor3.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        state_data[i] = 1.0f;
+    }
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    tensor_size = l0_host_tensor2.get_size();
+    state_data = static_cast<float*>(l0_host_tensor2.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(input_data[i], state_data[i], 1e-5);
+    }
+
+    tensor_size = l0_host_tensor3.get_size();
+    state_data = static_cast<float*>(l0_host_tensor3.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(input_data[i], state_data[i], 1e-5);
+    }
+}
+
+TEST_P(RemoteRunTests, checkResultsAfterChangingStateTensorsWithRemoteTensors) {
+    // Skip test according to plugin specific disabledTestPatterns() (if any)
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    testing::internal::Random random(1);
+    ov::Tensor input_tensor;
+
+    auto original_shape = Shape{1, 2, 2, 2};
+    auto shape_size = ov::shape_size(original_shape);
+    auto model = createModelWithStates(element::f32, original_shape);
+
+    auto context = core->get_default_context(target_device).as<ov::intel_npu::level_zero::ZeroContext>();
+    ;
+
+    compiled_model = core->compile_model(model, target_device, configuration);
+    ov::InferRequest inference_request;
+    inference_request = compiled_model.create_infer_request();
+
+    auto input = compiled_model.input();
+    OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input));
+    auto* input_data = input_tensor.data<float>();
+    for (size_t i = 0; i < shape_size; ++i) {
+        input_data[i] = static_cast<float>(random.Generate(10));
+    }
+
+    auto states = inference_request.query_state();
+
+    auto tensor_state = states[0].get_state();
+    auto tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor0 = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape);
+
+    tensor_state = states[1].get_state();
+    tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor1 = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape);
+
+    states[0].set_state(l0_host_tensor0);
+    states[0].reset();
+    states[1].set_state(l0_host_tensor1);
+    states[1].reset();
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    auto output_tensor = inference_request.get_tensor("sigmod_state");
+    auto output_data = output_tensor.data<float>();
+    for (size_t i = 0; i < output_tensor.get_size(); i++) {
+        EXPECT_NEAR(0.5f, output_data[i], 1e-5);
+    }
+
+    auto tensor_size = l0_host_tensor0.get_size();
+    auto state_data = static_cast<float*>(l0_host_tensor0.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(0.0, state_data[i], 1e-5);
+    }
+
+    tensor_size = l0_host_tensor1.get_size();
+    state_data = static_cast<float*>(l0_host_tensor1.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(0.0, state_data[i], 1e-5);
+    }
+
+    tensor_state = states[0].get_state();
+    tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor2 = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape);
+
+    tensor_state = states[1].get_state();
+    tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor3 = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape);
+
+    states[0].set_state(l0_host_tensor2);
+    states[1].set_state(l0_host_tensor3);
+
+    tensor_size = l0_host_tensor2.get_size();
+    state_data = static_cast<float*>(l0_host_tensor2.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        state_data[i] = 1.0f;
+    }
+
+    tensor_size = l0_host_tensor3.get_size();
+    state_data = static_cast<float*>(l0_host_tensor3.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        state_data[i] = 1.0f;
+    }
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    tensor_size = l0_host_tensor2.get_size();
+    state_data = static_cast<float*>(l0_host_tensor2.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(input_data[i], state_data[i], 1e-5);
+    }
+
+    tensor_size = l0_host_tensor3.get_size();
+    state_data = static_cast<float*>(l0_host_tensor3.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(input_data[i], state_data[i], 1e-5);
+    }
+}
+
+TEST_P(RemoteRunTests, checkResultsAfterChangingStateDataWithRemoteAndRandomTensors0) {
+    // Skip test according to plugin specific disabledTestPatterns() (if any)
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    testing::internal::Random random(1);
+    ov::Tensor input_tensor;
+
+    auto original_shape = Shape{1, 10, 10, 10};
+    auto shape_size = ov::shape_size(original_shape);
+    auto model = createModelWithStates(element::f32, original_shape);
+
+    auto context = core->get_default_context(target_device).as<ov::intel_npu::level_zero::ZeroContext>();
+    ;
+
+    compiled_model = core->compile_model(model, target_device, configuration);
+    ov::InferRequest inference_request;
+    inference_request = compiled_model.create_infer_request();
+
+    auto input = compiled_model.input();
+    OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input));
+    auto* input_data = input_tensor.data<float>();
+    for (size_t i = 0; i < shape_size; ++i) {
+        input_data[i] = static_cast<float>(random.Generate(10));
+    }
+
+    auto states = inference_request.query_state();
+
+    auto tensor_state = states[0].get_state();
+    auto tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape);
+
+    tensor_state = states[1].get_state();
+    tensor_state_shape = tensor_state.get_shape();
+    auto byte_size = tensor_state.get_byte_size();
+    float* data = new float[byte_size / sizeof(float)];
+    ov::Tensor random_tensor{ov::element::f32, tensor_state_shape, data};
+
+    states[0].set_state(l0_host_tensor);
+    states[0].reset();
+    states[1].set_state(random_tensor);
+    states[1].reset();
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    auto output_tensor = inference_request.get_tensor("sigmod_state");
+    auto output_data = output_tensor.data<float>();
+    for (size_t i = 0; i < output_tensor.get_size(); i++) {
+        EXPECT_NEAR(0.5f, output_data[i], 1e-5);
+    }
+
+    auto tensor_size = l0_host_tensor.get_size();
+    auto state_data = static_cast<float*>(l0_host_tensor.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(0.0, state_data[i], 1e-5);
+    }
+
+    tensor_size = random_tensor.get_size();
+    state_data = static_cast<float*>(random_tensor.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(0.0, state_data[i], 1e-5);
+    }
+
+    tensor_size = l0_host_tensor.get_size();
+    state_data = static_cast<float*>(l0_host_tensor.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        state_data[i] = 1.0f;
+    }
+
+    tensor_size = random_tensor.get_size();
+    state_data = static_cast<float*>(random_tensor.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        state_data[i] = 1.0f;
+    }
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    tensor_size = l0_host_tensor.get_size();
+    state_data = static_cast<float*>(l0_host_tensor.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(input_data[i], state_data[i], 1e-5);
+    }
+
+    tensor_size = random_tensor.get_size();
+    state_data = static_cast<float*>(random_tensor.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(input_data[i], state_data[i], 1e-5);
+    }
+}
+
+TEST_P(RemoteRunTests, checkResultsAfterChangingStateDataWithRemoteAndRandomTensors1) {
+    // Skip test according to plugin specific disabledTestPatterns() (if any)
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    testing::internal::Random random(1);
+    ov::Tensor input_tensor;
+
+    auto original_shape = Shape{1, 10, 10, 10};
+    auto shape_size = ov::shape_size(original_shape);
+    auto model = createModelWithStates(element::f32, original_shape);
+
+    auto context = core->get_default_context(target_device).as<ov::intel_npu::level_zero::ZeroContext>();
+    ;
+
+    compiled_model = core->compile_model(model, target_device, configuration);
+    ov::InferRequest inference_request;
+    inference_request = compiled_model.create_infer_request();
+
+    auto input = compiled_model.input();
+    OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input));
+    auto* input_data = input_tensor.data<float>();
+    for (size_t i = 0; i < shape_size; ++i) {
+        input_data[i] = static_cast<float>(random.Generate(10));
+    }
+
+    auto states = inference_request.query_state();
+
+    auto tensor_state = states[0].get_state();
+    auto tensor_state_shape = tensor_state.get_shape();
+    auto l0_host_tensor = context.create_l0_host_tensor(ov::element::f32, tensor_state_shape);
+
+    tensor_state = states[1].get_state();
+    tensor_state_shape = tensor_state.get_shape();
+    auto byte_size = tensor_state.get_byte_size();
+    float* data = new float[byte_size / sizeof(float)];
+    ov::Tensor random_tensor{ov::element::f32, tensor_state_shape, data};
+
+    auto tensor_size = l0_host_tensor.get_size();
+    auto state_data = static_cast<float*>(l0_host_tensor.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        state_data[i] = 1.0f;
+    }
+
+    tensor_size = random_tensor.get_size();
+    state_data = static_cast<float*>(random_tensor.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        state_data[i] = 1.0f;
+    }
+
+    states[0].set_state(l0_host_tensor);
+    states[1].set_state(random_tensor);
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    tensor_size = l0_host_tensor.get_size();
+    state_data = static_cast<float*>(l0_host_tensor.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(input_data[i], state_data[i], 1e-5);
+    }
+
+    tensor_size = random_tensor.get_size();
+    state_data = static_cast<float*>(random_tensor.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(input_data[i], state_data[i], 1e-5);
+    }
+
+    states[0].reset();
+    states[1].reset();
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    auto output_tensor = inference_request.get_tensor("sigmod_state");
+    auto output_data = output_tensor.data<float>();
+    for (size_t i = 0; i < output_tensor.get_size(); i++) {
+        EXPECT_NEAR(0.5f, output_data[i], 1e-5);
+    }
+
+    tensor_size = l0_host_tensor.get_size();
+    state_data = static_cast<float*>(l0_host_tensor.get());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(0.0, state_data[i], 1e-5);
+    }
+
+    tensor_size = random_tensor.get_size();
+    state_data = static_cast<float*>(random_tensor.data());
+    for (size_t i = 0; i < tensor_size; ++i) {
+        EXPECT_NEAR(0.0, state_data[i], 1e-5);
+    }
+}
+
 }  // namespace behavior
 }  // namespace test
 }  // namespace ov
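For reference, a minimal usage sketch of the state path this change enables, condensed from the new tests above. It is not part of the patch: it assumes an NPU device is available, "stateful_model.xml" is a placeholder path for any stateful IR, and it only uses calls exercised by the tests (ov::intel_npu::level_zero::ZeroContext, create_l0_host_tensor, set_state, reset). The application binds Level Zero host tensors as variable states once; ZeroVariableState then detects that the memory lives in the same L0 context and updates the mutable command list instead of copying the state on every inference.

// sketch.cpp (illustrative only)
#include <openvino/openvino.hpp>
#include <openvino/runtime/intel_npu/level_zero/level_zero.hpp>

int main() {
    ov::Core core;
    auto compiled = core.compile_model("stateful_model.xml", "NPU");  // placeholder model path
    auto request = compiled.create_infer_request();

    // Default NPU remote context; it can allocate host tensors backed by Level Zero memory.
    auto context = core.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();

    for (auto&& state : request.query_state()) {
        auto precision = state.get_state().get_element_type();
        auto shape = state.get_state().get_shape();
        // A Level Zero host tensor set as state is recognized as allocated in the same L0
        // context, so the plugin rebinds the command list instead of copying each run.
        auto l0_state = context.create_l0_host_tensor(precision, shape);
        state.set_state(l0_state);
        state.reset();  // zero-fills the bound buffer directly
    }

    request.infer();
    return 0;
}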