From 0e8fda2996283dd995e6d209c9eac0892cc1a8ce Mon Sep 17 00:00:00 2001
From: Bogdan Pereanu <bogdan.pereanu@intel.com>
Date: Mon, 13 Jan 2025 16:42:28 +0200
Subject: [PATCH] Adding support for set_shape to the state tensors

Signed-off-by: Bogdan Pereanu <bogdan.pereanu@intel.com>
---
 .../src/backend/src/zero_infer_request.cpp    |   9 +-
 .../functional/behavior/infer_request_run.hpp | 131 ++++++++++++++++++
 2 files changed, 136 insertions(+), 4 deletions(-)

diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
index 008e2bdd6d39de..0e911419bf6b2d 100644
--- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
+++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp
@@ -484,7 +484,7 @@ void ZeroInferRequest::infer_async() {
         const auto inputDescriptor = _metadata.inputs.at(ioIndex);
         auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor.at(SINGLE_TENSOR));
 
-        if (is_batched_input(ioIndex) || inputDescriptor.isShapeTensor || inputDescriptor.isStateInput ||
+        if (is_batched_input(ioIndex) || inputDescriptor.isShapeTensor ||
             is_remote_tensor(levelZeroTensor.at(SINGLE_TENSOR)) || zeroTensor == nullptr) {
             ++ioIndex;
             continue;
@@ -499,7 +499,9 @@ void ZeroInferRequest::infer_async() {
                                          zeroTensor->get_byte_size());
             closePipeline = true;
 
-            zeroTensor->reset_memory_flag();
+            if (!inputDescriptor.isStateInput) {
+                zeroTensor->reset_memory_flag();
+            }
         }
 
         ++ioIndex;
@@ -511,8 +513,7 @@ void ZeroInferRequest::infer_async() {
         const auto outputDescriptor = _metadata.outputs.at(ioIndex);
         auto zeroTensor = std::dynamic_pointer_cast<ZeroTensor>(levelZeroTensor);
 
-        if (outputDescriptor.isShapeTensor || outputDescriptor.isStateOutput ||
-            is_remote_tensor(levelZeroTensor) || zeroTensor == nullptr) {
+        if (outputDescriptor.isShapeTensor || is_remote_tensor(levelZeroTensor) || zeroTensor == nullptr) {
             ++ioIndex;
             continue;
         }
diff --git a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp
index 31b55704757b01..b8cc95899eb87a 100644
--- a/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp
+++ b/src/plugins/intel_npu/tests/functional/behavior/infer_request_run.hpp
@@ -10,6 +10,7 @@
 
 #include <array>
 #include <future>
+#include <random>
 #include <thread>
 
 #include "base/ov_behavior_test_utils.hpp"
@@ -20,7 +21,10 @@
 #include "intel_npu/npu_private_properties.hpp"
 #include "openvino/core/any.hpp"
 #include "openvino/core/node_vector.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/multiply.hpp"
 #include "openvino/op/op.hpp"
+#include "openvino/op/sigmoid.hpp"
 #include "openvino/opsets/opset8.hpp"
 #include "openvino/runtime/compiled_model.hpp"
 #include "openvino/runtime/core.hpp"
@@ -124,6 +128,35 @@ class InferRequestRunTests : public ov::test::behavior::OVPluginTestBase,
 
         return std::make_shared<ov::Model>(res, params);
     }
+
+    std::shared_ptr<ov::Model> createModelWithStates(element::Type type, const Shape& shape) {
+        auto input = std::make_shared<ov::op::v0::Parameter>(type, shape);
+        auto mem_i1 = std::make_shared<ov::op::v0::Constant>(type, shape, 0);
+        auto mem_r1 = std::make_shared<ov::op::v3::ReadValue>(mem_i1, "r_1-3");
+        auto mul1 = std::make_shared<ov::op::v1::Multiply>(mem_r1, input);
+
+        auto mem_i2 = std::make_shared<ov::op::v0::Constant>(type, shape, 0);
+        auto mem_r2 = std::make_shared<ov::op::v3::ReadValue>(mem_i2, "c_1-3");
+        auto mul2 = std::make_shared<ov::op::v1::Multiply>(mem_r2, mul1);
+        auto mem_w2 = std::make_shared<ov::op::v3::Assign>(mul2, "c_1-3");
+        auto mem_w1 = std::make_shared<ov::op::v3::Assign>(mul2, "r_1-3");
+        auto sigm = std::make_shared<ov::op::v0::Sigmoid>(mul2);
+        sigm->set_friendly_name("sigmod_state");
+        sigm->get_output_tensor(0).set_names({"sigmod_state"});
+        mem_r1->set_friendly_name("Memory_1");
+        mem_r1->get_output_tensor(0).set_names({"Memory_1"});
+        mem_w1->add_control_dependency(mem_r1);
+        sigm->add_control_dependency(mem_w1);
+
+        mem_r2->set_friendly_name("Memory_2");
+        mem_r2->get_output_tensor(0).set_names({"Memory_2"});
+        mem_w2->add_control_dependency(mem_r2);
+        sigm->add_control_dependency(mem_w2);
+
+        auto function = std::make_shared<ov::Model>(ov::NodeVector{sigm},
+                                                    ov::ParameterVector{input}, "add_output");
+        return function;
+    }
 };
 
 TEST_P(InferRequestRunTests, AllocatorCanDisposeBlobWhenOnlyInferRequestIsInScope) {
@@ -962,6 +995,104 @@ TEST_P(SetShapeInferRunTests, checkResultsAfterIOBlobReallocation) {
     }
 }
 
+TEST_P(SetShapeInferRunTests, checkResultsAfterStateTensorsReallocation) {
+    // Skip test according to plugin specific disabledTestPatterns() (if any)
+    SKIP_IF_CURRENT_TEST_IS_DISABLED()
+
+    testing::internal::Random random(1);
+    ov::Tensor input_tensor;
+
+    auto original_shape = Shape{1, 10, 10, 10};
+    auto dummy_shape = Shape{1, 50, 100, 100};
+    auto shape_size = ov::shape_size(original_shape);
+    auto model = createModelWithStates(element::f32, original_shape);
+
+    auto context = core->get_default_context(target_device);
+
+    compiled_model = core->compile_model(model, target_device, configuration);
+    ov::InferRequest inference_request;
+    inference_request = compiled_model.create_infer_request();
+
+    auto input = compiled_model.input();
+    OV_ASSERT_NO_THROW(input_tensor = inference_request.get_tensor(input));
+    auto* input_data = input_tensor.data<float>();
+    for (size_t i = 0; i < shape_size; ++i) {
+        input_data[i] = static_cast<float>(random.Generate(10));
+    }
+
+    for (auto&& state : inference_request.query_state()) {
+        state.reset();
+    }
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    auto output_tensor = inference_request.get_tensor("sigmod_state");
+    auto output_data = output_tensor.data<float>();
+    for (size_t i = 0; i < output_tensor.get_size(); i++) {
+        EXPECT_NEAR(0.5f, output_data[i], 1e-5);
+    }
+
+    auto states = inference_request.query_state();
+    for (auto state : states) {
+        auto last_state = state.get_state();
+        auto last_state_size = last_state.get_size();
+        auto last_state_data = static_cast<float*>(last_state.data());
+
+        ASSERT_TRUE(last_state_size != 0) << "State size should not be 0";
+
+        for (size_t i = 0; i < last_state_size; ++i) {
+            EXPECT_NEAR(0.0, last_state_data[i], 1e-5);
+        }
+    }
+
+    // create dummy Tensors to force the driver to allocate memory for the initial tensor somewhere else
+    [[maybe_unused]] auto l0_host_dummy_tensor_0 = context.create_host_tensor(ov::element::f32, dummy_shape);
+    [[maybe_unused]] auto l0_host_dummy_tensor_1 = context.create_host_tensor(ov::element::f32, dummy_shape);
+    [[maybe_unused]] auto l0_host_dummy_tensor_2 = context.create_host_tensor(ov::element::f32, dummy_shape);
+    [[maybe_unused]] auto l0_host_dummy_tensor_3 = context.create_host_tensor(ov::element::f32, dummy_shape);
+    [[maybe_unused]] auto l0_host_dummy_tensor_4 = context.create_host_tensor(ov::element::f32, dummy_shape);
+    [[maybe_unused]] auto l0_host_dummy_tensor_5 = context.create_host_tensor(ov::element::f32, dummy_shape);
+    [[maybe_unused]] auto l0_host_dummy_tensor_6 = context.create_host_tensor(ov::element::f32, dummy_shape);
+    [[maybe_unused]] auto l0_host_dummy_tensor_7 = context.create_host_tensor(ov::element::f32, dummy_shape);
+
+    for (auto item : inference_request.query_state()) {
+        auto tensor_state = item.get_state();
+        auto original_shape = tensor_state.get_shape();
+        OV_ASSERT_NO_THROW(tensor_state.set_shape({1, 50, 20, 20}));
+        OV_ASSERT_NO_THROW(tensor_state.set_shape(original_shape));
+    }
+
+    for (auto&& state : inference_request.query_state()) {
+        state.reset();
+    }
+
+    for (auto state : states) {
+        auto last_state = state.get_state();
+        auto last_state_size = last_state.get_size();
+        auto last_state_data = static_cast<float*>(last_state.data());
+
+        ASSERT_TRUE(last_state_size != 0) << "State size should not be 0";
+
+        for (size_t i = 0; i < last_state_size; ++i) {
+            last_state_data[i] = 1.0f;
+        }
+    }
+
+    OV_ASSERT_NO_THROW(inference_request.infer());
+
+    for (auto state : states) {
+        auto last_state = state.get_state();
+        auto last_state_size = last_state.get_size();
+        auto last_state_data = static_cast<float*>(last_state.data());
+
+        ASSERT_TRUE(last_state_size != 0) << "State size should not be 0";
+
+        for (size_t i = 0; i < last_state_size; ++i) {
+            EXPECT_NEAR(input_data[i], last_state_data[i], 1e-5);
+        }
+    }
+}
+
 } // namespace behavior
 } // namespace test
 } // namespace ov