From 66b14678502d1aada20d4d6357157b3dff2adcf8 Mon Sep 17 00:00:00 2001 From: Dmitry Matveev Date: Thu, 31 Oct 2024 08:32:49 +0000 Subject: [PATCH 001/104] NPUW: Eliminate unnecessary kvcache tensors copy (#27347) ### Details: - We mistakenly copy input parameters when we shouldn't - Yet another `||` -> `&&` change, hopefully less destructive this time ### Tickets: - *ticket-id* --- .../intel_npu/src/plugin/npuw/just_sync_infer_request.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp index 26363e66e55d2a..0e0b96582a663c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp @@ -597,7 +597,7 @@ void ov::npuw::JustInferRequest::bind_global_parameters(std::size_t idx) { LOG_BLOCK(); if (!is_spatial_param(sub_in_idx)) { // Input parameter is non-spatial, do normal handling - if (do_copy || m_input_allocated.count(g_tnsr->data()) == 0) { + if (m_input_allocated.count(g_tnsr->data()) == 0 && do_copy) { LOG_DEBUG("Will be copied"); copy_list.emplace_back(g_tnsr, s_port); } else { From 44b86a860ecb0a3e79e6f75627d6cc5270226e7a Mon Sep 17 00:00:00 2001 From: Vladimir Zlobin Date: Thu, 31 Oct 2024 12:54:52 +0400 Subject: [PATCH 002/104] benchmark_app/cpp: report an error if no files were found. (#26663) Python version already reports an error in that case. benchmark_app is the only user of `readInputFilesArguments()`. It could make sense earlier to emit the warning instead of the error because other samples. 
Ticket 152614 --- samples/cpp/common/utils/src/args_helper.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/samples/cpp/common/utils/src/args_helper.cpp b/samples/cpp/common/utils/src/args_helper.cpp index f4a3d10ceb0b5b..ba58f98e498e90 100644 --- a/samples/cpp/common/utils/src/args_helper.cpp +++ b/samples/cpp/common/utils/src/args_helper.cpp @@ -29,8 +29,7 @@ void readInputFilesArguments(std::vector& files, const std::string& arg) { struct stat sb; if (stat(arg.c_str(), &sb) != 0) { - slog::warn << "File " << arg << " cannot be opened!" << slog::endl; - return; + throw std::invalid_argument(arg + " file or directory not found."); } if (S_ISDIR(sb.st_mode)) { struct CloseDir { @@ -43,17 +42,20 @@ void readInputFilesArguments(std::vector& files, const std::string& using Dir = std::unique_ptr; Dir dp(opendir(arg.c_str())); if (dp == nullptr) { - slog::warn << "Directory " << arg << " cannot be opened!" << slog::endl; - return; + throw std::invalid_argument(arg + " directory cannot be opened!"); } struct dirent* ep; + size_t files_size = files.size(); while (nullptr != (ep = readdir(dp.get()))) { std::string fileName = ep->d_name; if (fileName == "." 
|| fileName == "..") continue; files.push_back(arg + "/" + ep->d_name); } + if (files.size() == files_size) { + throw std::invalid_argument("No files were found in directory " + arg); + } } else { files.push_back(arg); } From 86083e0dbf8d173451a8ee47fa40496a62aea893 Mon Sep 17 00:00:00 2001 From: Mateusz Mikolajczyk Date: Thu, 31 Oct 2024 10:23:59 +0100 Subject: [PATCH 003/104] [Transformations] Add Squeeze-15 downgrade transformation (#27286) ### Details: - *Add Squeeze-15 downgrade transformation to Squeeze-0 for compatible attribute* - *...* ### Tickets: - *CVS-154027* ### PR requires [PR-26995](https://github.com/openvinotoolkit/openvino/pull/26995) to be merged --------- Co-authored-by: Michal Lukaszewski --- .../convert_squeeze15_downgrade.hpp | 23 ++++ .../common_optimizations.cpp | 2 + .../convert_squeeze15_downgrade.cpp | 40 +++++++ .../convert_squeeze15_downgrade_test.cpp | 112 ++++++++++++++++++ 4 files changed, 177 insertions(+) create mode 100644 src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp create mode 100644 src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp create mode 100644 src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp diff --git a/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp b/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp new file mode 100644 index 00000000000000..c2ebfbc0f3138b --- /dev/null +++ b/src/common/transformations/include/transformations/op_conversions/convert_squeeze15_downgrade.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/matcher_pass.hpp" +#include "transformations_visibility.hpp" + +namespace ov { +namespace pass { +/** + * @ingroup ov_transformation_common_api + * @brief Converts Squeeze 
v15 to Squeeze v0. + */ +class TRANSFORMATIONS_API ConvertSqueeze15ToSqueeze0 : public MatcherPass { +public: + OPENVINO_RTTI("ConvertSqueeze15ToSqueeze0", "0"); + ConvertSqueeze15ToSqueeze0(); +}; + +} // namespace pass +} // namespace ov diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index 9d46b583a828f2..37ee2d12d9aebb 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -98,6 +98,7 @@ #include "transformations/op_conversions/convert_softmax_downgrade.hpp" #include "transformations/op_conversions/convert_softmax_upgrade.hpp" #include "transformations/op_conversions/convert_space_to_depth.hpp" +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" #include "transformations/op_conversions/convert_subtract.hpp" #include "transformations/op_conversions/convert_topk11_downgrade.hpp" #include "transformations/op_conversions/convert_xor_to_logical_xor.hpp" @@ -235,6 +236,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr(); ADD_MATCHER(fq_fusions, FakeQuantizeMulFusion) diff --git a/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp b/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp new file mode 100644 index 00000000000000..50701d3d6acd56 --- /dev/null +++ b/src/common/transformations/src/transformations/op_conversions/convert_squeeze15_downgrade.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" + +#include "itt.hpp" +#include "openvino/core/rt_info.hpp" +#include "openvino/op/squeeze.hpp" +#include 
"openvino/pass/pattern/op/wrap_type.hpp" +#include "transformations/utils/utils.hpp" + +ov::pass::ConvertSqueeze15ToSqueeze0::ConvertSqueeze15ToSqueeze0() { + MATCHER_SCOPE(ConvertSqueeze15ToSqueeze0); + + const auto& squeeze_v15_pattern = pattern::wrap_type(); + + const matcher_pass_callback callback = [OV_CAPTURE_CPY_AND_THIS](pattern::Matcher& m) { + const auto& squeeze_v15 = ov::as_type_ptr(m.get_match_root()); + if (!squeeze_v15 || transformation_callback(squeeze_v15)) { + return false; + } + std::shared_ptr squeeze_v0; + if (squeeze_v15->get_input_size() == 1) { + squeeze_v0 = std::make_shared(squeeze_v15->input_value(0)); + } else if (squeeze_v15->get_input_size() == 2 && !squeeze_v15->get_allow_axis_skip()) { + squeeze_v0 = std::make_shared(squeeze_v15->input_value(0), squeeze_v15->input_value(1)); + } else { + return false; + } + squeeze_v0->set_friendly_name(squeeze_v15->get_friendly_name()); + copy_runtime_info(squeeze_v15, squeeze_v0); + replace_node(squeeze_v15, squeeze_v0); + + return true; + }; + + auto m = std::make_shared(squeeze_v15_pattern, matcher_name); + register_matcher(m, callback); +} diff --git a/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp b/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp new file mode 100644 index 00000000000000..f3d90ab2c748bd --- /dev/null +++ b/src/common/transformations/tests/op_conversions/convert_squeeze15_downgrade_test.cpp @@ -0,0 +1,112 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "transformations/op_conversions/convert_squeeze15_downgrade.hpp" + +#include + +#include + +#include "common_test_utils/ov_test_utils.hpp" +#include "openvino/opsets/opset1.hpp" +#include "openvino/opsets/opset15.hpp" +#include "openvino/pass/manager.hpp" +#include "transformations/utils/utils.hpp" +using namespace ov; +using namespace testing; + +namespace { + +enum class IndicesMode { NONE, CONST, 
PARAM }; + +std::shared_ptr create_v15_model(const IndicesMode indices_mode, + const std::vector indices_const_val, + const bool allow_axis_skip) { + const PartialShape data_shape{-1, {2, 5}, 1, {1, 5}, 4}; + const auto& data = std::make_shared(ov::element::f32, data_shape); + ov::ParameterVector params = {data}; + std::shared_ptr squeeze; + if (indices_mode == IndicesMode::NONE) { + squeeze = std::make_shared(data, allow_axis_skip); + } else if (indices_mode == IndicesMode::PARAM) { + const auto& indices = + std::make_shared(ov::element::i32, PartialShape({data_shape.rank()})); + params.push_back(indices); + squeeze = std::make_shared(data, indices, allow_axis_skip); + } else if (indices_mode == IndicesMode::CONST) { + const auto& indices = + ov::opset15::Constant::create(ov::element::i32, Shape({indices_const_val.size()}), indices_const_val); + squeeze = std::make_shared(data, indices, allow_axis_skip); + } + squeeze->set_friendly_name("squeeze15"); + return std::make_shared(squeeze->outputs(), params); +} + +std::shared_ptr create_v1_model(const IndicesMode indices_mode, const std::vector indices_const_val) { + const PartialShape data_shape{-1, {2, 5}, 1, {1, 5}, 4}; + const auto& data = std::make_shared(ov::element::f32, data_shape); + ov::ParameterVector params = {data}; + std::shared_ptr squeeze; + if (indices_mode == IndicesMode::NONE) { + squeeze = std::make_shared(data); + } else if (indices_mode == IndicesMode::PARAM) { + const auto& indices = + std::make_shared(ov::element::i32, PartialShape({data_shape.rank()})); + params.push_back(indices); + squeeze = std::make_shared(data, indices); + } else if (indices_mode == IndicesMode::CONST) { + const auto& indices = + ov::opset1::Constant::create(ov::element::i32, Shape({indices_const_val.size()}), indices_const_val); + squeeze = std::make_shared(data, indices); + } + squeeze->set_friendly_name("squeeze15"); + return std::make_shared(squeeze->outputs(), params); +} + +} // namespace + 
+TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_no_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::NONE, {}, false); + model_ref = create_v1_model(IndicesMode::NONE, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_no_indices_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::NONE, {}, true); + model_ref = create_v1_model(IndicesMode::NONE, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_const_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::CONST, {0, -4, 3}, false); + model_ref = create_v1_model(IndicesMode::CONST, {0, -4, 3}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_dynamic_indices_no_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::PARAM, {}, false); + model_ref = create_v1_model(IndicesMode::PARAM, {}); + EXPECT_EQ(model->output(0).get_partial_shape(), model_ref->output(0).get_partial_shape()); + comparator.enable(FunctionsComparator::CmpValues::CONST_VALUES); + comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES); + 
comparator.enable(FunctionsComparator::CmpValues::NAMES); +} + +TEST_F(TransformationTestsF, ConvertSqueeze15ToSqueeze1_unsupported_skip) { + manager.register_pass(); + model = create_v15_model(IndicesMode::PARAM, {}, true); +} From c685d44493f5a4b0403038f6f1ce9f350cfc0581 Mon Sep 17 00:00:00 2001 From: Alexandra Sidorova Date: Thu, 31 Oct 2024 13:24:38 +0400 Subject: [PATCH 004/104] [Snippets][CPU][Tests] Added tests for dynamic BF16/INT8 MHA (#27169) ### Details: - *Added more tests for the validation of INT8/BF16 MHA in CPU Plugin* - *Split the large "mha.cpp" file into the several small files with the same test semantic (comment https://github.com/openvinotoolkit/openvino/pull/26547#discussion_r1796616083)* ### Tickets: - *N/A* --- .../skip_tests_config.cpp | 8 +- .../snippets/matmul.cpp | 38 +- .../shared_tests_instances/snippets/mha.cpp | 543 +++--------------- .../snippets/mha_extracted_reshape.cpp | 40 ++ .../snippets/mha_fma.cpp | 33 ++ .../snippets/mha_quantized.cpp | 103 ++++ .../snippets/mha_select.cpp | 41 ++ .../snippets/mha_split_dim_m.cpp | 121 ++++ .../snippets/mha_transposed_b.cpp | 50 ++ .../snippets/mha_with_dyn_mul.cpp | 68 +++ .../snippets/mha_wo_transpose.cpp | 151 +++++ .../snippets/transpose_matmul.cpp | 32 +- .../shared_tests_instances/snippets/utils.hpp | 48 ++ .../plugin/shared/include/snippets/mha.hpp | 3 + .../plugin/shared/src/snippets/mha.cpp | 20 +- .../include/subgraph_mha.hpp | 15 +- .../ov_snippets_models/src/subgraph_mha.cpp | 113 ++-- 17 files changed, 807 insertions(+), 620 deletions(-) create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp create mode 
100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 6edc4f062536d0..90820d550df179 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -237,7 +237,6 @@ std::vector disabledTestPatterns() { R"(.*smoke_FakeQuantize.*/FakeQuantizeLayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*)", R"(.*smoke_FakeQuantizePerChannel.*/FakeQuantizeLayerTest.Inference.*TS=.*11.10.22.19.*LEVELS=(255|256).*netPRC=f32.*)", R"(.*smoke_MVN_5D/Mvn6LayerTest.Inference.*TS=.*3.4.2.5.*LEVELS=255.*netPRC=f16.*)", - R"(.*smoke_Snippets_MHAINT8MatMul/MHAINT8MatMul.*)", R"(.*smoke_static/ConvertFqRnnToQuantizedRnn.*2.1.5.*2.1.1.*2.1.1.*)", R"(.*smoke_InterpolateBicubicPillow_Layout_Test/InterpolateLayerCPUTest.CompareWithRefs/ShapeCalcMode=sizes_IS=\[?.2..20.?.?\]_TS.*1.17.4.4.*2.3.10.12.*1.17.4.4.*Sizes.*4.4.*10.20.*10.4.*PARAMETER.*0.0.0.0.*0.0.1.1.*2.3.*)", R"(.*smoke_LoopForCommon/LoopLayerCPUTest.CompareWithRefs/.*_netType=bf16.*)", @@ -563,7 +562,7 @@ std::vector disabledTestPatterns() { // ignored for not supported bf16 platforms retVector.emplace_back(R"(.*smoke_Snippets_EnforcePrecision_bf16.*)"); retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16.*)"); - 
retVector.emplace_back(R"(.*smoke_Snippets_MHAEnforceBF16.*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*EnforceBF16.*)"); retVector.emplace_back(R"(.*ConcatSDPTest.*bf16.*)"); } // [150842] Need to support dynamic K dimension of BF16|INT8 MatMul on AMX systems @@ -572,6 +571,11 @@ std::vector disabledTestPatterns() { retVector.emplace_back(R"(.*smoke_Snippets_MatMul/MatMul.CompareWithRefImpl/.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); retVector.emplace_back(R"(.*smoke_Snippets_MatMulTransposeB.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); retVector.emplace_back(R"(.*smoke_Snippets_MatMulBias.*IS\[0\]=\[\?.\?.\?.\?\].*T\[0\]=(u8|i8|bf16)_T\[1\]=(i8|bf16).*)"); + + retVector.emplace_back(R"(.*smoke_Snippets_MHAWOTransposeEnforceBF16_3D.*IS\[1\]=\[2.64.\?\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[(\?|1).(\?|4).(\?|12).(\?|64)\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_MHA.*BF16.*/MHA.*IS\[0\]=\[\?.\?.\?\].*)"); + retVector.emplace_back(R"(.*smoke_Snippets_(MHAINT8MatMul|MHAQuantMatMul0|MHAFQAfterMatMul_4D|smoke_Snippets_MHAFQ).*IS\[0\]=\[\?.\?.\?\.\?].*)"); } #ifdef SNIPPETS_LIBXSMM_TPP // GN in TPP requires exposing tmp Buffer results outside the loop (ticket: 151234) diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp index f5057137f9b65c..176f0cb4d46aed 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/matmul.cpp @@ -4,44 +4,26 @@ #include "snippets/matmul.hpp" -#include "common_test_utils/test_constants.hpp" -#include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) - namespace { -static inline std::vector> quantized_precisions() { - std::vector> prc = {}; - // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms - if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) { - prc.emplace_back(std::vector{element::i8, element::i8}); - prc.emplace_back(std::vector{element::u8, element::i8}); - } - return prc; -} - static inline std::vector> precisions() { - std::vector> prc = { - {element::f32, element::f32}, - }; + std::vector> prc = precision_f32(2); // Note: TPP doesn't support low precisions yet #ifndef SNIPPETS_LIBXSMM_TPP - auto quant = quantized_precisions(); + auto quant = quantized_precisions_if_supported(); std::copy(quant.begin(), quant.end(), std::back_inserter(prc)); - // In Snippets MatMul BF16 is supported only on bf16/AMX platforms - if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) { - prc.emplace_back(std::vector{element::bf16, element::bf16}); - } + auto bfloat = precision_bf16_if_supported(2); + std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc)); #endif return prc; } - std::vector> input_shapes{ { {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} }, { {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} }, @@ -158,7 +140,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBias, MatMulBias, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized, ::testing::Combine( ::testing::ValuesIn(input_shapes_bias), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(1), // Subgraph ::testing::Values(1), // Tokenized MatMul+Bias @@ -167,8 +149,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulBiasQuantized, MatMulBiasQuantized INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 
16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] @@ -177,8 +159,8 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantized, MatMulsQuantized, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMulsQuantizedSoftmax, MatMulsQuantizedSoftmax, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), - ::testing::ValuesIn(quantized_precisions()), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 16, 128, 64}, {1, 16, 64, 128}, {128, 64}})), + ::testing::ValuesIn(quantized_precisions_if_supported()), ::testing::Values(MatMulType::MatMul), ::testing::Values(3), // Subgraph + Reshape + Subgraph ::testing::Values(2), // Tokenized [MatMul+FQ+Matmul] and [FQ] diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp index 79db0b1546b2a8..63f5176684ccc1 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha.cpp @@ -1,60 +1,70 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "snippets/mha.hpp" -#include "common_test_utils/test_constants.hpp" -#include "internal_properties.hpp" -#include "utils/cpu_test_utils.hpp" -#include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) namespace { -const auto& inputShapes_4D = STATIC_SHAPES( - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, - {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, - {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, - {{2, 68, 6, 92}, {2, 68, 6, 92}, {1, 1, 68, 68}, {2, 68, 6, 92}}, - {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}); - -const auto& inputShapes_3D = STATIC_SHAPES( - {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, - {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, {68, 6, 92}}, - {{16, 2, 92}, {68, 2, 92}, {1, 16, 68}, {68, 2, 92}}); - -static inline bool is_bf16_supported() { - return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16(); -} - -static inline std::vector> precision_f32(size_t count) { - std::vector> prc; - prc.emplace_back(std::vector(count, element::f32)); - return prc; -} - -static inline std::vector> precision_bf16(size_t count) { - std::vector> prc; - if (is_bf16_supported()) - prc.emplace_back(std::vector(count, element::bf16)); - return prc; +std::vector> transposedShape_4D(bool with_dynamic = true) { + auto shapes = SNIPPETS_TESTS_STATIC_SHAPES( + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 128, 12, 64}}, + {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 16, 1, 1}, {1, 128, 16, 64}}, + {{1, 128, 16, 64}, {1, 128, 16, 64}, {1, 1, 1, 128}, {1, 128, 16, 64}}, + {{2, 68, 6, 92}, {2, 68, 6, 92}, {1, 1, 68, 68}, {2, 68, 6, 92}}, + {{1, 58, 16, 34}, {1, 58, 16, 34}, {1, 1, 1, 58}, {1, 58, 16, 34}}); + if (with_dynamic) { + std::vector> dynamic_shapes = {{ + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + {PartialShape{-1, -1, -1, 128}, {{1, 4, 64, 128}, {2, 2, 16, 128}, {1, 4, 72, 128}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 
128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 16, 2, 100}, {1, 128, 3, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 128, 2, 100}, {1, 128, 1, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {2, 2, 16, 128}, {2, 1, 128, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 128, 2, 100}, {1, 128, 3, 64}}}, + }, + { + {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, + {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, + }}; + shapes.insert(shapes.end(), dynamic_shapes.begin(), dynamic_shapes.end()); + } + return shapes; } -static ov::AnyMap enable_callback() { - return ov::AnyMap({ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::ENABLE)}); +std::vector> transposedShape_3D(bool with_dynamic = true) { + auto shapes = SNIPPETS_TESTS_STATIC_SHAPES( + {{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}}, + {{68, 6, 92}, {68, 6, 92}, {1, 68, 68}, {68, 6, 92}}, + {{16, 2, 92}, {68, 2, 92}, {1, 16, 68}, {68, 2, 92}}); + if (with_dynamic) { + shapes.push_back({ + {PartialShape{-1, -1, -1}, {{128, 3, 64}, {128, 3, 64}, {68, 6, 87}}}, + {PartialShape{-1, -1, -1}, {{128, 1, 64}, {128, 1, 64}, {13, 6, 87}}}, + {PartialShape{-1, -1, -1}, {{1, 128, 128}, {1, 128, 128}, {1, 68, 13}}}, + {PartialShape{-1, -1, -1}, {{128, 3, 64}, {128, 3, 64}, {13, 6, 87}}}, + }); + } + return shapes; } INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), 
::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false, true}), + ::testing::Values(false), ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), @@ -62,27 +72,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); -std::vector> inputShapes_4D_dynamic{ - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, - }, - { - {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, - {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D, +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_4D_WithScalarMul, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D_dynamic), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D(false)), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false}), + ::testing::Values(true), ::testing::Values(MHA::default_thread_count), ::testing::Values(1), ::testing::Values(1), @@ -90,13 +85,12 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); - 
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_3D), + ::testing::Combine(::testing::ValuesIn(transposedShape_3D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false, true}), + ::testing::Values(false), ::testing::Values(MHA::default_thread_count), ::testing::Values(5), // [122706]: Subgraph + 4 Transpose ::testing::Values(2), // decomposed Transpose + MHA @@ -104,111 +98,23 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D, ::testing::Values(CPUTestUtils::empty_plugin_config)), MHA::getTestCaseName); -const auto& splitm_static_shapes = STATIC_SHAPES({{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_4D_SplitDimensionM_static, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_static_shapes), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(true), - ::testing::Values(4), // 4 Threads - ::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(enable_callback())), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_3D_SplitDimensionM_static, - MHA, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}})), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(true), - ::testing::Values(4), // 4 Threads - ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes - ::testing::Values(1), // MHA - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(enable_callback())), - MHA::getTestCaseName); - -std::vector> splitm_dynamic_shapes_4d = { - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - 
{PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 1, 1, 128}, {1, 1, 1, 17}, {1, 1, 1, 128}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, - }, - { - {PartialShape{-1, 128, -1, -1}, {{1, 128, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 128, -1}, {{1, 1, 128, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, - { - {PartialShape{-1, 32, -1, -1}, {{1, 32, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 32, -1}, {{1, 1, 32, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, - { - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, - {PartialShape{-1, -1, 16, -1}, {{1, 1, 16, 16}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_4D_SplitDimensionM_dynamic, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_4d), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), - ::testing::Values(4), // 4 Threads - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> splitm_dynamic_shapes_3d = { - { - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, -1, -1}, {{1, 1, 128}, {1, 1, 17}, {1, 1, 128}}}, - {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, - }, - { - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, - {PartialShape{1, 1, -1}, {{1, 1, 128}, {1, 1, 64}, {1, 1, 128}}}, - {PartialShape{-1, 2, 64}, {{128, 2, 64}, 
{64, 2, 64}, {128, 2, 64}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA_3D_SplitDimensionM_dynamic, - MHA, - ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_3d), - ::testing::ValuesIn(precision_f32(4)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), - ::testing::Values(4), // 4 Threads - ::testing::Values(5), // Subgraph + 4 Transpose - ::testing::Values(2), // MHA + one of the transposes is executed via Subgraph (because callback is disabled) - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); +INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHA_3D_WithScalarMul, + MHA, + ::testing::Combine(::testing::ValuesIn(transposedShape_3D(false)), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // [122706]: Subgraph + 4 Transpose + ::testing::Values(2), // decomposed Transpose + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), - ::testing::ValuesIn(precision_bf16(4)), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), + ::testing::ValuesIn(precision_bf16_if_supported(4)), ::testing::Values(ov::element::f32), ::testing::ValuesIn({false, true}), ::testing::Values(MHA::default_thread_count), @@ -220,7 +126,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHABF16_4D, INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, MHA, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), + ::testing::Combine(::testing::ValuesIn(transposedShape_4D()), ::testing::ValuesIn(precision_f32(4)), ::testing::Values(ov::element::bf16), ::testing::ValuesIn({false}), @@ -231,321 +137,6 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAEnforceBF16, ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), MHA::getTestCaseName); -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAMulAdd, - MHAMulAdd, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 10, 12, 16}, {1, 10, 12, 16}, {1, 10, 12, 16}})), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({false}), // Need to support True for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapeSelect = STATIC_SHAPES( - // without broadcast - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 128, 12, 64}}, - {{1, 94, 12, 54}, {1, 94, 12, 54}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 12, 94, 94}, {1, 94, 12, 54}}, - // with broadcast - {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 1, 1}, {1, 12, 1, 1}, {1, 128, 12, 64}}, - {{2, 52, 6, 102}, {2, 52, 6, 102}, {1, 6, 52, 52}, {1, 6, 1, 1}, {1, 6, 1, 1}, {2, 52, 6, 102}} -); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHA, - MHASelect, - ::testing::Combine(::testing::ValuesIn(inputShapeSelect), - ::testing::ValuesIn(precision_f32(6)), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // Need to support True for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(2), // Less + MHA - ::testing::Values(2), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapesWOTranspose_4D = STATIC_SHAPES( - {{1, 12, 197, 64}, {1, 12, 64, 197}, {1, 12, 197, 64}}, - {{1, 12, 12, 64}, {1, 12, 64, 48}, {1, 12, 48, 64}}); -const auto& inputShapesWOTranspose_3D = STATIC_SHAPES( - {{12, 197, 
64}, {12, 64, 197}, {12, 197, 64}}, - {{12, 128, 100}, {12, 100, 128}, {12, 128, 100}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeOnInputs_4D, - MHAWOTransposeOnInputs, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(true), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTranspose_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTranspose_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapesWOTranspose_3D_dynamic{ - { - {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, - {PartialShape{-1, -1, -1}, {{1, 85, 19}, {2, 36, 40}}}, - {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, - }, - { - {PartialShape{2, -1, 64}, {{2, 
9, 64}, {2, 2, 64}, {2, 9, 64}}}, - {PartialShape{2, 64, -1}, {{2, 64, 9}, {2, 64, 2}, {2, 64, 9}}}, - {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 2, 64}, {2, 9, 64}}}, - }, -}; - - - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_DynMHAWOTranspose_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D_dynamic), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeBF16_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_bf16(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeBF16_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_bf16(3)), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - 
MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeEnforceBF16_4D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_4D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::bf16), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWOTransposeEnforceBF16_3D, - MHAWOTranspose, - ::testing::Combine(::testing::ValuesIn(inputShapesWOTranspose_3D), - ::testing::ValuesIn(precision_f32(3)), - ::testing::Values(ov::element::bf16), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(5), // MHA + 4 extra Converts on inputs and output - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAINT8MatMul, - MHAINT8MatMul, - ::testing::Combine(::testing::ValuesIn(std::vector>(inputShapes_4D.begin(), - inputShapes_4D.begin() + 2)), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul - ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - 
-INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAQuantMatMul0, - MHAQuantMatMul0, - ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 128, 768}, {1, 128, 768}, {1, 1, 1, 128}, {1, 128, 768}})), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(9), // FQx2 on inputs + MHA + Transpose on output + 4 Reshapes + Deq Mul - ::testing::Values(4), // FQx2 on inputs + MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MHAFQAfterMatMul_4D, - MHAFQAfterMatMul, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(3), // MHA + Transpose on output + Deq Mul - ::testing::Values(2), // MHA + Deq Mul - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAFQ, - MHAFQ, - ::testing::Combine(::testing::ValuesIn(STATIC_SHAPES({{1, 64, 12, 64}, - {1, 64, 12, 64}, - {1, 1, 1, 64}, - {1, 64, 12, 64}})), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::Values(false), // The graph doesn't contain Multiply - ::testing::Values(MHA::default_thread_count), - ::testing::Values(7), // Transposex2 + Subgraphsx5 - ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapesTransposedB { - { - {{}, {{1, 12, 12, 64}}}, - {{}, {{1, 12, 48, 64}}}, - 
{{}, {{1, 12, 48, 64}}} - }, - { - {PartialShape{-1, 3, -1, 64}, {{1, 3, 12, 64}, {2, 3, 36, 64}}}, - {PartialShape{-1, 3, -1, 64}, {{1, 3, 14, 64}, {2, 3, 42, 64}}}, - {PartialShape{-1, 3, -1, -1}, {{1, 3, 14, 36}, {2, 3, 42, 36}}}, - }, - { - {PartialShape{2, -1, 32, -1}, {{2, 1, 32, 70}, {2, 2, 32, 96}}}, - {PartialShape{2, -1, 49, -1}, {{2, 3, 49, 70}, {2, 1, 49, 96}}}, - {PartialShape{2, -1, 49, -1}, {{2, 1, 49, 17}, {2, 2, 49, 81}}}, - }, -}; - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHATransposedB, - MHATransposedB, - ::testing::Combine(::testing::ValuesIn(inputShapesTransposedB), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // Need to support False for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -const auto& inputShapesExtractedReshape = STATIC_SHAPES( - {{2, 196, 64}, {2, 64, 196}, {2, 14, 14, 14, 1}, {2, 14, 14, 1, 14}, {2, 196, 64}}, - {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 1}, {1, 4, 4, 1, 4}, {1, 16, 10}}, - {{1, 16, 10}, {1, 10, 16}, {1, 1, 1, 1, 1}, {1, 4, 4, 4, 4}, {1, 16, 10}}, - {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 4}, {1, 1, 1, 1, 1}, {1, 16, 10}}, - {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 4, 256}, {1, 4, 256}, {1, 4, 16, 10}}, - {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 1, 256}, {1, 4, 1}, {1, 4, 16, 10}}); - -INSTANTIATE_TEST_SUITE_P( - smoke_Snippets_MHAWithExtractedReshape, - MHAWithExtractedReshape, - ::testing::Combine(::testing::ValuesIn(inputShapesExtractedReshape), - ::testing::Values(std::vector{}), - ::testing::Values(ov::element::f32), - ::testing::ValuesIn({true}), // False is not supported for graph builder in tests - ::testing::Values(MHA::default_thread_count), - ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA - ::testing::Values(2), // Extracted 
Add + MHA - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHA::getTestCaseName); - -std::vector> inputShapes_4D_WithMul_dynamic{ - { - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, - {PartialShape{1}, {{1}, {1}, {1}, {1} }}, - {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, - {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, - }, - { - {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, - {PartialShape{-1, 12, 64, -1}, {{1, 12, 64, 35}, {1, 12, 64, 10}, {1, 12, 64, 10}, {1, 12, 64, 1}, {1, 12, 64, 35}}}, - {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, - {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, - } -}; - -INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynMHA_4D_WithMul, - MHAWithDynamicMul, - ::testing::Combine(::testing::ValuesIn(inputShapes_4D_WithMul_dynamic), - ::testing::ValuesIn(precision_f32(5)), - ::testing::Values(ov::element::f32), - ::testing::Values(MHA::default_thread_count), - ::testing::Values(1), - ::testing::Values(1), - ::testing::Values(ov::test::utils::DEVICE_CPU), - ::testing::Values(CPUTestUtils::empty_plugin_config)), - MHAWithDynamicMul::getTestCaseName); - } // namespace } // namespace snippets } // namespace test diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp new file mode 100644 index 00000000000000..f3c1439395650a --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_extracted_reshape.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +const auto& inputShapesExtractedReshape = SNIPPETS_TESTS_STATIC_SHAPES( + {{2, 196, 64}, {2, 64, 196}, {2, 14, 14, 14, 1}, {2, 14, 14, 1, 14}, {2, 196, 64}}, + {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 1}, {1, 4, 4, 1, 4}, {1, 16, 10}}, + {{1, 16, 10}, {1, 10, 16}, {1, 1, 1, 1, 1}, {1, 4, 4, 4, 4}, {1, 16, 10}}, + {{1, 16, 10}, {1, 10, 16}, {1, 4, 4, 4, 4}, {1, 1, 1, 1, 1}, {1, 16, 10}}, + {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 4, 256}, {1, 4, 256}, {1, 4, 16, 10}}, + {{1, 4, 16, 10}, {1, 4, 10, 16}, {1, 1, 256}, {1, 4, 1}, {1, 4, 16, 10}}); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWithExtractedReshape, + MHAWithExtractedReshape, + ::testing::Combine(::testing::ValuesIn(inputShapesExtractedReshape), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({true}), // False is not supported for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(3), // Extracted Add + Extracted Reshape + MHA + ::testing::Values(2), // Extracted Add + MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp new file mode 100644 index 00000000000000..4bf35e2daa690d --- 
/dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_fma.cpp @@ -0,0 +1,33 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAMulAdd, + MHAMulAdd, + ::testing::Combine( + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 10, 12, 16}, {1, 10, 12, 16}, {1, 10, 12, 16}})), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({false}), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp new file mode 100644 index 00000000000000..0c731b74565863 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_quantized.cpp @@ -0,0 +1,103 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> inputShapesQuantized { + { + {{}, {{1, 128, 16, 64}}}, + {{}, {{1, 128, 16, 64}}}, + {{}, {{1, 16, 1, 1}}}, + {{}, {{1, 128, 16, 64}}} + }, + { + {{}, {{2, 68, 6, 92}}}, + {{}, {{2, 68, 6, 92}}}, + {{}, {{1, 1, 68, 68}}}, + {{}, {{2, 68, 6, 92}}} + }, + // K, N are static + { + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 128, 
-1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + {PartialShape{-1, -1, -1, 128}, {{1, 4, 64, 128}, {2, 2, 16, 128}, {1, 4, 72, 128}}}, + {PartialShape{-1, 128, -1, 100}, {{1, 128, 4, 100}, {2, 128, 2, 100}, {1, 128, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 16, 2, 100}, {1, 128, 3, 64}, {1, 128, 12, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 128, 2, 100}, {1, 128, 1, 64}, {1, 128, 12, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 1, 128}, {2, 1, 128, 128}, {1, 12, 1, 1}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {2, 128, 2, 100}, {1, 128, 3, 64}, {1, 128, 12, 600}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAINT8MatMul, + MHAINT8MatMul, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(6), // FQx3 on inputs + MHA + Transpose on output + Deq Mul + ::testing::Values(5), // FQx3 on inputs + MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAQuantMatMul0, + MHAQuantMatMul0, + ::testing::Combine( + ::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // FQx2 on inputs + MHA + Transpose on output + Deq Mul + ::testing::Values(4), // FQx2 on inputs + MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAFQAfterMatMul_4D, + MHAFQAfterMatMul, + 
::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(3), // MHA + Transpose on output + Deq Mul + ::testing::Values(2), // MHA + Deq Mul + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAFQ, + MHAFQ, + ::testing::Combine(::testing::ValuesIn(inputShapesQuantized), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // The graph doesn't contain Multiply + ::testing::Values(MHA::default_thread_count), + ::testing::Values(7), // Transposex2 + Subgraphsx5 + ::testing::Values(5), // MHA + Deq Mul on output + Deqs on inputs + 2 xFQ on inputs + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp new file mode 100644 index 00000000000000..3fc1417d20b102 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_select.cpp @@ -0,0 +1,41 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +const auto& inputShapeSelect = SNIPPETS_TESTS_STATIC_SHAPES( + // without broadcast + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 12, 128, 128}, {1, 128, 12, 64}}, + {{1, 94, 12, 54}, {1, 94, 12, 54}, {1, 12, 94, 94}, {1, 
12, 94, 94}, {1, 12, 94, 94}, {1, 94, 12, 54}}, + // with broadcast + {{1, 128, 12, 64}, {1, 128, 12, 64}, {1, 12, 128, 128}, {1, 12, 1, 1}, {1, 12, 1, 1}, {1, 128, 12, 64}}, + {{2, 52, 6, 102}, {2, 52, 6, 102}, {1, 6, 52, 52}, {1, 6, 1, 1}, {1, 6, 1, 1}, {2, 52, 6, 102}} +); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA, + MHASelect, + ::testing::Combine(::testing::ValuesIn(inputShapeSelect), + ::testing::ValuesIn(precision_f32(6)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), // Need to support True for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(2), // Less + MHA + ::testing::Values(2), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp new file mode 100644 index 00000000000000..bb5f7fe2fa5b52 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_split_dim_m.cpp @@ -0,0 +1,121 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +static ov::AnyMap enable_callback() { + return ov::AnyMap({ov::intel_cpu::snippets_mode(ov::intel_cpu::SnippetsMode::ENABLE)}); +} + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_SplitDimensionM_static, + MHA, + ::testing::Combine(::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 128, 2, 64}, {1, 128, 2, 64}, {1, 1, 1, 1}, {1, 128, 2, 64}})), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + 
::testing::Values(6), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_3D_SplitDimensionM_static, + MHA, + ::testing::Combine( + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{384, 2, 64}, {384, 2, 64}, {1, 384, 384}, {384, 2, 64}})), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), + ::testing::Values(4), // 4 Threads + ::testing::Values(10), // Subgraph + 4 Reshapes on inputs and 1 Reshape on output + 4 Transposes + ::testing::Values(1), // MHA + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(enable_callback())), + MHA::getTestCaseName); + +std::vector> splitm_dynamic_shapes_4d = { + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 1, 1, 128}, {1, 1, 1, 17}, {1, 1, 1, 128}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 2, 64}, {1, 17, 2, 64}, {1, 128, 2, 64}}}, + }, + { + {PartialShape{-1, 128, -1, -1}, {{1, 128, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 128, -1}, {{1, 1, 128, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, + { + {PartialShape{-1, 32, -1, -1}, {{1, 32, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 32, -1}, {{1, 1, 32, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 64}}}, + {PartialShape{-1, -1, 16, -1}, {{1, 1, 16, 16}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 16, 2, 32}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_SplitDimensionM_dynamic, + MHA, + 
::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_4d), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), + ::testing::Values(4), // 4 Threads + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +std::vector> splitm_dynamic_shapes_3d = { + { + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, -1, -1}, {{1, 1, 128}, {1, 1, 17}, {1, 1, 128}}}, + {PartialShape{-1, -1, -1}, {{128, 2, 64}, {17, 2, 64}, {128, 2, 64}}}, + }, + { + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + {PartialShape{1, 1, -1}, {{1, 1, 128}, {1, 1, 64}, {1, 1, 128}}}, + {PartialShape{-1, 2, 64}, {{128, 2, 64}, {64, 2, 64}, {128, 2, 64}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_3D_SplitDimensionM_dynamic, + MHA, + ::testing::Combine(::testing::ValuesIn(splitm_dynamic_shapes_3d), + ::testing::ValuesIn(precision_f32(4)), + ::testing::Values(ov::element::f32), + ::testing::Values(false), + ::testing::Values(4), // 4 Threads + ::testing::Values(5), // Subgraph + 4 Transpose + ::testing::Values(2), // MHA + one of the transposes is executed via Subgraph (because callback is disabled) + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp new file mode 100644 index 00000000000000..45260df3cab280 --- /dev/null +++ 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_transposed_b.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> inputShapesTransposedB { + { + {{}, {{1, 12, 12, 64}}}, + {{}, {{1, 12, 48, 64}}}, + {{}, {{1, 12, 48, 64}}} + }, + { + {PartialShape{-1, 3, -1, 64}, {{1, 3, 12, 64}, {2, 3, 36, 64}}}, + {PartialShape{-1, 3, -1, 64}, {{1, 3, 14, 64}, {2, 3, 42, 64}}}, + {PartialShape{-1, 3, -1, -1}, {{1, 3, 14, 36}, {2, 3, 42, 36}}}, + }, + { + {PartialShape{2, -1, 32, -1}, {{2, 1, 32, 70}, {2, 2, 32, 96}}}, + {PartialShape{2, -1, 49, -1}, {{2, 3, 49, 70}, {2, 1, 49, 96}}}, + {PartialShape{2, -1, 49, -1}, {{2, 1, 49, 17}, {2, 2, 49, 81}}}, + }, +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHATransposedB, + MHATransposedB, + ::testing::Combine(::testing::ValuesIn(inputShapesTransposedB), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::ValuesIn({true}), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp new file mode 100644 index 00000000000000..7876d737af2281 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_with_dyn_mul.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + 
+#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> transposedShape_4D_WithMul { + { + {PartialShape{-1, -1, -1, 100}, {{1, 64, 4, 100}, {2, 16, 2, 100}, {1, 72, 4, 100}}}, + {PartialShape{-1, 200, -1, 100}, {{1, 200, 4, 100}, {2, 200, 2, 100}, {1, 200, 4, 100}}}, + {PartialShape{-1, -1, 100, 200}, {{1, 4, 100, 200}, {2, 2, 100, 200}, {1, 4, 100, 200}}}, + {PartialShape{-1, -1, -1, 200}, {{1, 4, 64, 200}, {2, 2, 16, 200}, {1, 4, 72, 200}}}, + {PartialShape{-1, 200, -1, 100}, {{1, 200, 4, 100}, {2, 200, 2, 100}, {1, 200, 4, 100}}}, + }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 70, 3, 19}, {1, 128, 3, 64}, {1, 68, 6, 87}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 1, 64}, {2, 49, 1, 19}, {1, 128, 1, 64}, {2, 13, 6, 87}}}, + {PartialShape{1}, {{1}, {1}, {1}, {1} }}, + {PartialShape{-1, -1, -1, -1}, {{2, 1, 128, 128}, {1, 1, 70, 49}, {2, 1, 128, 128}, {1, 1, 68, 13}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 128, 3, 64}, {1, 49, 3, 19}, {1, 128, 3, 64}, {2, 13, 6, 87}}}, + }, + { + {PartialShape{-1, -1, 12, 64}, {{1, 70, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 20, 12, 64}, {1, 70, 12, 64}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {2, 10, 12, 64}, {2, 1, 12, 64}, {2, 10, 12, 64}, {1, 35, 12, 64}}}, + {PartialShape{-1, 12, 64, -1}, {{1, 12, 64, 35}, {1, 12, 64, 10}, {1, 12, 64, 10}, {1, 12, 64, 1}, {1, 12, 64, 35}}}, + {PartialShape{-1, 12, -1, -1}, {{2, 12, 70, 35}, {1, 12, 20, 10}, {1, 12, 20, 10}, {1, 12, 20, 1}, {2, 12, 70, 35}}}, + {PartialShape{-1, -1, 12, 64}, {{1, 35, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 10, 12, 64}, {1, 35, 12, 64}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_WithDynamicMul, + MHAWithDynamicMul, + ::testing::Combine(::testing::ValuesIn(transposedShape_4D_WithMul), + ::testing::ValuesIn(precision_f32(5)), + ::testing::Values(ov::element::f32), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + 
::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHAWithDynamicMul::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHA_4D_WithDynamicMul_EnforceBF16, + MHAWithDynamicMul, + ::testing::Combine(::testing::ValuesIn(transposedShape_4D_WithMul), + ::testing::ValuesIn(precision_f32(5)), + ::testing::Values(ov::element::bf16), + ::testing::Values(MHA::default_thread_count), + ::testing::Values(8), // MHA + 1 Transpose on output + 6 Converts around + ::testing::Values(7), // MHA + 6 Converts around + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHAWithDynamicMul::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp new file mode 100644 index 00000000000000..0967ef27087674 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/mha_wo_transpose.cpp @@ -0,0 +1,151 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/mha.hpp" + +#include "utils.hpp" + +namespace ov { +namespace test { +namespace snippets { + +namespace { + +std::vector> originalShape_4D { + { {{}, {{1, 12, 197, 64}}}, {{}, {{1, 12, 64, 197}}}, {{}, {{1, 12, 197, 64}}} }, + { {{}, {{1, 12, 12, 64}}}, {{}, {{1, 12, 64, 48}}}, {{}, {{1, 12, 48, 64}}} }, + { + {PartialShape{-1, -1, -1, -1}, {{1, 3, 128, 64}, {1, 12, 197, 100}, {1, 3, 128, 64}, {1, 12, 197, 600}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {1, 12, 100, 197}, {1, 3, 64, 128}, {1, 12, 600, 197}}}, + {PartialShape{-1, -1, -1, -1}, {{1, 3, 128, 64}, {1, 12, 197, 100}, {1, 3, 128, 64}, {1, 12, 197, 600}}}, + }, + { + {PartialShape{1, 4, -1, -1}, {{1, 4, 
384, 64}, {1, 4, 197, 64}, {1, 4, 384, 560}}}, + {PartialShape{1, 4, -1, -1}, {{1, 4, 64, 128}, {1, 4, 64, 197}, {1, 4, 560, 384}}}, + {PartialShape{1, 4, -1, 64}, {{1, 4, 128, 64}, {1, 4, 197, 64}, {1, 4, 384, 64}}}, + } +}; + +std::vector> originalShape_3D { + { {{}, {{12, 197, 64}}}, {{}, {{12, 64, 197}}}, {{}, {{12, 197, 64}}} }, + { {{}, {{12, 128, 100}}}, {{}, {{12, 100, 128}}}, {{}, {{12, 128, 100}}} }, + { + {PartialShape{-1, -1, 64}, {{2, 9, 64}, {1, 64, 64}, {2, 64, 64}}}, + {PartialShape{-1, 64, 124}, {{2, 64, 124}, {1, 64, 124}, {2, 64, 124}}}, + {PartialShape{-1, 124, 64}, {{2, 124, 64}, {1, 124, 64}, {2, 124, 64}}}, + }, + { + {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, + {PartialShape{-1, -1, -1}, {{1, 85, 19}, {2, 36, 40}}}, + {PartialShape{-1, -1, -1}, {{12, 19, 85}, {1, 40, 36}}}, + }, + { + {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 4, 64}, {2, 9, 64}}}, + {PartialShape{2, 64, -1}, {{2, 64, 9}, {2, 64, 4}, {2, 64, 9}}}, + {PartialShape{2, -1, 64}, {{2, 9, 64}, {2, 4, 64}, {2, 9, 64}}}, + } +}; + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeOnInputs_4D, + MHAWOTransposeOnInputs, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::Values(std::vector{}), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTranspose_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + 
::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTranspose_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(1), + ::testing::Values(1), + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeBF16_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_bf16_if_supported(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeBF16_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_bf16_if_supported(3)), + ::testing::Values(ov::element::f32), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::empty_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + 
smoke_Snippets_MHAWOTransposeEnforceBF16_4D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_4D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::bf16), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), + MHA::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P( + smoke_Snippets_MHAWOTransposeEnforceBF16_3D, + MHAWOTranspose, + ::testing::Combine(::testing::ValuesIn(originalShape_3D), + ::testing::ValuesIn(precision_f32(3)), + ::testing::Values(ov::element::bf16), + ::testing::Values(true), // Need to support False for graph builder in tests + ::testing::Values(MHA::default_thread_count), + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(5), // MHA + 4 extra Converts on inputs and output + ::testing::Values(ov::test::utils::DEVICE_CPU), + ::testing::Values(CPUTestUtils::cpu_bf16_plugin_config)), + MHA::getTestCaseName); + +} // namespace +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp index c05087283305e4..ea7de9ccb209ad 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/transpose_matmul.cpp @@ -6,36 +6,28 @@ #include "common_test_utils/test_constants.hpp" #include "openvino/runtime/system_conf.hpp" +#include "utils.hpp" namespace ov { namespace test { namespace snippets { -#define STATIC_SHAPES(...) 
static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) - namespace { static inline std::vector> precisions(bool only_fp32 = true) { - std::vector> prc = { - {element::f32, element::f32}, - }; -// Note: low precisions are not supported by TPP yet (ticker: 130010) + std::vector> prc = precision_f32(2); +// Note: TPP doesn't support low precisions yet #ifndef SNIPPETS_LIBXSMM_TPP if (!only_fp32) { - // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms - if (ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8()) { - prc.emplace_back(std::vector{element::i8, element::i8}); - prc.emplace_back(std::vector{element::u8, element::i8}); - } - // In Snippets MatMul BF16 is supported only on bf16/AMX platforms - if (ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16()) { - prc.emplace_back(std::vector{element::bf16, element::bf16}); - } + auto quant = quantized_precisions_if_supported(); + std::copy(quant.begin(), quant.end(), std::back_inserter(prc)); + auto bfloat = precision_bf16_if_supported(2); + std::copy(bfloat.begin(), bfloat.end(), std::back_inserter(prc)); } #endif return prc; } namespace transpose_zero_input { -const auto& transpose_input_shapes = STATIC_SHAPES({{1, 49, 2, 23}, {2, 2, 23, 39}}); +const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{1, 49, 2, 23}, {2, 2, 23, 39}}); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::Combine( ::testing::ValuesIn(transpose_input_shapes), @@ -84,7 +76,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_FullyConnected, TransposeMatMul, } // namespace transpose_zero_input namespace transpose_first_input { -const auto& transpose_input_shapes = STATIC_SHAPES({{2, 1, 49, 13}, {1, 13, 3, 39}}); +const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{2, 1, 49, 13}, {1, 13, 3, 39}}); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::Combine( ::testing::ValuesIn(transpose_input_shapes), @@ 
-126,7 +118,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulFQ, TransposeMatMulFQ, } // namespace transpose_first_input namespace transpose_output { -const auto& transpose_input_shapes = STATIC_SHAPES({{2, 1, 49, 13}, {1, 2, 13, 39}}); +const auto& transpose_input_shapes = SNIPPETS_TESTS_STATIC_SHAPES({{2, 1, 49, 13}, {1, 2, 13, 39}}); INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMult, TransposeMatMul, ::testing::Combine( @@ -195,7 +187,7 @@ static inline std::vector> precisions(bool only_fp32 } INSTANTIATE_TEST_SUITE_P(smoke_Snippets_ExplicitTransposeMatMul, ExplicitTransposeMatMul, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}})), ::testing::Values(1), // Transpose on second input ::testing::ValuesIn(precisions()), ::testing::Values(MatMulType::MatMul), @@ -223,7 +215,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_Snippets_DynExplicitTransposeMatMul, ExplicitTran INSTANTIATE_TEST_SUITE_P(smoke_Snippets_TransposeMatMulBias, ExplicitTransposeMatMulBias, ::testing::Combine( - ::testing::ValuesIn(STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})), + ::testing::ValuesIn(SNIPPETS_TESTS_STATIC_SHAPES({{1, 2, 69, 43}, {2, 49, 2, 43}, {1, 1, 69, 49}})), ::testing::Values(1), // Transpose on second input ::testing::ValuesIn(precisions()), ::testing::Values(MatMulType::MatMul), diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp new file mode 100644 index 00000000000000..6c0d54da973086 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/utils.hpp @@ -0,0 +1,48 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "internal_properties.hpp" +#include "utils/cpu_test_utils.hpp" +#include "openvino/runtime/system_conf.hpp" + 
+namespace ov { +namespace test { +namespace snippets { + +#define SNIPPETS_TESTS_STATIC_SHAPES(...) static_shapes_to_test_representation(std::vector>{__VA_ARGS__}) + +static inline bool is_bf16_supported_by_brgemm() { + return ov::with_cpu_x86_bfloat16() || ov::with_cpu_x86_avx512_core_amx_bf16(); +} + +static inline bool is_i8_supported_by_brgemm() { + return ov::with_cpu_x86_avx512_core_vnni() || ov::with_cpu_x86_avx512_core_amx_int8(); +} + +static inline std::vector> precision_f32(size_t count) { + std::vector> prc; + prc.emplace_back(std::vector(count, element::f32)); + return prc; +} + +static inline std::vector> precision_bf16_if_supported(size_t count) { + std::vector> prc; + if (is_bf16_supported_by_brgemm()) + prc.emplace_back(std::vector(count, element::bf16)); + return prc; +} + +static inline std::vector> quantized_precisions_if_supported() { + std::vector> prc = {}; + // In Snippets MatMul INT8 is supported only on VNNI/AMX platforms + if (is_i8_supported_by_brgemm()) { + prc.emplace_back(std::vector{element::i8, element::i8}); + prc.emplace_back(std::vector{element::u8, element::i8}); + } + return prc; +} + +} // namespace snippets +} // namespace test +} // namespace ov diff --git a/src/tests/functional/plugin/shared/include/snippets/mha.hpp b/src/tests/functional/plugin/shared/include/snippets/mha.hpp index f8198dee0218ee..34cb4d452bfb15 100644 --- a/src/tests/functional/plugin/shared/include/snippets/mha.hpp +++ b/src/tests/functional/plugin/shared/include/snippets/mha.hpp @@ -44,6 +44,7 @@ class MHABase : virtual public SnippetsTestsCommon { void generate_inputs(const std::vector& targetInputStaticShapes) override; virtual std::shared_ptr get_subgraph() const = 0; virtual void init_params(std::vector& input_shapes, ov::element::Type& prc, ov::AnyMap& additional_config) = 0; + virtual void init_thresholds(); size_t m_thread_count; std::vector m_input_types; @@ -88,6 +89,7 @@ class MHATransposedB : public MHA { class MHAINT8MatMul : public MHA { 
protected: std::shared_ptr get_subgraph() const override; + void init_thresholds() override; }; class MHAQuantMatMul0 : public MHA { @@ -103,6 +105,7 @@ class MHAFQAfterMatMul : public MHA { class MHAFQ : public MHA { protected: std::shared_ptr get_subgraph() const override; + void init_thresholds() override; }; class MHAWithExtractedReshape : public MHA { diff --git a/src/tests/functional/plugin/shared/src/snippets/mha.cpp b/src/tests/functional/plugin/shared/src/snippets/mha.cpp index 351cd50856357d..8d0cb8613bc47e 100644 --- a/src/tests/functional/plugin/shared/src/snippets/mha.cpp +++ b/src/tests/functional/plugin/shared/src/snippets/mha.cpp @@ -53,15 +53,19 @@ void MHABase::SetUp() { configuration.insert({"SNIPPETS_MODE", "IGNORE_CALLBACK"}); } - setInferenceType(prc); inType = outType = prc; + setInferenceType(prc); + init_thresholds(); +} + + void MHABase::init_thresholds() { // Note: Libxsmm calculates Exp in a slightly different way, so the abs values might differ a bit. Ticket: 130699 #ifdef SNIPPETS_LIBXSMM_TPP abs_threshold = 1e-6; #endif - if (prc == ov::element::bf16) + if (inType == ov::element::bf16) rel_threshold = 0.05f; -} + } std::string MHA::getTestCaseName(testing::TestParamInfo obj) { std::vector input_shapes; @@ -194,6 +198,11 @@ std::shared_ptr MHAINT8MatMul::get_subgraph() const { return std::make_shared(inputDynamicShapes); } +void MHAINT8MatMul::init_thresholds() { + MHABase::init_thresholds(); + abs_threshold = 4e-6; +} + std::shared_ptr MHAQuantMatMul0::get_subgraph() const { return std::make_shared(inputDynamicShapes); } @@ -206,6 +215,11 @@ std::shared_ptr MHAFQ::get_subgraph() const { return std::make_shared(inputDynamicShapes); } +void MHAFQ::init_thresholds() { + MHABase::init_thresholds(); + abs_threshold = 0.016; +} + std::shared_ptr MHAMulAdd::get_subgraph() const { return std::make_shared(inputDynamicShapes); } diff --git a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp 
b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp index 90ab47214effee..f54f92c598a45f 100644 --- a/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp +++ b/src/tests/ov_helpers/ov_snippets_models/include/subgraph_mha.hpp @@ -235,9 +235,7 @@ class MHAWOTransposeSplitMFunction : public MHAWOTransposeFunction { * FakeQuantize i8 * \ / * Add - * Reshape0 - * Softmax - * Reshape1 Transpose2[0,2,1,3] + * Softmax Transpose2[0,2,1,3] * \ / * MatMul1 * FakeQuantize i8 @@ -261,9 +259,7 @@ class MHAFQAfterMatMulFunction : public SnippetsFunctionBase { * FakeQuantize i8 * \ / * Add - * Reshape0 - * Softmax - * Reshape1 FakeQuantize i8 + * Softmax FakeQuantize i8 * FakeQuantize u8 Transpose2[0,2,1,3] * \ / * MatMul1 @@ -281,20 +277,17 @@ class MHAINT8MatMulFunction : public SnippetsFunctionBase { }; /* Graph: - * FakeQuantize i8 Reshape1 - * Reshape0 Transpose1[0,2,3,1] + * FakeQuantize i8 Transpose1[0,2,3,1] * Transpose0[0,2,1,3] FakeQuantize i8 * \ / * MatMul0 * \ / - * Add Reshape2 + * Add * Softmax Transpose2[0,2,1,3] * \ / * MatMul1 * FakeQuantize i8 * Transpose3[0,2,1,3] - * Reshape3 - * Note: Reshapes are tosplit Tokenization between FQs and deq Mul and MHA since Snippets::Ignore_Callback may be enabled */ class MHAQuantMatMul0Function : public SnippetsFunctionBase { public: diff --git a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp index 1dbf8d7d22ed26..34f42ec838aa6d 100644 --- a/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp +++ b/src/tests/ov_helpers/ov_snippets_models/src/subgraph_mha.cpp @@ -598,38 +598,25 @@ std::shared_ptr MHAFQAfterMatMulFunction::initOriginal() const { auto transpose2Param = std::make_shared(precision, input_shapes[3]); ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param}; - const auto shape_rank = input_shapes[0].get_shape().size(); + const auto shape_rank = 
input_shapes[0].size(); auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 3, 1}); auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); - std::vector reshape0ConstData = {static_cast(input_shapes[0].get_shape()[0] * - input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]), - -1}; - auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {reshape0ConstData.size()}, reshape0ConstData); - - std::vector reshape1ConstData = {static_cast(input_shapes[0].get_shape()[0]), - static_cast(input_shapes[0].get_shape()[2]), - static_cast(input_shapes[0].get_shape()[1]), - static_cast(input_shapes[0].get_shape()[1])}; - auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData); - bool transA = false; bool transB = false; const auto transpose0 = std::make_shared(transpose0Param, transpose0Const); const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); auto fq0 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1}, - {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); + {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); const auto add = std::make_shared(fq0, addParam); - const auto reshape0 = std::make_shared(add, reshape0Const, true); - const auto softMax = std::make_shared(reshape0, 1); - const auto reshape1 = std::make_shared(softMax, reshape1Const, true); + const auto softMax = std::make_shared(add, -1); const auto transpose2 = std::make_shared(transpose2Param, transpose2Const); - const auto matMul1 = std::make_shared(reshape1, transpose2, transA, 
transB); + const auto matMul1 = std::make_shared(softMax, transpose2, transA, transB); auto fq1 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1}, - {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); + {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); const auto transpose3 = std::make_shared(fq1, transpose3Const); ov::ResultVector results{std::make_shared(transpose3)}; @@ -642,46 +629,33 @@ std::shared_ptr MHAINT8MatMulFunction::initOriginal() const { auto transpose2Param = std::make_shared(precision, input_shapes[3]); ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param}; - const auto shape_rank = input_shapes[0].get_shape().size(); + const auto shape_rank = input_shapes[0].size(); auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 3, 1}); auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); - std::vector reshape0ConstData = {static_cast(input_shapes[0].get_shape()[0] * - input_shapes[0].get_shape()[1] * input_shapes[0].get_shape()[2]), - -1}; - auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {reshape0ConstData.size()}, reshape0ConstData); - - std::vector reshape1ConstData = {static_cast(input_shapes[0].get_shape()[0]), - static_cast(input_shapes[0].get_shape()[2]), - static_cast(input_shapes[0].get_shape()[1]), - static_cast(input_shapes[0].get_shape()[1])}; - auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {reshape1ConstData.size()}, reshape1ConstData); - auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1}, - {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); + 
{-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1}, - {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); + {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); auto fq2 = ov::test::utils::make_fake_quantize(transpose2Param, ov::element::f32, 256, {1}, - {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); + {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); bool transA = false; bool transB = false; const auto transpose0 = std::make_shared(fq0, transpose0Const); const auto transpose1 = std::make_shared(fq1, transpose1Const); const auto matMul0 = std::make_shared(transpose0, transpose1, transA, transB); auto fq3 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1}, - {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); + {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); const auto add = std::make_shared(fq3, addParam); - const auto reshape0 = std::make_shared(add, reshape0Const, true); - const auto softMax = std::make_shared(reshape0, 1); - const auto reshape1 = std::make_shared(softMax, reshape1Const, true); - auto fq4 = ov::test::utils::make_fake_quantize(reshape1, ov::element::f32, 256, {1}, - {0}, {0.820726}, {0}, {0.820726}); + const auto softMax = std::make_shared(add, -1); + auto fq4 = ov::test::utils::make_fake_quantize(softMax, ov::element::f32, 256, {1}, + {0}, {0.820726}, {0}, {0.820726}); const auto transpose2 = std::make_shared(fq2, transpose2Const); const auto matMul1 = std::make_shared(fq4, transpose2, transA, transB); auto fq5 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1}, - {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); + {-35.0172004}, {34.7436294}, {-35.0172004}, {34.7436294}); const auto transpose3 = std::make_shared(fq5, transpose3Const); ov::ResultVector results{std::make_shared(transpose3)}; @@ -694,34 +668,20 @@ std::shared_ptr 
MHAQuantMatMul0Function::initOriginal() const { auto transpose2Param = std::make_shared(precision, input_shapes[3]); ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param}; - const auto channel = int64_t(12); - const auto last_dim = input_shapes[0].get_shape().back(); - OPENVINO_ASSERT(last_dim % channel == 0, "Incorrect test configuration"); - const auto new_shape = std::vector{0, 0, channel, static_cast(last_dim) / channel}; - - auto reshape0Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape); - auto reshape1Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape); - auto reshape2Const = ov::op::v0::Constant::create(ov::element::i64, {new_shape.size()}, new_shape); - auto reshape3Const = ov::op::v0::Constant::create(ov::element::i64, {input_shapes[0].size()}, std::vector{0, 0, -1}); - - auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector{0, 2, 1, 3}); - auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector{0, 2, 3, 1}); - auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector{0, 2, 1, 3}); - auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {4}, std::vector{0, 2, 1, 3}); - - const auto reshape1 = std::make_shared(transpose1Param, reshape1Const, true); - const auto reshape2 = std::make_shared(transpose2Param, reshape2Const, true); + const auto shape_rank = input_shapes[0].size(); + auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); + auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 3, 1}); + auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); + auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); - const auto transpose1 = 
std::make_shared(reshape1, transpose1Const); - const auto transpose2 = std::make_shared(reshape2, transpose2Const); + const auto transpose1 = std::make_shared(transpose1Param, transpose1Const); + const auto transpose2 = std::make_shared(transpose2Param, transpose2Const); auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1}, - {-12.5187311}, {12.4209289}, {-12.5187311}, {12.4209289}); + {-12.5187311}, {12.4209289}, {-12.5187311}, {12.4209289}); auto fq1 = ov::test::utils::make_fake_quantize(transpose1, ov::element::f32, 256, {1}, - {-1.43326699}, {1.42206954}, {-1.43326699}, {1.42206954}); - - const auto reshape0 = std::make_shared(fq0, reshape0Const, true); - const auto transpose0 = std::make_shared(reshape0, transpose0Const); + {-1.43326699}, {1.42206954}, {-1.43326699}, {1.42206954}); + const auto transpose0 = std::make_shared(fq0, transpose0Const); const auto matMul0 = std::make_shared(transpose0, fq1); const auto add = std::make_shared(matMul0, addParam); @@ -729,11 +689,10 @@ std::shared_ptr MHAQuantMatMul0Function::initOriginal() const { const auto matMul1 = std::make_shared(softMax, transpose2); auto fq2 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1}, - {-1.81826221}, {1.804057}, {-1.81826221}, {1.804057}); + {-1.81826221}, {1.804057}, {-1.81826221}, {1.804057}); const auto transpose3 = std::make_shared(fq2, transpose3Const); - const auto reshape3 = std::make_shared(transpose3, reshape3Const, true); - ov::ResultVector results{std::make_shared(reshape3)}; + ov::ResultVector results{std::make_shared(transpose3)}; return std::make_shared(results, ngraphParam, "mha"); } std::shared_ptr MHAFQFunction::initOriginal() const { @@ -743,18 +702,15 @@ std::shared_ptr MHAFQFunction::initOriginal() const { auto transpose2Param = std::make_shared(precision, input_shapes[3]); ov::ParameterVector ngraphParam = {transpose0Param, transpose1Param, addParam, transpose2Param}; - const auto shape_rank = 
input_shapes[0].get_shape().size(); + const auto shape_rank = input_shapes[0].size(); auto transpose0Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); auto transpose1Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 3, 1}); auto transpose2Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); auto transpose3Const = ov::op::v0::Constant::create(ov::element::i64, {shape_rank}, std::vector{0, 2, 1, 3}); - const auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1}, - {-5.217694}, {6.661877}, {-5.217694}, {6.661877}); - const auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1}, - {-6.40245}, {6.45286}, {-6.40245}, {6.45286}); - const auto fq_add = ov::test::utils::make_fake_quantize(addParam, ov::element::f32, 256, {1}, - {-1000}, {0}, {-1000}, {0}); + const auto fq0 = ov::test::utils::make_fake_quantize(transpose0Param, ov::element::f32, 256, {1}, {-5.217694}, {6.661877}, {-5.217694}, {6.661877}); + const auto fq1 = ov::test::utils::make_fake_quantize(transpose1Param, ov::element::f32, 256, {1}, {-6.40245}, {6.45286}, {-6.40245}, {6.45286}); + const auto fq_add = ov::test::utils::make_fake_quantize(addParam, ov::element::f32, 256, {1}, {-1000}, {0}, {-1000}, {0}); bool transA = false; bool transB = false; @@ -766,16 +722,13 @@ std::shared_ptr MHAFQFunction::initOriginal() const { const auto mul_deq_const = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{1}, std::vector{0.00098425}); const auto mul_deq = std::make_shared(convert, mul_deq_const); const auto mul = std::make_shared(transpose1, mul_deq); - auto fq1_1 = ov::test::utils::make_fake_quantize(mul, ov::element::f32, 256, {1}, - {-0.8003067}, {0.8066083}, {-0.8003067}, {0.8066083}); + const auto fq1_1 = ov::test::utils::make_fake_quantize(mul, ov::element::f32, 256, {1}, {-0.8003067}, {0.8066083}, 
{-0.8003067}, {0.8066083}); const auto matMul0 = std::make_shared(transpose0, fq1_1, transA, transB); - auto fq2 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1}, - {-14.50351}, {17.65645}, {-14.50351}, {17.65645}); + const auto fq2 = ov::test::utils::make_fake_quantize(matMul0, ov::element::f32, 256, {1}, {-14.50351}, {17.65645}, {-14.50351}, {17.65645}); const auto add = std::make_shared(fq2, fq_add); const auto softMax = std::make_shared(add, 3); const auto matMul1 = std::make_shared(softMax, transpose2, transA, transB); - auto fq3 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1}, - {-1.895786}, {2.0028071}, {-1.895786}, {2.0028071}); + auto fq3 = ov::test::utils::make_fake_quantize(matMul1, ov::element::f32, 256, {1}, {-1.895786}, {2.0028071}, {-1.895786}, {2.0028071}); const auto transpose3 = std::make_shared(fq3, transpose3Const); ov::ResultVector results{std::make_shared(transpose3)}; From f2640a2d7ee57432b66b085540709904c8525afb Mon Sep 17 00:00:00 2001 From: Wenjing Kang Date: Thu, 31 Oct 2024 17:41:31 +0800 Subject: [PATCH 005/104] Update CMAKE_LANG_FLAGS_CONFIG_INIT appending in toolchain to avoid flag repetition (#27352) ### Details: -Currently, when using this toolchain and print the following flags in [CMakeLists](https://github.com/openvinotoolkit/openvino/blob/master/CMakeLists.txt) ``` CMAKE_CXX_FLAGS_RELEASE in OV CMakeLists.txt::: /MT /MT /O2 /Ob2 /DNDEBUG /Zi /FS /Zf /ZH:SHA_256 /guard:cf /Qspectre CMAKE_C_FLAGS_RELEASE in OV CMakeLists.txt::: /MT /MT /O2 /Ob2 /DNDEBUG /Zi /FS /Zf /ZH:SHA_256 /guard:cf /Qspectre CMAKE_CXX_FLAGS_RELEASE_INIT in OV CMakeLists.txt::: /MT /MT /O2 /Ob2 /DNDEBUG CMAKE_C_FLAGS_RELEASE_INIT in OV CMakeLists.txt::: /MT /MT /O2 /Ob2 /DNDEBUG ``` So there is repetition of `/MT` in flags. The change in this PR will fix this problem. 
The flags will be: ``` CMAKE_CXX_FLAGS_RELEASE in OV CMakeLists.txt::: /MT /O2 /Ob2 /DNDEBUG /Zi /FS /Zf /ZH:SHA_256 /guard:cf /Qspectre CMAKE_C_FLAGS_RELEASE in OV CMakeLists.txt::: /MT /O2 /Ob2 /DNDEBUG /Zi /FS /Zf /ZH:SHA_256 /guard:cf /Qspectre CMAKE_CXX_FLAGS_RELEASE_INIT in OV CMakeLists.txt::: /MT /O2 /Ob2 /DNDEBUG CMAKE_C_FLAGS_RELEASE_INIT in OV CMakeLists.txt::: /MT /O2 /Ob2 /DNDEBUG ``` ### Tickets: - *152927* Signed-off-by: Kang Wenjing --- cmake/toolchains/mt.runtime.win32.toolchain.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/toolchains/mt.runtime.win32.toolchain.cmake b/cmake/toolchains/mt.runtime.win32.toolchain.cmake index 9a99781eac0426..b331d370bfe7bf 100644 --- a/cmake/toolchains/mt.runtime.win32.toolchain.cmake +++ b/cmake/toolchains/mt.runtime.win32.toolchain.cmake @@ -28,9 +28,9 @@ if(use_static_runtime) set(flag_var "CMAKE_${lang}_FLAGS${build_type}_INIT") string(REPLACE "/MD" "/MT" ${flag_var} "${${flag_var}}") if (build_type STREQUAL "_DEBUG") - set(${flag_var} "${${flag_var}} /MTd") + set(${flag_var} "/MTd") else() - set(${flag_var} "${${flag_var}} /MT") + set(${flag_var} "/MT") endif() endforeach() endforeach() From 272843d81ad242f2622b8951d922baa299ccdfc1 Mon Sep 17 00:00:00 2001 From: Artemy Skrebkov Date: Thu, 31 Oct 2024 09:44:05 +0000 Subject: [PATCH 006/104] Add support for shape and data_shape parameters (#27314) ### Details: - Move helper function for reshaping to `npu_tools_utils` - Introduce `shape` and `data_shape` params ### Tickets: - E144161 --------- Signed-off-by: Skrebkov, Artemy --- .../tools/common/include/tools_helpers.hpp | 181 ++++++++++++++++++ .../tools/compile_tool/CMakeLists.txt | 3 +- .../intel_npu/tools/compile_tool/main.cpp | 109 +---------- .../tools/compile_tool/tools_helpers.hpp | 81 -------- .../tools/single-image-test/main.cpp | 132 +++++-------- 5 files changed, 236 insertions(+), 270 deletions(-) create mode 100644 
src/plugins/intel_npu/tools/common/include/tools_helpers.hpp delete mode 100644 src/plugins/intel_npu/tools/compile_tool/tools_helpers.hpp diff --git a/src/plugins/intel_npu/tools/common/include/tools_helpers.hpp b/src/plugins/intel_npu/tools/common/include/tools_helpers.hpp new file mode 100644 index 00000000000000..e9743594ad8711 --- /dev/null +++ b/src/plugins/intel_npu/tools/common/include/tools_helpers.hpp @@ -0,0 +1,181 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include +#include +#include +#include +#include + +#include "openvino/openvino.hpp" + +struct InputInfo { + ov::element::Type type; + ov::PartialShape partialShape; + ov::Shape dataShape; + ov::Layout layout; +}; +using InputsInfo = std::map; + +std::string parameterNameToTensorName(std::string& name, std::vector>& inputs_info) { + auto count_name = std::any_of(inputs_info.begin(), inputs_info.end(), [name](ov::Output& port) { + return port.get_names().count(name) > 0; + }); + if (count_name) { + return name; + } else { + auto inputInfo = std::find_if(inputs_info.begin(), inputs_info.end(), [name](ov::Output& port) { + return name == port.get_node()->get_friendly_name(); + }); + if (inputInfo == inputs_info.end()) { + throw std::runtime_error("Provided I/O name \"" + name + + "\" is not found neither in tensor names nor in nodes names."); + } + return inputInfo->get_any_name(); + } +} + +std::map> parseInputParameters(std::string& parameter_string, + std::vector>& input_info) { + // Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all + // inputs) + std::map> return_value; + std::string search_string = parameter_string; + auto start_pos = search_string.find_first_of('['); + auto input_name = search_string.substr(0, start_pos); + while (start_pos != std::string::npos) { + auto end_pos = search_string.find_first_of(']'); + if (end_pos == std::string::npos) + break; + input_name = search_string.substr(0, start_pos); + 
auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1); + if (!input_name.empty()) { + return_value[parameterNameToTensorName(input_name, input_info)].push_back(input_value); + } else { + for (auto& item : input_info) { + return_value[item.get_any_name()].push_back(input_value); + } + } + search_string = search_string.substr(end_pos + 1); + if (search_string.empty() || (search_string.front() != ',' && search_string.front() != '[')) + break; + if (search_string.front() == ',') { + if (search_string.length() > 1) + search_string = search_string.substr(1); + else + throw std::logic_error("Can't parse input parameter string, there is nothing after the comma " + + parameter_string); + } + start_pos = search_string.find_first_of('['); + } + if (!search_string.empty()) + throw std::logic_error("Can't parse input parameter string: " + parameter_string); + return return_value; +} + +void boundDynamicShape(std::shared_ptr& model) { + for (auto&& item : model->get_parameters()) { + auto shape = item->get_partial_shape(); + if (shape.is_static()) { + continue; + } + auto rank = shape.rank(); + if (rank.is_dynamic()) { + throw std::logic_error("Rank \"" + rank.to_string() + "\" of the shape \"" + shape.to_string() + + "\" is dynamic which is not supported by NPU"); + } + auto layout = item->get_layout(); + if (!ov::layout::has_batch(layout)) { + item->set_layout(ov::Layout(layout.to_string().insert(1, "N,"))); + layout = item->get_layout(); + } + if (shape[ov::layout::batch_idx(layout)].is_dynamic()) { + std::cout << "WARNING: Shape \"" + shape.to_string() + "\"" + + " has dynamic batch size which is not supported by NPU\n" + " Setting batch to 1 forcibly" + << std::endl; + ov::set_batch(model, 1); + } + shape = item->get_partial_shape(); + if (shape.is_dynamic()) { + throw std::logic_error("Model's input shape \"" + shape.to_string() + "\"" + + " is dynamic which is not supported by NPU"); + } + } +} + +void setModelBatch(std::shared_ptr& model, 
uint32_t batch = 1) { + if (batch == 1) { + return; + } + for (auto&& item : model->get_parameters()) { + auto shape = item->get_partial_shape(); + auto rank = shape.rank(); + if (rank.is_dynamic()) { + throw std::logic_error("Rank \"" + rank.to_string() + "\" of the shape \"" + shape.to_string() + + "\" is dynamic which is not supported by NPU"); + } + auto layout = item->get_layout(); + if (!ov::layout::has_batch(layout)) { + item->set_layout(ov::Layout(layout.to_string().insert(1, "N,"))); + layout = item->get_layout(); + } + if (shape[ov::layout::batch_idx(layout)].is_dynamic()) { + throw std::logic_error("ERROR: Shape \"" + shape.to_string() + "\"" + + " has dynamic batch size which is not supported by NPU\n" + "Cannot apply fixed batch: " + + std::to_string(batch) + + ". Please remove the parameter from config: \"override_model_batch_size\""); + } + ov::set_batch(model, batch); + } +} + +void reshape(ov::OutputVector inputsInfo, InputsInfo& infoMap, std::shared_ptr& model, + std::string& shapeString, int overrideModelBatchSize, std::string_view device) { + std::vector infoMaps; + if (!shapeString.empty()) { + std::map> shapesMap = parseInputParameters(shapeString, inputsInfo); + + if (overrideModelBatchSize != 1) { + throw std::logic_error(R"(Incompatible params: "shape" and "override_model_batch_size")"); + } + for (auto& item : inputsInfo) { + InputInfo info; + auto name = item.get_any_name(); + + if (!shapesMap.empty()) { + if (shapesMap.count(name)) { + if (shapesMap.at(name).size() > 1) { + // Example: -shape input1[..][..] 
+ throw std::logic_error("shape command line parameter doesn't support multiple " + "shapes for one input."); + } + info.partialShape = shapesMap.at(name)[0]; + } else { + info.partialShape = item.get_partial_shape(); + } + } + infoMap[name] = std::move(info); + infoMaps.push_back(infoMap); + } + std::map newShapes; + for (auto& item : infoMaps) { + for (auto& map : item) { + if (!newShapes.count(map.first)) { + newShapes[map.first] = map.second.partialShape; + } + } + } + model->reshape(newShapes); + } else { + if (device.find("NPU") != std::string::npos || + // FIXME: SIT on CPU also requires to bound dynamic shapes + device.find("CPU") != std::string::npos || device.find("TEMPLATE") != std::string::npos) { + boundDynamicShape(model); + } + + setModelBatch(model, overrideModelBatchSize); + } +} diff --git a/src/plugins/intel_npu/tools/compile_tool/CMakeLists.txt b/src/plugins/intel_npu/tools/compile_tool/CMakeLists.txt index 66ff751b9f5162..fc485030359428 100644 --- a/src/plugins/intel_npu/tools/compile_tool/CMakeLists.txt +++ b/src/plugins/intel_npu/tools/compile_tool/CMakeLists.txt @@ -24,7 +24,8 @@ ov_add_target(ADD_CPPLINT PRIVATE openvino::runtime gflags - Threads::Threads) + Threads::Threads + npu_tools_utils) set_target_properties(${TARGET_NAME} PROPERTIES FOLDER ${CMAKE_CURRENT_SOURCE_DIR} diff --git a/src/plugins/intel_npu/tools/compile_tool/main.cpp b/src/plugins/intel_npu/tools/compile_tool/main.cpp index 471fd55bb82b3f..7a088d1afc69e2 100644 --- a/src/plugins/intel_npu/tools/compile_tool/main.cpp +++ b/src/plugins/intel_npu/tools/compile_tool/main.cpp @@ -14,11 +14,12 @@ #include -#include "openvino/core/partial_shape.hpp" -#include "openvino/openvino.hpp" +#include +#include #include "tools_helpers.hpp" + static constexpr char help_message[] = "Optional. Print the usage message."; static constexpr char model_message[] = "Required. 
Path to the XML model."; @@ -168,64 +169,6 @@ bool isFP32(const ov::element::Type& type) { return type == ov::element::f32; } -void boundDynamicShape(std::shared_ptr& model) { - for (auto&& item : model->get_parameters()) { - auto shape = item->get_partial_shape(); - if (shape.is_static()) { - continue; - } - auto rank = shape.rank(); - if (rank.is_dynamic()) { - throw std::logic_error("Rank \"" + rank.to_string() + "\" of the shape \"" + shape.to_string() + - "\" is dynamic which is not supported by NPU"); - } - auto layout = item->get_layout(); - if (!ov::layout::has_batch(layout)) { - item->set_layout(ov::Layout(layout.to_string().insert(1, "N,"))); - layout = item->get_layout(); - } - if (shape[ov::layout::batch_idx(layout)].is_dynamic()) { - std::cout << "WARNING: Shape \"" + shape.to_string() + "\"" + - " has dynamic batch size which is not supported by NPU\n" - " Setting batch to 1 forcibly" - << std::endl; - ov::set_batch(model, 1); - } - shape = item->get_partial_shape(); - if (shape.is_dynamic()) { - throw std::logic_error("Model's input shape \"" + shape.to_string() + "\"" + - " is dynamic which is not supported by NPU"); - } - } -} - -void setModelBatch(std::shared_ptr& model, uint32_t batch = 1) { - if (batch == 1) { - return; - } - for (auto&& item : model->get_parameters()) { - auto shape = item->get_partial_shape(); - auto rank = shape.rank(); - if (rank.is_dynamic()) { - throw std::logic_error("Rank \"" + rank.to_string() + "\" of the shape \"" + shape.to_string() + - "\" is dynamic which is not supported by NPU"); - } - auto layout = item->get_layout(); - if (!ov::layout::has_batch(layout)) { - item->set_layout(ov::Layout(layout.to_string().insert(1, "N,"))); - layout = item->get_layout(); - } - if (shape[ov::layout::batch_idx(layout)].is_dynamic()) { - throw std::logic_error("ERROR: Shape \"" + shape.to_string() + "\"" + - " has dynamic batch size which is not supported by NPU\n" - "Cannot apply fixed batch: " + - std::to_string(batch) + - ". 
Please remove the parameter from config: \"override_model_batch_size\""); - } - ov::set_batch(model, batch); - } -} - void configurePrePostProcessing(std::shared_ptr& model, const std::string& ip, const std::string& op, const std::string& iop, const std::string& il, const std::string& ol, const std::string& iol, const std::string& iml, const std::string& oml, @@ -475,50 +418,6 @@ std::string getFileNameFromPath(const std::string& path, using TimeDiff = std::chrono::milliseconds; -void reshape(ov::OutputVector inputs_info, InputsInfo& info_map, std::shared_ptr& model) { - std::vector info_maps; - if (!FLAGS_shape.empty()) { - std::map> shapes_map = parseInputParameters(FLAGS_shape, inputs_info); - - if (FLAGS_override_model_batch_size != 1) { - throw std::logic_error("Incompatible params: \"shape\" and \"override_model_batch_size\""); - } - for (auto& item : inputs_info) { - InputInfo info; - auto name = item.get_any_name(); - - if (!shapes_map.empty()) { - if (shapes_map.count(name)) { - if (shapes_map.at(name).size() > 1) { - // Example: -shape input1[..][..] 
- throw std::logic_error("shape command line parameter doesn't support multiple " - "shapes for one input."); - } - info.partialShape = shapes_map.at(name)[0]; - } else { - info.partialShape = item.get_partial_shape(); - } - } - info_map[name] = std::move(info); - info_maps.push_back(info_map); - } - std::map newShapes; - for (auto& item : info_maps) { - for (auto& map : item) { - if (!newShapes.count(map.first)) { - newShapes[map.first] = map.second.partialShape; - } - } - } - model->reshape(newShapes); - } else { - if (FLAGS_d.find("NPU") != std::string::npos) { - boundDynamicShape(model); - } - - setModelBatch(model, FLAGS_override_model_batch_size); - } -} int main(int argc, char* argv[]) { try { @@ -552,7 +451,7 @@ int main(int argc, char* argv[]) { InputsInfo info_map; std::cout << "Performing reshape" << std::endl; - reshape(std::move(inputs_info), info_map, model); + reshape(std::move(inputs_info), info_map, model, FLAGS_shape, FLAGS_override_model_batch_size, FLAGS_d); std::cout << "Configuring model pre & post processing" << std::endl; configurePrePostProcessing(model, FLAGS_ip, FLAGS_op, FLAGS_iop, FLAGS_il, FLAGS_ol, FLAGS_iol, FLAGS_iml, diff --git a/src/plugins/intel_npu/tools/compile_tool/tools_helpers.hpp b/src/plugins/intel_npu/tools/compile_tool/tools_helpers.hpp deleted file mode 100644 index 6d42fd142b8971..00000000000000 --- a/src/plugins/intel_npu/tools/compile_tool/tools_helpers.hpp +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "openvino/openvino.hpp" - -struct InputInfo { - ov::element::Type type; - ov::PartialShape partialShape; - ov::Shape dataShape; - ov::Layout layout; -}; -using InputsInfo = std::map; - -std::string parameterNameToTensorName(std::string& name, std::vector>& inputs_info) { - auto count_name = std::any_of(inputs_info.begin(), 
inputs_info.end(), [name](ov::Output& port) { - return port.get_names().count(name) > 0; - }); - if (count_name) { - return name; - } else { - auto inputInfo = std::find_if(inputs_info.begin(), inputs_info.end(), [name](ov::Output& port) { - return name == port.get_node()->get_friendly_name(); - }); - if (inputInfo == inputs_info.end()) { - throw std::runtime_error("Provided I/O name \"" + name + - "\" is not found neither in tensor names nor in nodes names."); - } - return inputInfo->get_any_name(); - } -} - -std::map> parseInputParameters(std::string& parameter_string, - std::vector>& input_info) { - // Parse parameter string like "input0[value0],input1[value1]" or "[value]" (applied to all - // inputs) - std::map> return_value; - std::string search_string = parameter_string; - auto start_pos = search_string.find_first_of('['); - auto input_name = search_string.substr(0, start_pos); - while (start_pos != std::string::npos) { - auto end_pos = search_string.find_first_of(']'); - if (end_pos == std::string::npos) - break; - input_name = search_string.substr(0, start_pos); - auto input_value = search_string.substr(start_pos + 1, end_pos - start_pos - 1); - if (!input_name.empty()) { - return_value[parameterNameToTensorName(input_name, input_info)].push_back(input_value); - } else { - for (auto& item : input_info) { - return_value[item.get_any_name()].push_back(input_value); - } - } - search_string = search_string.substr(end_pos + 1); - if (search_string.empty() || (search_string.front() != ',' && search_string.front() != '[')) - break; - if (search_string.front() == ',') { - if (search_string.length() > 1) - search_string = search_string.substr(1); - else - throw std::logic_error("Can't parse input parameter string, there is nothing after the comma " + - parameter_string); - } - start_pos = search_string.find_first_of('['); - } - if (!search_string.empty()) - throw std::logic_error("Can't parse input parameter string: " + parameter_string); - return return_value; -} 
diff --git a/src/plugins/intel_npu/tools/single-image-test/main.cpp b/src/plugins/intel_npu/tools/single-image-test/main.cpp index 4018982b022ed3..5658c18650243b 100644 --- a/src/plugins/intel_npu/tools/single-image-test/main.cpp +++ b/src/plugins/intel_npu/tools/single-image-test/main.cpp @@ -4,9 +4,11 @@ // #include "image_quality_helper.hpp" +#include "openvino/core/partial_shape.hpp" #include "semantic_segmentation_helpers.hpp" #include "tensor_utils.hpp" #include "yolo_helpers.hpp" +#include "tools_helpers.hpp" #include #include @@ -31,7 +33,8 @@ using TensorMap = std::map; struct TensorDescriptor { ov::element::Type precision; - ov::Shape shape; + ov::PartialShape shape; + ov::Shape dataShape; ov::Layout layout; }; @@ -83,6 +86,15 @@ DEFINE_string(oml, "", " is supported"); DEFINE_bool(img_as_bin, false, "Force binary input even if network expects an image"); DEFINE_bool(pc, false, "Report performance counters"); +DEFINE_string( + shape, "", + "Optional. Set shape for model input. For example, \"input1[1,3,224,224],input2[1,4]\" or \"[1,3,224,224]\"" + " in case of one input size. This parameter affects model input shape and can be dynamic." + " For dynamic dimensions use symbol `?` or '-1'. Ex. [?,3,?,?]." + " For bounded dimensions specify range 'min..max'. Ex. [1..10,3,?,?]."); +DEFINE_string(data_shape, "", + "Required for models with dynamic shapes. Set shape for input blobs. Only one shape can be set." 
+ "In case of one input size: \"[1,3,224,224]\""); // for using input image mean and scale static constexpr char mean_values_message[] = @@ -1450,65 +1462,6 @@ std::pair runInfer(ov::InferRequest& inferRequest, ov::Compi return std::make_pair(out, profData); } -void boundDynamicShape(std::shared_ptr& model) { - for (auto&& item : model->get_parameters()) { - auto shape = item->get_partial_shape(); - if (shape.is_static()) { - continue; - } - auto rank = shape.rank(); - if (rank.is_dynamic()) { - throw std::logic_error("Rank \"" + rank.to_string() + "\" of the shape \"" + shape.to_string() + - "\" is dynamic which is not supported by SIT"); - } - auto layout = item->get_layout(); - if (!ov::layout::has_batch(layout)) { - item->set_layout(ov::Layout(layout.to_string().insert(1, "N,"))); - layout = item->get_layout(); - } - if (shape[ov::layout::batch_idx(layout)].is_dynamic()) { - std::cout << "WARNING: Shape \"" + shape.to_string() + "\"" + - " has dynamic batch size which is not supported by SIT\n" - " Setting batch to 1 forcibly" - << std::endl; - ov::set_batch(model, 1); - } - shape = item->get_partial_shape(); - if (shape.is_dynamic()) { - throw std::logic_error("Model's input shape \"" + shape.to_string() + "\"" + - " is dynamic which is not supported by SIT"); - } - } -} - -void setModelBatch(std::shared_ptr& model, uint32_t batch) { - if (batch == 1) { - return; - } - - // New batch value is applicable if the model has non dynamic inputs/outputs only - // Amend layout by adding N if it has no batch dimension - for (auto&& item : model->get_parameters()) { - auto shape = item->get_partial_shape(); - auto rank = shape.rank(); - if (rank.is_dynamic()) { - throw std::logic_error("Rank \"" + rank.to_string() + "\" of the shape \"" + shape.to_string() + - "\" is dynamic which is not supported by SIT"); - } - auto layout = item->get_layout(); - if (!ov::layout::has_batch(layout)) { - item->set_layout(ov::Layout(layout.to_string().insert(1, "N,"))); - } - - shape = 
item->get_partial_shape(); - if (shape.is_dynamic()) { - throw std::logic_error("Model's input shape \"" + shape.to_string() + "\"" + - " is dynamic which is not supported by SIT"); - } - } - ov::set_batch(model, batch); -} - // FIXME: User must provide layout explicitly. // No "default" layout for IRv11 models. static ov::Layout getLayoutByRank(const size_t rank) { @@ -1558,8 +1511,8 @@ bool testSSDDetection(const TensorMap& outputs, const TensorMap& references, const ov::Tensor& reference = references.begin()->second; const TensorDescriptor& inputDescriptor = inputDescriptors.begin()->second; - const auto imgWidth = inputDescriptor.shape.at(ov::layout::width_idx(inputDescriptor.layout)); - const auto imgHeight = inputDescriptor.shape.at(ov::layout::height_idx(inputDescriptor.layout)); + const auto imgWidth = inputDescriptor.dataShape.at(ov::layout::width_idx(inputDescriptor.layout)); + const auto imgHeight = inputDescriptor.dataShape.at(ov::layout::height_idx(inputDescriptor.layout)); auto confThresh = FLAGS_confidence_threshold; auto probTolerance = FLAGS_prob_tolerance; @@ -1592,8 +1545,8 @@ bool testYoloV2(const TensorMap& outputs, const TensorMap& references, const Ten const TensorDescriptor& inputDescriptor = inputDescriptors.begin()->second; - const auto imgWidth = inputDescriptor.shape.at(ov::layout::width_idx(inputDescriptor.layout)); - const auto imgHeight = inputDescriptor.shape.at(ov::layout::height_idx(inputDescriptor.layout)); + const auto imgWidth = inputDescriptor.dataShape.at(ov::layout::width_idx(inputDescriptor.layout)); + const auto imgHeight = inputDescriptor.dataShape.at(ov::layout::height_idx(inputDescriptor.layout)); double confThresh = FLAGS_confidence_threshold; double probTolerance = FLAGS_prob_tolerance; double boxTolerance = FLAGS_box_tolerance; @@ -1624,8 +1577,8 @@ bool testYoloV3(const TensorMap& outputs, const TensorMap& references, const Ten "Mismatch between the number of model outputs and the number of references"); const 
TensorDescriptor& inputDescriptor = inputDescriptors.begin()->second; - const auto imgWidth = inputDescriptor.shape.at(ov::layout::width_idx(inputDescriptor.layout)); - const auto imgHeight = inputDescriptor.shape.at(ov::layout::height_idx(inputDescriptor.layout)); + const auto imgWidth = inputDescriptor.dataShape.at(ov::layout::width_idx(inputDescriptor.layout)); + const auto imgHeight = inputDescriptor.dataShape.at(ov::layout::height_idx(inputDescriptor.layout)); double confThresh = FLAGS_confidence_threshold; double probTolerance = FLAGS_prob_tolerance; @@ -1663,8 +1616,8 @@ bool testYoloV4(const TensorMap& outputs, const TensorMap& references, const Ten "Mismatch between the number of model outputs and the number of references"); const TensorDescriptor& inputDescriptor = inputDescriptors.begin()->second; - const auto imgWidth = inputDescriptor.shape.at(ov::layout::width_idx(inputDescriptor.layout)); - const auto imgHeight = inputDescriptor.shape.at(ov::layout::height_idx(inputDescriptor.layout)); + const auto imgWidth = inputDescriptor.dataShape.at(ov::layout::width_idx(inputDescriptor.layout)); + const auto imgHeight = inputDescriptor.dataShape.at(ov::layout::height_idx(inputDescriptor.layout)); double confThresh = FLAGS_confidence_threshold; double probTolerance = FLAGS_prob_tolerance; @@ -1733,6 +1686,16 @@ bool testMeanIoU(const TensorMap& outputs, const TensorMap& references, const La return compare_mean_IoU(iou, semSegThreshold, classes); } +static ov::Shape parseDataShape(const std::string& dataShapeStr) { + std::vector dataShape; + std::istringstream ss(dataShapeStr); + std::string token; + while (std::getline(ss, token, ',')) { + dataShape.push_back(std::stoul(token)); + } + return ov::Shape(dataShape); +} + static int runSingleImageTest() { std::cout << "Run single image test" << std::endl; try { @@ -1814,12 +1777,12 @@ static int runSingleImageTest() { auto model = core.read_model(FLAGS_network); nameIOTensors(model); - setModelBatch(model, 
FLAGS_override_model_batch_size); - if (FLAGS_device.find("NPU") != std::string::npos || - // FIXME: SIT on CPU also requires to bound dynamic shapes - FLAGS_device.find("CPU") != std::string::npos || FLAGS_device.find("TEMPLATE") != std::string::npos) { - boundDynamicShape(model); - } + auto inputs_info = std::const_pointer_cast(model)->inputs(); + InputsInfo info_map; + + std::cout << "Performing reshape" << std::endl; + reshape(std::move(inputs_info), info_map, model, FLAGS_shape, + FLAGS_override_model_batch_size, FLAGS_device); ov::preprocess::PrePostProcessor ppp(model); @@ -1856,11 +1819,11 @@ static int runSingleImageTest() { inModelLayout.has_value()) { inLayerModelLayout = inModelLayout.value(); } else { - const auto shape = inputInfo[i].get_shape(); + const auto shape = inputInfo[i].get_partial_shape(); inLayerModelLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Configuring preprocessing. Since --iml option isn't set, input model " "layout for layer \"" - << inputInfo[i].get_any_name() << "\" is infered from shape: " << toString(shape) + << inputInfo[i].get_any_name() << "\" is infered from shape: " << shape.to_string() << " rank (" << shape.size() << ") as " << inLayerModelLayout.to_string() << std::endl; } @@ -1917,11 +1880,11 @@ static int runSingleImageTest() { outModelLayout.has_value()) { outLayerModelLayout = outModelLayout.value(); } else { - const auto shape = outputInfo[i].get_shape(); + const auto shape = outputInfo[i].get_partial_shape(); outLayerModelLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Configuring preprocessing. 
Since --oml option isn't set, output model " "layout for layer \"" - << outputInfo[i].get_any_name() << "\" is infered from shape: " << toString(shape) + << outputInfo[i].get_any_name() << "\" is infered from shape: " << shape.to_shape() << " rank (" << shape.size() << ") as " << outLayerModelLayout.to_string() << std::endl; } @@ -1933,6 +1896,7 @@ static int runSingleImageTest() { } } + std::cout << "Compile model" << std::endl; compiledModel = core.compile_model(ppp.build(), FLAGS_device); } else { std::cout << "Import network " << FLAGS_network << std::endl; @@ -1994,7 +1958,8 @@ static int runSingleImageTest() { // Load the input data for (const auto& inputInfo : inputsInfo) { - const ov::Shape& shape = inputInfo.get_shape(); + const auto& shape = inputInfo.get_partial_shape(); + const auto dataShape = shape.is_static() ? shape.get_shape() : parseDataShape(FLAGS_data_shape); const ov::element::Type& precision = inputInfo.get_element_type(); // Determine the input layout @@ -2012,19 +1977,20 @@ static int runSingleImageTest() { inputLayout = getLayoutByRank(shape.size()); std::cout << "WARNING: Loading input data. Since --iml option isn't set, input model layout for " "layer \"" - << inputInfo.get_any_name() << "\" is infered from shape: " << toString(shape) + << inputInfo.get_any_name() << "\" is infered from shape: " << shape.to_shape() << " rank (" << shape.size() << ") as " << inputLayout.to_string() << std::endl; } - inputDescriptors.emplace(inputInfo.get_any_name(), TensorDescriptor{precision, shape, inputLayout}); + inputDescriptors.emplace(inputInfo.get_any_name(), TensorDescriptor{precision, shape, + dataShape, inputLayout}); std::cout << "Load input #" << inputInd << " from " << inputFiles[inputInd] << " as " << precision << " " << inputLayout.to_string() << " " << shape << std::endl; const ov::Tensor tensor = !FLAGS_img_as_bin - ? 
loadInput(precision, shape, inputLayout, inputFiles[inputInd], FLAGS_color_format) - : loadInput(precision, shape, inputLayout, inputFiles[inputInd], FLAGS_color_format, + ? loadInput(precision, dataShape, inputLayout, inputFiles[inputInd], FLAGS_color_format) + : loadInput(precision, dataShape, inputLayout, inputFiles[inputInd], FLAGS_color_format, inputBinPrecisionForOneInfer[numberOfTestCase][inputInd]); std::ostringstream ostr; ostr << netFileName << "_input_" << inputInd << "_case_" << numberOfTestCase << ".blob"; From c902a0144a45aff068c15726fb27773feaa1f2ea Mon Sep 17 00:00:00 2001 From: Mingyu Kim Date: Thu, 31 Oct 2024 19:33:21 +0900 Subject: [PATCH 007/104] [GPU] update onednn (#27349) --- src/plugins/intel_gpu/thirdparty/onednn_gpu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/thirdparty/onednn_gpu b/src/plugins/intel_gpu/thirdparty/onednn_gpu index 062d247e7853b1..1722066ad4c0f1 160000 --- a/src/plugins/intel_gpu/thirdparty/onednn_gpu +++ b/src/plugins/intel_gpu/thirdparty/onednn_gpu @@ -1 +1 @@ -Subproject commit 062d247e7853b14ed287a130cc2dc221187430aa +Subproject commit 1722066ad4c0f15495f2d0fcbe9deb2bfd188c36 From a0b73e0a7a69873582301a460365792183101ab3 Mon Sep 17 00:00:00 2001 From: Przemyslaw Wysocki Date: Thu, 31 Oct 2024 12:07:18 +0100 Subject: [PATCH 008/104] [PyOV] Extend Python API with `Squeeze-15` (#27281) ### Details: - This PR includes commits from https://github.com/openvinotoolkit/openvino/pull/26995 ### Tickets: - CVS-154024 --------- Signed-off-by: p-wysocki Co-authored-by: Michal Barnas Co-authored-by: Roman Kazantsev Co-authored-by: Michal Lukaszewski --- .../src/openvino/runtime/opset15/__init__.py | 2 +- .../src/openvino/runtime/opset15/ops.py | 39 ++++++++++++++ .../python/tests/test_graph/test_ops_fused.py | 11 ---- .../python/tests/test_graph/test_squeeze.py | 51 +++++++++++++++++++ 4 files changed, 91 insertions(+), 12 deletions(-) create mode 100644 
src/bindings/python/tests/test_graph/test_squeeze.py diff --git a/src/bindings/python/src/openvino/runtime/opset15/__init__.py b/src/bindings/python/src/openvino/runtime/opset15/__init__.py index 6cc9c24827a85f..c4dd48d9087ae1 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/__init__.py +++ b/src/bindings/python/src/openvino/runtime/opset15/__init__.py @@ -188,7 +188,7 @@ from openvino.runtime.opset1.ops import split from openvino.runtime.opset1.ops import sqrt from openvino.runtime.opset1.ops import squared_difference -from openvino.runtime.opset1.ops import squeeze +from openvino.runtime.opset15.ops import squeeze from openvino.runtime.opset15.ops import stft from openvino.runtime.opset1.ops import strided_slice from openvino.runtime.opset1.ops import subtract diff --git a/src/bindings/python/src/openvino/runtime/opset15/ops.py b/src/bindings/python/src/openvino/runtime/opset15/ops.py index b3a131602af703..93aacb29572340 100644 --- a/src/bindings/python/src/openvino/runtime/opset15/ops.py +++ b/src/bindings/python/src/openvino/runtime/opset15/ops.py @@ -348,3 +348,42 @@ def search_sorted( inputs = as_nodes(sorted_sequence, values, name=name) attributes = {"right_mode": right_mode} return _get_node_factory_opset15().create("SearchSorted", inputs, attributes) + + +@nameable_op +def squeeze( + data: NodeInput, + axes: Optional[NodeInput] = None, + allow_axis_skip: bool = False, + name: Optional[str] = None, +) -> Node: + """Perform squeeze operation on input tensor. + + :param data: The node with data tensor. + :param axes: Optional list of integers, indicating the dimensions to squeeze. + Negative indices are supported. One of: input node or array. + :param allow_axis_skip: If true, shape inference results in a dynamic rank, when + selected axis has value 1 in its dynamic range. Used only if axes input + is given. Defaults to false. + :param name: Optional new name for output node. + :return: The new node performing a squeeze operation on input tensor. 
+ + Remove single-dimensional entries from the shape of a tensor. + Takes an optional parameter `axes` with a list of axes to squeeze. + If `axes` is not provided, all the single dimensions will be removed from the shape. + + For example: + + Inputs: tensor with shape [1, 2, 1, 3, 1, 1], axes=[2, 4] + + Result: tensor with shape [1, 2, 3, 1] + """ + if axes is None: + inputs = as_nodes(data, name=name) + else: + inputs = as_nodes(data, axes, name=name) + return _get_node_factory_opset15().create( + "Squeeze", + inputs, + {"allow_axis_skip": allow_axis_skip} + ) diff --git a/src/bindings/python/tests/test_graph/test_ops_fused.py b/src/bindings/python/tests/test_graph/test_ops_fused.py index bdbf4a1a9f1f9c..2bab743bfd7afb 100644 --- a/src/bindings/python/tests/test_graph/test_ops_fused.py +++ b/src/bindings/python/tests/test_graph/test_ops_fused.py @@ -110,17 +110,6 @@ def test_clamp_operator(): assert list(model.get_output_shape(0)) == [2, 2] -def test_squeeze_operator(): - data_shape = [1, 2, 1, 3, 1, 1] - parameter_data = ov.parameter(data_shape, name="Data", dtype=np.float32) - axes = [2, 4] - model = ov.squeeze(parameter_data, axes) - - assert model.get_type_name() == "Squeeze" - assert model.get_output_size() == 1 - assert list(model.get_output_shape(0)) == [1, 2, 3, 1] - - def test_squared_difference_operator(): x1_shape = [1, 2, 3, 4] x2_shape = [2, 3, 4] diff --git a/src/bindings/python/tests/test_graph/test_squeeze.py b/src/bindings/python/tests/test_graph/test_squeeze.py new file mode 100644 index 00000000000000..869d84a0414841 --- /dev/null +++ b/src/bindings/python/tests/test_graph/test_squeeze.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2018-2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import openvino.runtime.opset1 as ov_opset1 +import openvino.runtime.opset15 as ov_opset15 +import numpy as np +import pytest + + +def test_squeeze_v1_operator(): + data_shape = [1, 2, 1, 3, 1, 1] + parameter_data = 
ov_opset1.parameter(data_shape, name="Data", dtype=np.float32) + axes = [2, 4] + model = ov_opset1.squeeze(parameter_data, axes) + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == [1, 2, 3, 1] + + +@pytest.mark.parametrize(("input_shape", "axes", "allow_axis_skip", "expected_shape"), [ + ((1, 2, 1, 3, 1, 1), [1, 2, 4], True, [1, 2, 3, 1]), + ((1, 2, 1, 3, 1, 1), [1, 2, 4], False, [1, 2, 3, 1]), + ((2, -1, 3), [1], False, [2, 3]) +]) +def test_squeeze_v15_operator(input_shape, axes, allow_axis_skip, expected_shape): + parameter_data = ov_opset15.parameter(input_shape, name="Data", dtype=np.float32) + model = ov_opset15.squeeze(parameter_data, axes, allow_axis_skip, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == expected_shape + + +def test_squeeze_v15_dynamic_rank_output(): + parameter_data = ov_opset15.parameter((2, -1, 3), name="Data", dtype=np.float32) + model = ov_opset15.squeeze(parameter_data, [1], True, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert model.get_output_partial_shape(0).to_string() == "[...]" + + +def test_squeeze_v15_axes_not_given(): + parameter_data = ov_opset15.parameter((1, 3, 1, 1, 3, 5), name="Data", dtype=np.float32) + model = ov_opset15.squeeze(data=parameter_data, name="Squeeze") + + assert model.get_type_name() == "Squeeze" + assert model.get_output_size() == 1 + assert list(model.get_output_shape(0)) == [3, 3, 5] From b9a94c3f8b83deb41ba2e748150d70157784f96b Mon Sep 17 00:00:00 2001 From: Ivan Tikhonov Date: Thu, 31 Oct 2024 15:08:57 +0400 Subject: [PATCH 009/104] [ONNX] Update DequantizeLinear21 converter (#27351) ### Details: Aligned with the canonical form of the dequantization subgraph. 
Reshape op has been moved up right after the Constant, it will be const folded in MOC, this is ok, Reshape const folding doesn't copy a constant, just copies a pointer. And ConvertLike were replaced with Convert. Perhaps that's a pretty rough change and we need to add a check here that the scale is a contant. And in that case use Convert instead of ConvertLike, if scale is not a constant, maybe we should leave ConvertLike. ### Tickets: - *https://jira.devtools.intel.com/browse/CVS-156329* --- .../frontend/src/op/dequantize_linear.cpp | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp index b09bc73467bc10..d7b5214f3e53f4 100644 --- a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp +++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp @@ -221,19 +221,8 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { FRONT_END_GENERAL_CHECK(src_x.get_partial_shape().is_static(), "DequantizeLinear cannot operate with dynamic shapes of input X"); - const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); - - if (inputs.size() > 2) { - zp = inputs[2]; - if (zp.get_element_type() != scale.get_element_type()) { - zp = std::make_shared(zp, scale); - } - zp = std::make_shared(zp, unsqueezed_axes); - } - const auto axis = node.get_attribute_value("axis", 1); const auto block_size = static_cast(node.get_attribute_value("block_size", 0)); - const auto scale_type = scale.get_element_type(); FRONT_END_GENERAL_CHECK(axis == 0, "Axis != 0 isn't supported"); FRONT_END_GENERAL_CHECK(block_size > 0, "block_size must be greater than zero"); @@ -241,16 +230,30 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { src_x.get_shape()[0] % block_size == 0, "DequantizeLinear doesn't support case when first dimension of X cannot be divided by 
block_size"); - const auto& x = src_x.get_element_type() == scale_type ? src_x : std::make_shared(src_x, scale); + ov::Output broadcastable_x = op::util::reshape( + src_x, + Shape{static_cast(src_x.get_shape()[0]) / block_size, block_size, src_x.get_shape()[1]}); + + const auto& unsqueezed_axes = std::make_shared(ov::element::i64, Shape{1}, std::vector{1}); + + const auto scale_type = scale.get_element_type(); + if (inputs.size() > 2) { + zp = inputs[2]; + if (zp.get_element_type() != scale.get_element_type()) { + zp = std::make_shared(zp, scale_type); + } + zp = std::make_shared(zp, unsqueezed_axes); + } + + const auto& x = src_x.get_element_type() == scale_type ? broadcastable_x + : std::make_shared(broadcastable_x, scale_type); // For further broadcasting scales and zp - reshape input to a shape [x.shape[0]/block_size, block_size, x.shape[1]] - ov::Output broadcastable_x = - op::util::reshape(x, Shape{static_cast(x.get_shape()[0]) / block_size, block_size, x.get_shape()[1]}); // Adding additional dimension for broadcasting scale = std::make_shared(scale, unsqueezed_axes); if (zp.get_node_shared_ptr()) { - broadcastable_x = std::make_shared(broadcastable_x, zp); + broadcastable_x = std::make_shared(x, zp); } const auto& scaled_x = std::make_shared(broadcastable_x, scale); From a488aec3812c8998028bab7e5996bb1c057f162e Mon Sep 17 00:00:00 2001 From: Roman Kazantsev Date: Fri, 1 Nov 2024 09:20:33 +0400 Subject: [PATCH 010/104] [TF FE] Run string ops tests on ARM (#27367) **Details:** Since openvino-tokenizers is built for ARM in the precommit, we are ready to switch on String ops tests **Ticket:** TBD Signed-off-by: Kazantsev, Roman --- .../tensorflow_tests/test_tf_LookupTableFind.py | 8 -------- .../tensorflow_tests/test_tf_RaggedTensorToSparse.py | 6 ------ .../tensorflow_tests/test_tf_RaggedTensorToTensor.py | 10 ---------- .../tensorflow_tests/test_tf_StaticRegexReplace.py | 6 ------ .../tensorflow_tests/test_tf_StringLower.py | 10 ---------- 
.../tensorflow_tests/test_tf_StringSplitV2.py | 6 ------ .../tensorflow_tests/test_tf_StringToHashBucketFast.py | 6 ------ 7 files changed, 52 deletions(-) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py index bd1422f8719cea..97177a5adeec13 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_LookupTableFind.py @@ -1,8 +1,6 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import platform - import numpy as np import pytest import tensorflow as tf @@ -91,12 +89,6 @@ def test_lookup_table_find(self, hash_table_type, keys_shape, params, ie_device, if ie_device == 'GPU' or run_in_jenkins(): pytest.skip("operation extesion is not supported on GPU or " "No layout format available for gather:LookupTableFind issue") - if params['keys_type'] == str: - if platform.system() in ('Darwin') or platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', - 'ARM64']: - pytest.xfail(reason='126314, 132699: Build tokenizers for ARM and MacOS') self._test(*self.create_lookup_table_find_net(hash_table_type=hash_table_type, keys_shape=keys_shape, **params), ie_device, precision, ir_version, temp_dir=temp_dir, diff --git a/tests/layer_tests/tensorflow_tests/test_tf_RaggedTensorToSparse.py b/tests/layer_tests/tensorflow_tests/test_tf_RaggedTensorToSparse.py index 621b8430f64fdc..f0832676f0f982 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_RaggedTensorToSparse.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_RaggedTensorToSparse.py @@ -1,8 +1,6 @@ # Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import platform - import numpy as np import pytest import tensorflow as tf @@ -55,10 +53,6 @@ def create_ragged_tensor_to_sparse_net(self, rt_dense_values_shape, rt_dense_val ]) @pytest.mark.precommit @pytest.mark.nightly - 
@pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - reason='126314, 132699: Build tokenizers for ARM and MacOS') def test_ragged_tensor_to_sparse(self, rt_dense_values_shape, rt_dense_values_type, rt_nested_splits, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): if ie_device == 'GPU' or run_in_jenkins(): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_RaggedTensorToTensor.py b/tests/layer_tests/tensorflow_tests/test_tf_RaggedTensorToTensor.py index 39afde0a2c6b08..0267874eb98b35 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_RaggedTensorToTensor.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_RaggedTensorToTensor.py @@ -1,8 +1,6 @@ # Copyright (C) 2022-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import platform - import numpy as np import pytest import tensorflow as tf @@ -52,10 +50,6 @@ def create_ragged_tensor_to_tensor_net(self, shape_type, shape_value, values_sha @pytest.mark.parametrize('row_partition_types', [["ROW_SPLITS"]]) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - reason='126314, 132699: Build tokenizers for ARM and MacOS') def test_ragged_tensor_to_tensor(self, shape_type, shape_value, values_shape, values_type, default_value, row_partition_tensors, row_partition_types, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): @@ -110,10 +104,6 @@ def create_ragged_tensor_to_tensor_net(self, shape_type, shape_value, values_sha @pytest.mark.parametrize('row_partition_types', [["FIRST_DIM_SIZE", "VALUE_ROWIDS"]]) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - reason='126314, 132699: Build tokenizers 
for ARM and MacOS') def test_ragged_tensor_to_tensor(self, shape_type, shape_value, values_shape, values_type, default_value, row_partition_tensors, row_partition_types, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_StaticRegexReplace.py b/tests/layer_tests/tensorflow_tests/test_tf_StaticRegexReplace.py index ef5e135537eb84..a3fa91ad0976f5 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_StaticRegexReplace.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_StaticRegexReplace.py @@ -1,8 +1,6 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import platform - import numpy as np import pytest import tensorflow as tf @@ -41,10 +39,6 @@ def create_static_regex_replace_net(self, input_shape, pattern, rewrite, replace @pytest.mark.parametrize('replace_global', [None, True, False]) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - reason='Ticket - 126314, 132699') def test_static_regex_replace(self, input_shape, pattern, rewrite, replace_global, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_StringLower.py b/tests/layer_tests/tensorflow_tests/test_tf_StringLower.py index f4c9e7260d7afb..5787c0b8318801 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_StringLower.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_StringLower.py @@ -3,7 +3,6 @@ import numpy as np import os -import platform import pytest import tensorflow as tf from common.tf_layer_test_class import CommonTFLayerTest @@ -46,10 +45,6 @@ def create_string_lower_net(self, input_shape, encoding, strings_dictionary): ['第一句話在這裡', '第二句話在這裡', '第三句話在這裡']]) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and 
platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - reason='Ticket - 126314, 132699') def test_string_lower(self, input_shape, encoding, strings_dictionary, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): if ie_device == 'GPU' or run_in_jenkins(): @@ -78,10 +73,6 @@ def create_string_lower_model(self, output_dir): @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - reason='Ticket - 126314, 132699') def test_string_lower_with_ovc(self, ie_device, temp_dir, precision): if ie_device == 'GPU' or run_in_jenkins(): pytest.skip("operation extension is not supported on GPU") @@ -90,7 +81,6 @@ def test_string_lower_with_ovc(self, ie_device, temp_dir, precision): return_code, _, _ = generate_ir_ovc(input_model_path, {'output_model': output_model_path}) assert return_code == 0, "OVC tool is failed for conversion model {}".format(input_model_path) - import openvino_tokenizers import openvino as ov core = ov.Core() compiled_model = core.compile_model(output_model_path, ie_device) diff --git a/tests/layer_tests/tensorflow_tests/test_tf_StringSplitV2.py b/tests/layer_tests/tensorflow_tests/test_tf_StringSplitV2.py index 3745d07926bc43..84d7c269ce598f 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_StringSplitV2.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_StringSplitV2.py @@ -1,8 +1,6 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import platform - import numpy as np import pytest import tensorflow as tf @@ -42,10 +40,6 @@ def create_string_split_v2_net(self, input_shape, sep, maxsplit): @pytest.mark.parametrize('maxsplit', [None, -1, 5, 10]) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - 
reason='126314, 132699: Build tokenizers for ARM and MacOS') def test_string_split_v2(self, input_shape, sep, maxsplit, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): diff --git a/tests/layer_tests/tensorflow_tests/test_tf_StringToHashBucketFast.py b/tests/layer_tests/tensorflow_tests/test_tf_StringToHashBucketFast.py index 08812fe7b46228..5fefb8117f3dcf 100644 --- a/tests/layer_tests/tensorflow_tests/test_tf_StringToHashBucketFast.py +++ b/tests/layer_tests/tensorflow_tests/test_tf_StringToHashBucketFast.py @@ -1,8 +1,6 @@ # Copyright (C) 2018-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import platform - import numpy as np import pytest import tensorflow as tf @@ -45,10 +43,6 @@ def create_string_to_hash_bucket_fast_net(self, input_shape, strings_dictionary, ['', ' ', '12345 ']]) @pytest.mark.precommit @pytest.mark.nightly - @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l', - 'aarch64', - 'arm64', 'ARM64'], - reason='Ticket - 126314, 132699') def test_string_to_hash_bucket_fast(self, input_shape, num_buckets, strings_dictionary, ie_device, precision, ir_version, temp_dir, use_legacy_frontend): From 5833781ddbc476d77cf5593f1f8b34758988b9a8 Mon Sep 17 00:00:00 2001 From: Georgy Krivoruchko Date: Fri, 1 Nov 2024 12:03:18 +0400 Subject: [PATCH 011/104] [ONNX] Disabled constant folding for Subtract branch of DequantizeLinear-21 (#27359) ### Details: - Disabled constant folding for Subtract branch of DequantizeLinear-21 ### Tickets: - 156329 --- src/frontends/onnx/frontend/src/op/dequantize_linear.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp index d7b5214f3e53f4..47fcc7af60bf61 100644 --- a/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp +++ b/src/frontends/onnx/frontend/src/op/dequantize_linear.cpp @@ -18,6 +18,7 @@ #include 
"openvino/op/subtract.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "transformations/rt_info/disable_constant_folding.hpp" #include "utils/common.hpp" #include "utils/reshape.hpp" using namespace ov::op; @@ -241,6 +242,7 @@ ov::OutputVector dequantize_linear(const ov::frontend::onnx::Node& node) { zp = inputs[2]; if (zp.get_element_type() != scale.get_element_type()) { zp = std::make_shared(zp, scale_type); + disable_constant_folding(zp.get_node_shared_ptr()); } zp = std::make_shared(zp, unsqueezed_axes); } From af389b482381b445a3d7bb6ed6c7de3a5320da87 Mon Sep 17 00:00:00 2001 From: Evgenya Nugmanova Date: Fri, 1 Nov 2024 12:14:46 +0400 Subject: [PATCH 012/104] Broadcast: symbol propagation (#27357) ### Details: - *Improves symbol propagation in LLMs and allows for better ShapeOf optimization* Signed-off-by: Evgeniia Nugmanova --- src/core/include/openvino/op/util/broadcast_base.hpp | 1 + src/core/src/op/util/broadcast_base.cpp | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/src/core/include/openvino/op/util/broadcast_base.hpp b/src/core/include/openvino/op/util/broadcast_base.hpp index 2e500eb611c04c..6300559ac8cf00 100644 --- a/src/core/include/openvino/op/util/broadcast_base.hpp +++ b/src/core/include/openvino/op/util/broadcast_base.hpp @@ -63,6 +63,7 @@ class OPENVINO_API BroadcastBase : public Op { bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; + bool evaluate_symbol(ov::TensorSymbolVector& output_symbols) const override; PartialShape get_result_shape_pdpd(const PartialShape& arg0_shape, const PartialShape& target_shape, diff --git a/src/core/src/op/util/broadcast_base.cpp b/src/core/src/op/util/broadcast_base.cpp index 59154e45e2b37a..c2c838afeb38bd 100644 --- a/src/core/src/op/util/broadcast_base.cpp +++ b/src/core/src/op/util/broadcast_base.cpp @@ -471,3 +471,10 @@ bool ov::op::util::BroadcastBase::evaluate_upper(ov::TensorVector& 
output_values return false; return default_upper_bound_evaluator(this, output_values); } + +bool ov::op::util::BroadcastBase::evaluate_symbol(ov::TensorSymbolVector& output_symbols) const { + if (!input_value(1).get_tensor().has_and_set_bound() || + (get_input_size() > 2 && !input_value(2).get_tensor().has_and_set_bound())) + return false; + return default_symbol_evaluator(this, {0}, output_symbols); +} From caa1e6af13139692a34cf37787c9c79f949bcaaa Mon Sep 17 00:00:00 2001 From: Bogdan Pereanu Date: Fri, 1 Nov 2024 10:42:50 +0200 Subject: [PATCH 013/104] [NPU] Create compiler adapter class (#27006) ### Details: - *Create a new CompilerAdapter interface that hides different implementations of CIP and CID* - *iCompiler remains an interface only for CIP. This keeps CIP (developed in another repository) decoupled from L0* - we still use NetworkMetadata in the plugin flow, which needs to be decided later if is still needed or if it can be removed - *Graph object is created by compiler_adapter* - *Backend doesn't create/initialize graph any longer* - *Moving common objects for backend and compiler_adapter to utils/zero/* - *Destroy blob on the import path after we load the weights into the NPU memory* - *Create a new property to postpone weights loading until the creation of the first inference request, by default is performed right after the model is compiled - NPU_DEFER_WEIGHTS_LOAD* A short description of the new format: ![Screenshot 2024-10-30 151129](https://github.com/user-attachments/assets/89f86c36-f3e8-4906-8394-7cd0ae5617a2) ### Tickets: - *CVS-153081* --------- Signed-off-by: Bogdan Pereanu --- src/plugins/intel_npu/README.md | 2 +- src/plugins/intel_npu/cmake/features.cmake | 25 +- src/plugins/intel_npu/src/CMakeLists.txt | 13 +- .../al/include/intel_npu/config/runtime.hpp | 32 + .../src/al/include/intel_npu/icompiler.hpp | 162 +-- .../al/include/intel_npu/network_metadata.hpp | 127 ++ .../intel_npu/npu_private_properties.hpp | 7 + 
.../intel_npu/src/al/src/config/runtime.cpp | 1 + .../intel_npu/src/backend/CMakeLists.txt | 29 - .../src/backend/include/zero_backend.hpp | 9 +- .../src/backend/include/zero_device.hpp | 10 +- .../src/backend/include/zero_executor.hpp | 86 -- .../src/backend/include/zero_host_tensor.hpp | 2 +- .../backend/include/zero_infer_request.hpp | 9 +- .../src/backend/include/zero_memory.hpp | 2 +- .../src/backend/include/zero_pipeline.hpp | 13 +- .../src/backend/include/zero_profiling.hpp | 2 +- .../backend/include/zero_remote_tensor.hpp | 2 +- .../src/backend/src/zero_backend.cpp | 28 +- .../intel_npu/src/backend/src/zero_device.cpp | 36 +- .../src/backend/src/zero_executor.cpp | 187 --- .../src/backend/src/zero_infer_request.cpp | 74 +- .../src/backend/src/zero_pipeline.cpp | 62 +- .../intel_npu/src/common/CMakeLists.txt | 2 +- .../intel_npu/common/icompiled_model.hpp | 11 +- .../include/intel_npu/common/igraph.hpp | 103 ++ .../common/include/intel_npu/common/npu.hpp | 16 +- .../intel_npu/common/sync_infer_request.hpp | 3 +- .../src/common/src/sync_infer_request.cpp | 2 +- .../include/driver_compiler_adapter.hpp | 50 - .../include/zero_compiler_in_driver.hpp | 201 --- .../compiler/src/driver_compiler_adapter.cpp | 130 -- .../compiler/src/zero_compiler_in_driver.cpp | 1081 ----------------- .../CMakeLists.txt | 7 +- .../include/custom_stream_buffer.hpp | 4 +- .../include/driver_compiler_adapter.hpp | 64 + .../compiler_adapter/include/driver_graph.hpp | 50 + .../include/ir_serializer.hpp} | 4 +- .../include/plugin_compiler_adapter.hpp | 37 + .../compiler_adapter/include/plugin_graph.hpp | 49 + .../include/ze_graph_ext_wrappers.hpp | 159 +++ .../ze_graph_ext_wrappers_interface.hpp | 42 + .../src/driver_compiler_adapter.cpp | 606 +++++++++ .../src/compiler_adapter/src/driver_graph.cpp | 164 +++ .../src/ir_serializer.cpp} | 8 +- .../src/plugin_compiler_adapter.cpp | 160 +++ .../src/compiler_adapter/src/plugin_graph.cpp | 132 ++ .../src/precomp.hpp | 0 
.../src/ze_graph_ext_wrappers.cpp | 568 +++++++++ .../intel_npu/src/plugin/CMakeLists.txt | 30 +- .../src/plugin/include/compiled_model.hpp | 37 +- .../intel_npu/src/plugin/include/compiler.hpp | 20 - .../intel_npu/src/plugin/include/plugin.hpp | 4 +- .../intel_npu/src/plugin/src/backends.cpp | 7 +- .../src/plugin/src/compiled_model.cpp | 138 +-- .../intel_npu/src/plugin/src/compiler.cpp | 101 -- .../intel_npu/src/plugin/src/plugin.cpp | 82 +- .../intel_npu/utils/zero}/zero_init.hpp | 6 +- .../intel_npu/utils/zero}/zero_types.hpp | 2 - .../intel_npu/utils/zero/zero_utils.hpp | 31 +- .../intel_npu/utils/zero}/zero_wrappers.hpp | 23 +- .../intel_npu/src/utils/src/CMakeLists.txt | 3 +- .../src/utils/src/zero/CMakeLists.txt | 34 +- .../src => utils/src/zero}/zero_init.cpp | 9 +- .../src => utils/src/zero}/zero_wrappers.cpp | 28 +- .../intel_npu/tests/functional/CMakeLists.txt | 10 +- .../custom_stream.cpp | 5 +- .../ov_infer_request/compile_and_infer.cpp | 4 +- .../functional/behavior/work_with_devices.hpp | 2 +- .../internal/overload/compile_and_infer.hpp | 8 +- .../overload/compiled_model/property.cpp | 2 +- .../behavior/compiled_model/properties.cpp | 2 +- .../intel_npu/thirdparty/CMakeLists.txt | 3 +- 73 files changed, 2620 insertions(+), 2544 deletions(-) create mode 100644 src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp delete mode 100644 src/plugins/intel_npu/src/backend/include/zero_executor.hpp delete mode 100644 src/plugins/intel_npu/src/backend/src/zero_executor.cpp create mode 100644 src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp delete mode 100644 src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp delete mode 100644 src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp delete mode 100644 src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp delete mode 100644 src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp rename 
src/plugins/intel_npu/src/{compiler => compiler_adapter}/CMakeLists.txt (85%) rename src/plugins/intel_npu/src/{compiler => compiler_adapter}/include/custom_stream_buffer.hpp (95%) create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/driver_graph.hpp rename src/plugins/intel_npu/src/{compiler/include/graph_transformations.hpp => compiler_adapter/include/ir_serializer.hpp} (93%) create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/plugin_graph.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers_interface.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/driver_graph.cpp rename src/plugins/intel_npu/src/{compiler/src/graph_transformations.cpp => compiler_adapter/src/ir_serializer.cpp} (94%) create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/plugin_graph.cpp rename src/plugins/intel_npu/src/{compiler => compiler_adapter}/src/precomp.hpp (100%) create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp delete mode 100644 src/plugins/intel_npu/src/plugin/include/compiler.hpp delete mode 100644 src/plugins/intel_npu/src/plugin/src/compiler.cpp rename src/plugins/intel_npu/src/{backend/include => utils/include/intel_npu/utils/zero}/zero_init.hpp (95%) rename src/plugins/intel_npu/src/{backend/include => utils/include/intel_npu/utils/zero}/zero_types.hpp (99%) rename src/plugins/intel_npu/src/{backend/include => 
utils/include/intel_npu/utils/zero}/zero_wrappers.hpp (90%) rename src/plugins/intel_npu/src/{backend/src => utils/src/zero}/zero_init.cpp (98%) rename src/plugins/intel_npu/src/{backend/src => utils/src/zero}/zero_wrappers.cpp (91%) diff --git a/src/plugins/intel_npu/README.md b/src/plugins/intel_npu/README.md index b7508c68704e32..980faa71a15937 100644 --- a/src/plugins/intel_npu/README.md +++ b/src/plugins/intel_npu/README.md @@ -78,7 +78,7 @@ There is currently no support for multiple devices, which means only one level-z ### Inference pipeline -The result of the model compilation is represented through a NetworkDescription. This model description is passed by the plugin to the driver to create a level zero graph instance and obtain a graph handle that can later be used to execute multiple inferences in parallel for the same model. Since the same model instance is shared across all subsequent inference objects, this initialization step is performed by default right after the model is compiled and it can be postponed until the creation of the first inference request through the use of an environment variable: "IE_NPU_CREATE_EXECUTOR" (IE_NPU_CREATE_EXECUTOR=0 to postpone the initialization). +The result of the model compilation is represented through an IGraph object, which contains a valid level zero graph handle that can later be used to execute multiple inferences in parallel for the same model. By default, weights are loaded into the NPU memory right after the model is compiled, but this step can be postponed until the creation of the first inference request through the use of an internal NPU property: "NPU_DEFER_WEIGHTS_LOAD". 
Users can create one or more inference requests for a compiled model using OpenVINO API: diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 0dde0f9d67f6e5..7d34c52c6d1292 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -4,29 +4,10 @@ ov_option(ENABLE_MLIR_COMPILER "Enable compilation of npu_mlir_compiler libraries" ON) -ov_option(ENABLE_NPU_RUNTIME_COMMON "Enable compilation of npu runtime common libraries" ON) +ov_option(ENABLE_NPU_PLUGIN_ENGINE "Enable compilation of NPU plugin engine" ON) -# if ENABLE_ZEROAPI_BACKEND=ON, it adds the ze_loader dependency for driver compiler -ov_dependent_option(ENABLE_NPU_PLUGIN_ENGINE "Enable compilation of NPU plugin engine" ON "ENABLE_NPU_RUNTIME_COMMON" OFF) - -ov_dependent_option(ENABLE_ZEROAPI_BACKEND "Enable zero-api as a plugin backend" ON "ENABLE_NPU_RUNTIME_COMMON;ENABLE_NPU_PLUGIN_ENGINE" OFF) - -ov_dependent_option(ENABLE_DRIVER_COMPILER_ADAPTER "Enable NPU Compiler inside driver" ON "ENABLE_ZEROAPI_BACKEND" OFF) - -if((NOT ENABLE_NPU_PLUGIN_ENGINE OR NOT ENABLE_NPU_RUNTIME_COMMON) AND ENABLE_TESTS) - message(FATAL_ERROR "Tests depends on npu plugin engine and npu runtime common libraries!") -endif() - -if((NOT ENABLE_NPU_PLUGIN_ENGINE OR NOT ENABLE_NPU_RUNTIME_COMMON) AND ENABLE_ZEROAPI_BACKEND) - message(FATAL_ERROR "Zero backend depends on npu plugin engine and npu common libraries!") -endif() - -if(NOT ENABLE_ZEROAPI_BACKEND AND ENABLE_DRIVER_COMPILER_ADAPTER) - message(FATAL_ERROR "Compiler adapter depends on zero backend to use same context!") -endif() - -if(NOT BUILD_SHARED_LIBS AND NOT ENABLE_MLIR_COMPILER AND NOT ENABLE_DRIVER_COMPILER_ADAPTER) - message(FATAL_ERROR "No compiler found for static build!") +if(NOT ENABLE_NPU_PLUGIN_ENGINE AND ENABLE_TESTS) + message(FATAL_ERROR "Tests depends on npu plugin engine!") endif() ov_dependent_option(ENABLE_IMD_BACKEND "Enable InferenceManagerDemo based 
NPU AL backend" OFF "NOT WIN32;NOT CMAKE_CROSSCOMPILING" OFF) diff --git a/src/plugins/intel_npu/src/CMakeLists.txt b/src/plugins/intel_npu/src/CMakeLists.txt index 5530eb1f3e59e5..f5d1fd5b41226c 100644 --- a/src/plugins/intel_npu/src/CMakeLists.txt +++ b/src/plugins/intel_npu/src/CMakeLists.txt @@ -9,18 +9,9 @@ add_subdirectory(utils) add_subdirectory(al) -if (ENABLE_NPU_RUNTIME_COMMON) +if (ENABLE_NPU_PLUGIN_ENGINE) add_subdirectory(common) -endif() - -if(ENABLE_DRIVER_COMPILER_ADAPTER AND ENABLE_ZEROAPI_BACKEND) - add_subdirectory(compiler) -endif() - -if(ENABLE_ZEROAPI_BACKEND) + add_subdirectory(compiler_adapter) add_subdirectory(backend) -endif() - -if (ENABLE_NPU_PLUGIN_ENGINE) add_subdirectory(plugin) endif() diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp index d52c25f6a3e6a5..510ab7fc43b0c8 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/runtime.hpp @@ -131,6 +131,38 @@ struct CREATE_EXECUTOR final : OptionBase { } }; +// +// DEFER_WEIGHTS_LOAD +// + +struct DEFER_WEIGHTS_LOAD final : OptionBase { + static std::string_view key() { + return ov::intel_npu::defer_weights_load.name(); + } + + static int64_t defaultValue() { + return false; + } + + static constexpr std::string_view getTypeName() { + return "bool"; + } + +#ifdef NPU_PLUGIN_DEVELOPER_BUILD + static std::string_view envVar() { + return "OV_NPU_DEFER_WEIGHTS_LOAD"; + } +#endif + + static bool isPublic() { + return false; + } + + static OptionMode mode() { + return OptionMode::RunTime; + } +}; + // // NUM_STREAMS // diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp index e0a02f12aa2e17..53696396603d9a 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp +++ 
b/src/plugins/intel_npu/src/al/include/intel_npu/icompiler.hpp @@ -6,128 +6,12 @@ #pragma once -#include -#include -#include -#include -#include -#include -#include - #include "intel_npu/config/config.hpp" -#include "openvino/core/partial_shape.hpp" -#include "openvino/core/type/element_type.hpp" -#include "openvino/runtime/common.hpp" +#include "intel_npu/network_metadata.hpp" #include "openvino/runtime/profiling_info.hpp" namespace intel_npu { -/** - * @brief A helper structure used for storing metadata corresponding to one input/output entry. - */ -struct IODescriptor { - /** - * @brief The name of the input/output assigned by the compiler. - * @details This value may differ from other name attributes: - * - The compiler could have created additional inputs/outputs (e.g. for representing states). These are not - * found in the original IR model. - * - The compiler may append indices to names in the case where duplicate names are found. - * @note The prefixes introduced by the compiler in order to differentiate the special cases (e.g. states and shape - * tensors) were removed prior to initializing this field. - */ - std::string nameFromCompiler; - - ov::element::Type precision; - - ov::PartialShape shapeFromCompiler; - - /** - * @brief If set to "true", the current object describes a buffer which may be used for altering a state tensor. - * @details This flag is set if the compiler prefixed the name using a "read value" prefix. The state input and - * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. - */ - bool isStateInput = false; - - /** - * @brief If set to "true", the current object describes a buffer which reflects the value of a state tensor. - * @details This flag is set if the compiler prefixed the name using an "assign" prefix. The state input and - * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. 
- */ - bool isStateOutput = false; - - /** - * @brief If set to "true", the buffer of the tensor described here contains as value the shape of the referenced - * tensor. - * @details This flag is set if the compiler prefixed the name using a "shape" prefix. - * - * The referenced tensor bears the same name ("nameFromCompiler"), but its "isShapeTensor" value is set to - * "false". The two descriptors are also tied using the "relatedDescriptorIndex" attribute. - */ - bool isShapeTensor = false; - - /** - * @brief Points towards a related descriptor. - * @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor) - * pairs. - */ - std::optional relatedDescriptorIndex; - - /** - * @brief The friendly name of the node extracted from the IR model. - * @details In some cases, this field is required for constructing a dummy model which uses the same input/output - * metadata as the original IR model. - * - * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the - * compiler). - */ - std::string nodeFriendlyName; - - /** - * @brief The names of the output tensors extracted from the IR model. - * @details In some cases, this field is required for constructing a dummy model which uses the same input/output - * metadata as the original IR model. - * - * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the - * compiler). - */ - std::unordered_set outputTensorNames; - - /** - * @brief The shape extracted from the IR model. - * @details The values may differ from the ones found in "shapeFromCompiler" if batching is to be handled by the - * plugin. - * - * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added - * by the compiler). 
- */ - std::optional shapeFromIRModel = std::nullopt; -}; - -struct NetworkMetadata final { - std::string name; - - std::vector inputs; - std::vector outputs; - std::vector profilingOutputs; - - size_t numStreams = 1; - - // Used primarily in the CID path to pass the level zero graph handle from compiler to the backend executor - void* graphHandle = nullptr; - - /** - * @brief Binds the (state input, state output) and (dynamic tensor, shape tensor) pairs using the - * "relatedDescriptorIndex" attribute. - * @details For state inputs, the "relatedDescriptorIndex" value is set to the index of the output which bears the - * same name. The reverse is also applied. - * - * For shape tensors, the lookup is performed in the same container (inputs or outputs). The value is once again set - * to the index of the entry which bears the same name. - */ - void bindRelatedDescriptors(); - -}; // namespace intel_npu - /** * @struct NetworkDescription * @brief The object returned by the compiler @@ -138,7 +22,6 @@ struct NetworkDescription final { NetworkDescription(std::vector&& compiledNetwork, NetworkMetadata&& metadata) : compiledNetwork(std::move(compiledNetwork)), metadata(std::move(metadata)) {} - NetworkDescription(NetworkMetadata&& metadata) : metadata(std::move(metadata)) {} // Force move semantics to prevent blob copies NetworkDescription(const NetworkDescription&) = delete; NetworkDescription(NetworkDescription&&) = default; @@ -151,32 +34,6 @@ struct NetworkDescription final { NetworkMetadata metadata; }; -/** - * @struct CompiledNetwork - * @brief Custom container for compiled network, used for export - * @var CompiledNetwork::data - * Pointer to the address of compiled network - * @var CompiledNetwork:size - * Size of the compiled network - * @var CompiledNetwork::ownedStorage - * Plugin owned compiled network storage that is required in case of a driver that - * doesn't support graph extension 1.7, as in this case plugin must create a copy of the compiled network. 
- * @note It's unsafe to store either data or size outside of the compiled network object as its destructor - * would release the owning container - */ - -struct CompiledNetwork { - const uint8_t* data; - size_t size; - CompiledNetwork(const uint8_t* data, size_t size, std::vector storage) - : data(data), - size(size), - ownedStorage(std::move(storage)) {} - -private: - std::vector ownedStorage; -}; - /** * @interface ICompiler * @brief An interface to be implemented by a concrete compiler to provide @@ -184,12 +41,6 @@ struct CompiledNetwork { */ class ICompiler : public std::enable_shared_from_this { public: - /** - * @brief Returns the maximum OpenVino opset version supported by the compiler - * @return opset version e.g. 11 for opset11 - */ - virtual uint32_t getSupportedOpsetVersion() const = 0; - /** * @brief Transforms a network from the OpenVINO model representation to a format executable * by a NPU device @@ -216,8 +67,6 @@ class ICompiler : public std::enable_shared_from_this { * @param config a reference to NPUConfig containing plugin config options * Note: compilation options will be ignored, * since the network is already compiled - * @param netName a reference to the string describing network name - * to be used for creating network description * @return a shared pointer on an object implementing NetworkDescription interface */ virtual NetworkMetadata parse(const std::vector& network, const Config& config) const = 0; @@ -226,15 +75,6 @@ class ICompiler : public std::enable_shared_from_this { const std::vector& network, const Config& config) const = 0; - // Driver compiler can use this to release graphHandle, if we do not have executor - virtual void release([[maybe_unused]] std::shared_ptr networkDescription){}; - - virtual CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) { - return CompiledNetwork(networkDescription.compiledNetwork.data(), - networkDescription.compiledNetwork.size(), - 
networkDescription.compiledNetwork); - } - protected: virtual ~ICompiler() = default; }; diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp new file mode 100644 index 00000000000000..b7a78b3dfd43e1 --- /dev/null +++ b/src/plugins/intel_npu/src/al/include/intel_npu/network_metadata.hpp @@ -0,0 +1,127 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +// Compiler Interface + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "intel_npu/config/config.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/type/element_type.hpp" +#include "openvino/runtime/common.hpp" + +namespace intel_npu { + +/** + * @brief A helper structure used for storing metadata corresponding to one input/output entry. + */ +struct IODescriptor { + /** + * @brief The name of the input/output assigned by the compiler. + * @details This value may differ from other name attributes: + * - The compiler could have created additional inputs/outputs (e.g. for representing states). These are not + * found in the original IR model. + * - The compiler may append indices to names in the case where duplicate names are found. + * @note The prefixes introduced by the compiler in order to differentiate the special cases (e.g. states and shape + * tensors) were removed prior to initializing this field. + */ + std::string nameFromCompiler; + + ov::element::Type precision; + + ov::PartialShape shapeFromCompiler; + + /** + * @brief If set to "true", the current object describes a buffer which may be used for altering a state tensor. + * @details This flag is set if the compiler prefixed the name using a "read value" prefix. The state input and + * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. 
+ */ + bool isStateInput = false; + + /** + * @brief If set to "true", the current object describes a buffer which reflects the value of a state tensor. + * @details This flag is set if the compiler prefixed the name using an "assign" prefix. The state input and + * state output descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isStateOutput = false; + + /** + * @brief If set to "true", the buffer of the tensor described here contains as value the shape of the referenced + * tensor. + * @details This flag is set if the compiler prefixed the name using a "shape" prefix. + * + * The referenced tensor bears the same name ("nameFromCompiler"), but its "isShapeTensor" value is set to + * "false". The two descriptors are also tied using the "relatedDescriptorIndex" attribute. + */ + bool isShapeTensor = false; + + /** + * @brief Points towards a related descriptor. + * @details The related descriptors are defined by (state input, state output) or (dynamic tensor, shape tensor) + * pairs. + */ + std::optional relatedDescriptorIndex; + + /** + * @brief The friendly name of the node extracted from the IR model. + * @details In some cases, this field is required for constructing a dummy model which uses the same input/output + * metadata as the original IR model. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the + * compiler). + */ + std::string nodeFriendlyName; + + /** + * @brief The names of the output tensors extracted from the IR model. + * @details In some cases, this field is required for constructing a dummy model which uses the same input/output + * metadata as the original IR model. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added by the + * compiler). + */ + std::unordered_set outputTensorNames; + + /** + * @brief The shape extracted from the IR model. 
+ * @details The values may differ from the ones found in "shapeFromCompiler" if batching is to be handled by the + * plugin. + * + * This field may be empty if the I/O entry is not found in the original IR model (i.e. the entry was added + * by the compiler). + */ + std::optional shapeFromIRModel = std::nullopt; +}; + +struct NetworkMetadata final { + std::string name; + + std::vector inputs; + std::vector outputs; + std::vector profilingOutputs; + + size_t numStreams = 1; + + /** + * @brief Binds the (state input, state output) and (dynamic tensor, shape tensor) pairs using the + * "relatedDescriptorIndex" attribute. + * @details For state inputs, the "relatedDescriptorIndex" value is set to the index of the output which bears the + * same name. The reverse is also applied. + * + * For shape tensors, the lookup is performed in the same container (inputs or outputs). The value is once again set + * to the index of the entry which bears the same name. + */ + void bindRelatedDescriptors(); + +}; // namespace intel_npu + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp index 0c5a04ce0c0d83..d8fabee177b2b9 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp @@ -351,6 +351,13 @@ static constexpr ov::Property batch_mode{"NPU_BATCH_MODE"}; */ static constexpr ov::Property create_executor{"NPU_CREATE_EXECUTOR"}; +/** + * @brief [Only for NPU Plugin] + * Type: boolean, default is false + * This option allows to omit loading the weights until inference is created + */ +static constexpr ov::Property defer_weights_load{"NPU_DEFER_WEIGHTS_LOAD"}; + /** * @brief Read-only property to get the name of used backend */ diff --git a/src/plugins/intel_npu/src/al/src/config/runtime.cpp 
b/src/plugins/intel_npu/src/al/src/config/runtime.cpp index 10f9b4a7c7222b..759956b6f597df 100644 --- a/src/plugins/intel_npu/src/al/src/config/runtime.cpp +++ b/src/plugins/intel_npu/src/al/src/config/runtime.cpp @@ -21,6 +21,7 @@ void intel_npu::registerRunTimeOptions(OptionsDesc& desc) { desc.add(); desc.add(); desc.add(); + desc.add(); desc.add(); desc.add(); desc.add(); diff --git a/src/plugins/intel_npu/src/backend/CMakeLists.txt b/src/plugins/intel_npu/src/backend/CMakeLists.txt index 01465a8179dc24..5a1585c0a63073 100644 --- a/src/plugins/intel_npu/src/backend/CMakeLists.txt +++ b/src/plugins/intel_npu/src/backend/CMakeLists.txt @@ -25,7 +25,6 @@ target_link_libraries(${TARGET_NAME} PRIVATE openvino::npu_al openvino::npu_common - openvino_npu_zero_result_parser ze_loader ) @@ -33,31 +32,3 @@ target_link_libraries(${TARGET_NAME} # targets install # ov_install_static_lib(${TARGET_NAME} ${NPU_INTERNAL_COMPONENT}) - -if(TARGET ze_loader) - if(NOT BUILD_SHARED_LIBS) - # Support link of static runtime in case system does not have ze_loader - install(TARGETS ze_loader EXPORT OpenVINOTargets - RUNTIME DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_PLUGIN_COMPONENT} - ARCHIVE DESTINATION ${OV_CPACK_ARCHIVEDIR} COMPONENT ${NPU_PLUGIN_COMPONENT} - LIBRARY DESTINATION ${OV_CPACK_LIBRARYDIR} COMPONENT ${NPU_PLUGIN_COMPONENT}) - - install(TARGETS utils EXPORT OpenVINOTargets - RUNTIME DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_PLUGIN_COMPONENT} - ARCHIVE DESTINATION ${OV_CPACK_ARCHIVEDIR} COMPONENT ${NPU_PLUGIN_COMPONENT} - LIBRARY DESTINATION ${OV_CPACK_LIBRARYDIR} COMPONENT ${NPU_PLUGIN_COMPONENT}) - - # export to local tree to build against static build tree - export(TARGETS ze_loader NAMESPACE openvino:: - APPEND FILE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake") - - export(TARGETS utils NAMESPACE openvino:: - APPEND FILE "${CMAKE_BINARY_DIR}/OpenVINOTargets.cmake") - endif() - - # Support tests to run with ze_loader - install(TARGETS ze_loader - RUNTIME 
DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL - LIBRARY DESTINATION tests COMPONENT tests EXCLUDE_FROM_ALL) -endif() - diff --git a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp index 68e4f9434418a6..038c7c1d2d9bf9 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_backend.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_backend.hpp @@ -9,7 +9,7 @@ #include "intel_npu/common/npu.hpp" #include "intel_npu/utils/logger/logger.hpp" -#include "zero_init.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" namespace intel_npu { class ZeroEngineBackend final : public IEngineBackend { @@ -29,15 +29,14 @@ class ZeroEngineBackend final : public IEngineBackend { bool isCommandQueueExtSupported() const override; bool isLUIDExtSupported() const override; + const std::shared_ptr& getInitStruct() const; + void* getContext() const override; - void* getDriverHandle() const; - void* getDeviceHandle() const; - ze_graph_dditable_ext_curr_t& getGraphDdiTable() const; void updateInfo(const Config& config) override; private: - std::shared_ptr _instance; + std::shared_ptr _initStruct; std::map> _devices{}; Logger _logger; diff --git a/src/plugins/intel_npu/src/backend/include/zero_device.hpp b/src/plugins/intel_npu/src/backend/include/zero_device.hpp index e87a602613a92a..50f0d28ed210cd 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_device.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_device.hpp @@ -10,9 +10,9 @@ #include "intel_npu/common/icompiled_model.hpp" #include "intel_npu/common/npu.hpp" #include "intel_npu/utils/logger/logger.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" +#include "intel_npu/utils/zero/zero_types.hpp" #include "openvino/runtime/intel_npu/remote_properties.hpp" -#include "zero_init.hpp" -#include "zero_types.hpp" namespace intel_npu { @@ -20,9 +20,6 @@ class ZeroDevice : public IDevice { public: ZeroDevice(const std::shared_ptr& 
initStructs); - std::shared_ptr createExecutor(const std::shared_ptr& networkDescription, - const Config& config) override; - std::string getName() const override; std::string getFullDeviceName() const override; Uuid getUuid() const override; @@ -36,7 +33,6 @@ class ZeroDevice : public IDevice { ov::device::Type getDeviceType() const override; std::shared_ptr createInferRequest(const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config) override; void updateInfo(const Config& config) override { log.setLevel(config.get()); @@ -76,8 +72,6 @@ class ZeroDevice : public IDevice { {ov::element::u8, 0.f}, {ov::element::i8, 0.f}}; - uint32_t _group_ordinal; - Logger log; }; } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp b/src/plugins/intel_npu/src/backend/include/zero_executor.hpp deleted file mode 100644 index eeb96defc16441..00000000000000 --- a/src/plugins/intel_npu/src/backend/include/zero_executor.hpp +++ /dev/null @@ -1,86 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include -#include - -#include - -#include "intel_npu/common/npu.hpp" -#include "intel_npu/utils/logger/logger.hpp" -#include "openvino/runtime/properties.hpp" -#include "zero_init.hpp" -#include "zero_wrappers.hpp" - -namespace intel_npu { - -class ZeroExecutor final : public IExecutor { -public: - ZeroExecutor(const std::shared_ptr& initStructs, - const std::shared_ptr& networkDescription, - const Config& config, - const uint32_t& group_ordinal); - - ZeroExecutor(const ZeroExecutor&) = delete; - ZeroExecutor& operator=(const ZeroExecutor&) = delete; - - ~ZeroExecutor() override; - - struct ArgumentDescriptor { - ze_graph_argument_properties_3_t info; - uint32_t idx; - }; - - void setArgumentValue(uint32_t argi_, const void* argv_) const; - void setWorkloadType(const ov::WorkloadType workloadType) const override; - void mutexLock() const; - void 
mutexUnlock() const; - inline ze_graph_handle_t graph() const { - return _graph; - } - inline std::shared_ptr getInitStructs() const { - return _initStructs; - } - inline const std::shared_ptr& getNetworkDesc() const { - return _networkDesc; - } - inline const std::shared_ptr& getCommandQueue() const { - return _command_queues; - } - inline const uint32_t& get_group_ordinal() const { - return _group_ordinal; - } - inline const std::vector& get_input_descriptors() const { - return _input_descriptors; - } - inline const std::vector& get_output_descriptors() const { - return _output_descriptors; - } - -private: - void initialize_graph_through_command_list() const; - - const Config _config; - Logger _logger; - - const std::shared_ptr _initStructs; - std::shared_ptr _networkDesc; - - ze_graph_dditable_ext_curr_t& _graph_ddi_table_ext; - - const uint32_t _group_ordinal; - - ze_graph_handle_t _graph = nullptr; - - std::vector _input_descriptors; - std::vector _output_descriptors; - - std::shared_ptr _command_queues; - - mutable std::mutex _mutex; -}; - -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp index 52000930e2a751..a214c8e2cb2b5d 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_host_tensor.hpp @@ -5,8 +5,8 @@ #pragma once #include "intel_npu/config/config.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/itensor.hpp" -#include "zero_init.hpp" #include "zero_remote_tensor.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp index 48aad52010a4c2..31248b582250da 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_infer_request.hpp @@ -11,19 +11,17 @@ 
#include "intel_npu/common/sync_infer_request.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" -#include "zero_executor.hpp" +#include "intel_npu/utils/zero/zero_wrappers.hpp" #include "zero_pipeline.hpp" #include "zero_profiling.hpp" #include "zero_remote_tensor.hpp" -#include "zero_wrappers.hpp" namespace intel_npu { class ZeroInferRequest final : public SyncInferRequest { public: - explicit ZeroInferRequest(const std::shared_ptr& backendPtr, + explicit ZeroInferRequest(const std::shared_ptr& initStructs, const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config); ov::SoPtr get_tensor(const ov::Output& port) const override; @@ -85,8 +83,7 @@ class ZeroInferRequest final : public SyncInferRequest { std::vector>& get_input_tensors_data(size_t index) const; const std::shared_ptr _initStructs; - const std::shared_ptr _executorPtr; - const ZeroExecutor* _executor; + const std::shared_ptr _graph; const Config _config; Logger _logger; diff --git a/src/plugins/intel_npu/src/backend/include/zero_memory.hpp b/src/plugins/intel_npu/src/backend/include/zero_memory.hpp index 6ecbde0d546110..992f409b86a928 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_memory.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_memory.hpp @@ -11,7 +11,7 @@ #include #include "intel_npu/utils/logger/logger.hpp" -#include "zero_init.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" namespace { diff --git a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp index 62c8481d28ac1a..92a473a9fc412c 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_pipeline.hpp @@ -4,11 +4,11 @@ #pragma once +#include "intel_npu/common/igraph.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" -#include "zero_executor.hpp" +#include "intel_npu/utils/zero/zero_wrappers.hpp" 
#include "zero_memory.hpp" #include "zero_profiling.hpp" -#include "zero_wrappers.hpp" namespace intel_npu { @@ -21,13 +21,15 @@ struct TensorData { struct Pipeline { public: Pipeline(const Config& config, - const std::shared_ptr& executorPtr, + const std::shared_ptr& initStructs, + const std::shared_ptr& graph, zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, - const size_t numberOfCommandLists); + size_t numberOfCommandLists, + uint32_t group_ordinal); Pipeline(const Pipeline&) = delete; Pipeline& operator=(const Pipeline&) = delete; @@ -42,8 +44,7 @@ struct Pipeline { protected: const Config _config; - const ZeroExecutor* _executor; - CommandQueue& _command_queue; + std::shared_ptr _command_queue; std::vector> _command_lists; std::vector> _fences; EventPool _event_pool; diff --git a/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp b/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp index 505a7f0185e135..17e263a7aaf620 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_profiling.hpp @@ -12,8 +12,8 @@ #include "intel_npu/config/compiler.hpp" #include "intel_npu/utils/logger/logger.hpp" +#include "intel_npu/utils/zero/zero_types.hpp" #include "openvino/runtime/profiling_info.hpp" -#include "zero_types.hpp" namespace intel_npu { namespace zeroProfiling { diff --git a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp index 0211bd5bd08962..5b08643704b651 100644 --- a/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp +++ b/src/plugins/intel_npu/src/backend/include/zero_remote_tensor.hpp @@ -9,8 +9,8 @@ #include #include "intel_npu/common/remote_tensor.hpp" +#include "intel_npu/utils/zero/zero_init.hpp" #include 
"openvino/runtime/intel_npu/remote_properties.hpp" -#include "zero_init.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp index 86af62d414b88c..55aaad102e8b8f 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_backend.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_backend.cpp @@ -14,31 +14,31 @@ namespace intel_npu { ZeroEngineBackend::ZeroEngineBackend(const Config& config) : _logger("ZeroEngineBackend", Logger::global().level()) { _logger.debug("ZeroEngineBackend - initialize started"); - _instance = std::make_shared(); + _initStruct = std::make_shared(); - auto device = std::make_shared(_instance); + auto device = std::make_shared(_initStruct); _devices.emplace(std::make_pair(device->getName(), device)); _logger.debug("ZeroEngineBackend - initialize completed"); } uint32_t ZeroEngineBackend::getDriverVersion() const { - return _instance->getDriverVersion(); + return _initStruct->getDriverVersion(); } uint32_t ZeroEngineBackend::getGraphExtVersion() const { - return _instance->getGraphDdiTable().version(); + return _initStruct->getGraphDdiTable().version(); } bool ZeroEngineBackend::isBatchingSupported() const { - return _instance->isExtensionSupported("ZE_extension_graph_1_6", ZE_MAKE_VERSION(1, 6)); + return _initStruct->isExtensionSupported("ZE_extension_graph_1_6", ZE_MAKE_VERSION(1, 6)); } bool ZeroEngineBackend::isCommandQueueExtSupported() const { - return _instance->isExtensionSupported(std::string(ZE_COMMAND_QUEUE_NPU_EXT_NAME), ZE_MAKE_VERSION(1, 0)); + return _initStruct->isExtensionSupported(std::string(ZE_COMMAND_QUEUE_NPU_EXT_NAME), ZE_MAKE_VERSION(1, 0)); } bool ZeroEngineBackend::isLUIDExtSupported() const { - return _instance->isExtensionSupported(std::string(ZE_DEVICE_LUID_EXT_NAME), ZE_MAKE_VERSION(1, 0)); + return _initStruct->isExtensionSupported(std::string(ZE_DEVICE_LUID_EXT_NAME), ZE_MAKE_VERSION(1, 0)); } 
ZeroEngineBackend::~ZeroEngineBackend() = default; @@ -69,19 +69,11 @@ const std::vector ZeroEngineBackend::getDeviceNames() const { } void* ZeroEngineBackend::getContext() const { - return _instance->getContext(); + return _initStruct->getContext(); } -void* ZeroEngineBackend::getDriverHandle() const { - return _instance->getDriver(); -} - -void* ZeroEngineBackend::getDeviceHandle() const { - return _instance->getDevice(); -} - -ze_graph_dditable_ext_curr_t& ZeroEngineBackend::getGraphDdiTable() const { - return _instance->getGraphDdiTable(); +const std::shared_ptr& ZeroEngineBackend::getInitStruct() const { + return _initStruct; } void ZeroEngineBackend::updateInfo(const Config& config) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_device.cpp b/src/plugins/intel_npu/src/backend/src/zero_device.cpp index 58bcd0eb7cc944..6e16dde3b120bf 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_device.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_device.cpp @@ -7,7 +7,6 @@ #include "intel_npu/common/itt.hpp" #include "intel_npu/utils/zero/zero_api.hpp" #include "intel_npu/utils/zero/zero_utils.hpp" -#include "zero_executor.hpp" #include "zero_host_tensor.hpp" #include "zero_infer_request.hpp" #include "zero_remote_tensor.hpp" @@ -64,38 +63,6 @@ ZeroDevice::ZeroDevice(const std::shared_ptr& initStructs device_gops[ov::element::i8] = gops; device_gops[ov::element::f16] = 0.5f * gops; } - - std::vector command_group_properties; - uint32_t command_queue_group_count = 0; - // Discover all command queue groups - THROW_ON_FAIL_FOR_LEVELZERO( - "zeDeviceGetCommandQueueGroupProperties", - zeDeviceGetCommandQueueGroupProperties(_initStructs->getDevice(), &command_queue_group_count, nullptr)); - - log.debug("ZeroDevice::ZeroDevice - resize command_queue_group_count"); - command_group_properties.resize(command_queue_group_count); - - for (auto& prop : command_group_properties) { - prop.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_GROUP_PROPERTIES; - prop.pNext = 
nullptr; - } - - THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetCommandQueueGroupProperties", - zeDeviceGetCommandQueueGroupProperties(_initStructs->getDevice(), - &command_queue_group_count, - command_group_properties.data())); - - // Find the corresponding command queue group. - log.debug("ZeroDevice::ZeroDevice - findGroupOrdinal"); - _group_ordinal = zeroUtils::findGroupOrdinal(command_group_properties, device_properties); - log.debug("ZeroDevice::ZeroDevice - init completed"); -} - -std::shared_ptr ZeroDevice::createExecutor( - const std::shared_ptr& networkDescription, - const Config& config) { - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Device::createExecutor"); - return std::make_shared(_initStructs, networkDescription, config, _group_ordinal); } std::string ZeroDevice::getName() const { @@ -205,9 +172,8 @@ ov::device::Type ZeroDevice::getDeviceType() const { std::shared_ptr ZeroDevice::createInferRequest( const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config) { - return std::make_shared(_initStructs, compiledModel, executor, config); + return std::make_shared(_initStructs, compiledModel, config); } ov::SoPtr ZeroDevice::createRemoteTensor(std::shared_ptr context, diff --git a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp b/src/plugins/intel_npu/src/backend/src/zero_executor.cpp deleted file mode 100644 index 32da2b2e0e4189..00000000000000 --- a/src/plugins/intel_npu/src/backend/src/zero_executor.cpp +++ /dev/null @@ -1,187 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "zero_executor.hpp" - -#include - -#include -#include -#include -#include - -#include "intel_npu/common/itt.hpp" -#include "intel_npu/config/common.hpp" -#include "intel_npu/prefix.hpp" -#include "intel_npu/utils/zero/zero_utils.hpp" -#include "openvino/runtime/properties.hpp" -#include "ze_command_queue_npu_ext.h" -#include "zero_device.hpp" - -using namespace intel_npu; - 
-ZeroExecutor::ZeroExecutor(const std::shared_ptr& initStructs, - const std::shared_ptr& networkDescription, - const Config& config, - const uint32_t& group_ordinal) - : _config(config), - _logger("Graph", _config.get()), - _initStructs(initStructs), - _networkDesc(networkDescription), - _graph_ddi_table_ext(_initStructs->getGraphDdiTable()), - _group_ordinal(group_ordinal), - _command_queues{std::make_shared(_initStructs->getDevice(), - _initStructs->getContext(), - zeroUtils::toZeQueuePriority(_config.get()), - _initStructs->getCommandQueueDdiTable(), - _config, - group_ordinal)} { - _logger.debug("ZeroExecutor::ZeroExecutor - create graph"); - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate"); - - // _graph is a nullptr for CIP path, a new handle will be obtained from the driver based on the given - // compiledNetwork _graph gets (reuses) graphHandle from the compiler for CID path - if (_networkDesc->metadata.graphHandle == nullptr) { - _logger.debug("create graph handle on executor"); - ze_graph_desc_t desc{ZE_STRUCTURE_TYPE_GRAPH_DESC_PROPERTIES, - nullptr, - ZE_GRAPH_FORMAT_NATIVE, - _networkDesc->compiledNetwork.size(), - _networkDesc->compiledNetwork.data(), - nullptr}; - ze_result_t result = - _graph_ddi_table_ext.pfnCreate(_initStructs->getContext(), _initStructs->getDevice(), &desc, &_graph); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnCreate", result, _graph_ddi_table_ext); - - } else { - _logger.debug("reuse graph handle created from compiler"); - _graph = static_cast(_networkDesc->metadata.graphHandle); - } - - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties"); - _logger.debug("performing pfnGetProperties"); - ze_graph_properties_t props{}; - props.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; - - ze_result_t result = _graph_ddi_table_ext.pfnGetProperties(_graph, &props); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetProperties", result, _graph_ddi_table_ext); - - auto targetDriverExtVersion = 
_graph_ddi_table_ext.version(); - if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) { - OPENVINO_THROW("Incompatibility between the NPU plugin and driver! The driver version is too old, please " - "update the driver version"); - } - - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3"); - _logger.debug("performing pfnGetArgumentProperties3"); - for (uint32_t index = 0; index < props.numGraphArgs; ++index) { - ze_graph_argument_properties_3_t arg3{}; - arg3.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES; - ze_result_t result = _graph_ddi_table_ext.pfnGetArgumentProperties3(_graph, index, &arg3); - THROW_ON_FAIL_FOR_LEVELZERO_EXT("pfnGetArgumentProperties3", result, _graph_ddi_table_ext); - - if (arg3.type == ZE_GRAPH_ARGUMENT_TYPE_INPUT) { - _input_descriptors.push_back(ArgumentDescriptor{arg3, index}); - } else { - _output_descriptors.push_back(ArgumentDescriptor{arg3, index}); - } - } - - if (_graph_ddi_table_ext.version() < ZE_GRAPH_EXT_VERSION_1_8) { - initialize_graph_through_command_list(); - } else { - ze_graph_properties_2_t properties = {}; - properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES; - _graph_ddi_table_ext.pfnGetProperties2(_graph, &properties); - - if (properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) { - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGraphInitialize"); - _graph_ddi_table_ext.pfnGraphInitialize(_graph); - } - - if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) { - initialize_graph_through_command_list(); - } - } - - if (config.has()) { - setWorkloadType(config.get()); - } -} - -void ZeroExecutor::initialize_graph_through_command_list() const { - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, - itt::domains::LevelZeroBackend, - "Executor::ZeroExecutor", - "initialize_graph_through_command_list"); - - _logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list"); - OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor"); - CommandList 
graph_command_list(_initStructs->getDevice(), - _initStructs->getContext(), - _graph_ddi_table_ext, - _config, - _group_ordinal); - _logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue"); - CommandQueue graph_command_queue(_initStructs->getDevice(), - _initStructs->getContext(), - ZE_COMMAND_QUEUE_PRIORITY_NORMAL, - _initStructs->getCommandQueueDdiTable(), - _config, - _group_ordinal); - _logger.debug("ZeroExecutor::ZeroExecutor - create fence"); - Fence fence(graph_command_queue, _config); - - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize"); - _logger.debug("ZeroExecutor::ZeroExecutor - performing appendGraphInitialize"); - graph_command_list.appendGraphInitialize(_graph); - _logger.debug("ZeroExecutor::ZeroExecutor - closing graph command list"); - graph_command_list.close(); - - OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "queue_execute"); - _logger.debug("ZeroExecutor::ZeroExecutor - performing executeCommandList"); - graph_command_queue.executeCommandList(graph_command_list, fence); - _logger.debug("ZeroExecutor::ZeroExecutor - performing hostSynchronize"); - fence.hostSynchronize(); - _logger.debug("ZeroExecutor::ZeroExecutor - hostSynchronize completed"); -} - -void ZeroExecutor::setWorkloadType(const ov::WorkloadType workloadType) const { - ze_command_queue_workload_type_t zeWorkloadType; - switch (workloadType) { - case ov::WorkloadType::DEFAULT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; - break; - case ov::WorkloadType::EFFICIENT: - zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; - break; - default: - OPENVINO_THROW("Unknown value for WorkloadType!"); - } - - _command_queues->setWorkloadType(zeWorkloadType); -} - -void ZeroExecutor::setArgumentValue(uint32_t argi_, const void* argv_) const { - ze_result_t result = _graph_ddi_table_ext.pfnSetArgumentValue(_graph, argi_, argv_); - if (ZE_RESULT_SUCCESS != result) { - 
THROW_ON_FAIL_FOR_LEVELZERO_EXT("zeGraphSetArgumentValue", result, _graph_ddi_table_ext); - } -} - -void ZeroExecutor::mutexLock() const { - _mutex.lock(); -} - -void ZeroExecutor::mutexUnlock() const { - _mutex.unlock(); -} - -ZeroExecutor::~ZeroExecutor() { - _logger.debug("~ZeroExecutor() - pfnDestroy _graph "); - auto result = _graph_ddi_table_ext.pfnDestroy(_graph); - if (ZE_RESULT_SUCCESS != result) { - _logger.error("_graph_ddi_table_ext.pfnDestroy failed %#X", uint64_t(result)); - } -} diff --git a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp index dd2629372dc7d8..1c5ceecfac1961 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_infer_request.cpp @@ -31,8 +31,7 @@ constexpr bool OUTPUT = false; * @param ioDescriptor The OpenVINO API specific I/O descriptor which shall be compared. * @param zeDescriptor The Level Zero specific structure used for comparison. 
*/ -void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, - const ZeroExecutor::ArgumentDescriptor& zeDescriptor) { +void check_level_zero_attributes_match(const IODescriptor& ioDescriptor, const ArgumentDescriptor& zeDescriptor) { std::string zeDescriptorName = zeDescriptor.info.name; if (isStateInputName(zeDescriptorName)) { @@ -158,38 +157,35 @@ std::optional ZeroInferRequest::get_batch_size(const NetworkMetadata& me //------------------------------------------------------------------------------ ZeroInferRequest::ZeroInferRequest(const std::shared_ptr& initStructs, const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config) : SyncInferRequest(compiledModel, config), _initStructs(initStructs), - _executorPtr(executor), - _executor(static_cast(_executorPtr.get())), + _graph(compiledModel->get_graph()), _config(config), _logger("ZeroInferRequest", config.get()), _levelZeroInputTensors(_metadata.inputs.size(), std::vector>(1, nullptr)), _levelZeroOutputTensors(_metadata.outputs.size(), nullptr), _inputTensorsData(_metadata.inputs.size(), std::vector>(1, std::nullopt)), _outputTensorsData(_metadata.outputs.size(), std::nullopt), - _profilingPool(_executor->graph(), zeroProfiling::POOL_SIZE, _executor->getInitStructs()->getProfilingDdiTable()), - _profilingQuery(0, - _executor->getInitStructs()->getDevice(), - _executor->getInitStructs()->getProfilingDdiTable()) { + _profilingPool(static_cast(_graph->get_handle()), + zeroProfiling::POOL_SIZE, + _initStructs->getProfilingDdiTable()), + _profilingQuery(0, _initStructs->getDevice(), _initStructs->getProfilingDdiTable()) { _logger.debug("ZeroInferRequest::ZeroInferRequest - SyncInferRequest"); - const std::vector& executorInputDescriptors = _executor->get_input_descriptors(); - const std::vector& executorOutputDescriptors = - _executor->get_output_descriptors(); + const std::vector& executorInputDescriptors = _graph->get_input_descriptors(); + const std::vector& 
executorOutputDescriptors = _graph->get_output_descriptors(); auto proftype = config.get(); if (proftype == ov::intel_npu::ProfilingType::INFER) { _logger.debug("ZeroInferRequest::ZeroInferRequest - profiling type == ov::intel_npu::ProfilingType::INFER"); - _npuProfiling = std::make_shared(_executor->getInitStructs()->getContext(), - _executor->getInitStructs()->getDevice(), + _npuProfiling = std::make_shared(_initStructs->getContext(), + _initStructs->getDevice(), _config.get()); } _properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; THROW_ON_FAIL_FOR_LEVELZERO("zeDeviceGetProperties", - zeDeviceGetProperties(_executor->getInitStructs()->getDevice(), &_properties)); + zeDeviceGetProperties(_initStructs->getDevice(), &_properties)); _outputAllocator = std::make_shared(_initStructs); _inputAllocator = @@ -278,17 +274,24 @@ void ZeroInferRequest::create_pipeline() { _levelZeroOutputTensors.at(outputIndex)->get_byte_size()}); } + // Find the corresponding command queue group. + _logger.debug("ZeroDevice::ZeroDevice - findGroupOrdinal"); + auto groupOrdinal = zeroUtils::findGroupOrdinal(_initStructs->getDevice(), _properties); + _logger.debug("ZeroDevice::ZeroDevice - init completed"); + _logger.debug("ZeroInferRequest::create_pipeline - constructing pipeline"); - // Construct pipeline + // Construct pipeline _pipeline = std::make_unique(_config, - _executorPtr, + _initStructs, + _graph, _profilingPool, _profilingQuery, _npuProfiling, _inputTensorsData, _outputTensorsData, - _numberOfCommandLists); + _numberOfCommandLists, + groupOrdinal); _logger.debug("ZeroInferRequest::create_pipeline - SyncInferRequest completed"); } @@ -338,8 +341,8 @@ void ZeroInferRequest::set_tensor_data(const std::shared_ptr tensor OV_ITT_TASK_NEXT(ZERO_SET_TENSOR, "updateCommandList"); _pipeline->updateCommandList(*tensorsData, - isInput ? _executor->get_input_descriptors().at(index).idx - : _executor->get_output_descriptors().at(index).idx); + isInput ? 
_graph->get_input_descriptors().at(index).idx + : _graph->get_output_descriptors().at(index).idx); } } } @@ -370,9 +373,9 @@ void ZeroInferRequest::set_remote_tensor_data(const std::shared_ptrupdateCommandList(*tensorsData, - isInput ? _executor->get_input_descriptors().at(index).idx - : _executor->get_output_descriptors().at(index).idx); + _pipeline->updateCommandList( + *tensorsData, + isInput ? _graph->get_input_descriptors().at(index).idx : _graph->get_output_descriptors().at(index).idx); } } @@ -390,13 +393,17 @@ void ZeroInferRequest::set_tensor(const ov::Output& port, const if (foundPort.is_input()) { if (get_user_input(foundPort.idx)._ptr == tensor._ptr) { // Got set_tensor with the same object - do nothing + _logger.debug("ZeroInferRequest::set_tensor - got the same tensor, do nothing"); return; } if (is_batched_input(foundPort.idx)) { // resize vector size to 1 if set_tensor is called after set_tensors get_input_tensors_data(foundPort.idx).resize(1); + get_input_tensors_data(foundPort.idx).shrink_to_fit(); get_level_zero_inputs(foundPort.idx).resize(1); + get_level_zero_inputs(foundPort.idx).shrink_to_fit(); get_user_inputs(foundPort.idx).resize(1); + get_user_inputs(foundPort.idx).shrink_to_fit(); } get_user_input(foundPort.idx) = tensor; @@ -485,7 +492,7 @@ void ZeroInferRequest::set_tensors(const ov::Output& port, if (_pipelineIsCreated) { OV_ITT_TASK_NEXT(SET_TENSORS, "updateCommandList"); _pipeline->updateCommandList(*get_input_tensor_data(foundPort.idx, i), - _executor->get_input_descriptors().at(foundPort.idx).idx, + _graph->get_input_descriptors().at(foundPort.idx).idx, i); } } @@ -537,14 +544,16 @@ void ZeroInferRequest::infer_async() { _logger.debug("InferRequest::infer_async started"); OV_ITT_TASK_CHAIN(ZERO_INFER, itt::domains::LevelZeroBackend, "infer_async", "start"); - _executor->mutexLock(); - if (!_pipelineIsCreated) { - OV_ITT_TASK_NEXT(ZERO_INFER, "create_pipeline"); - create_pipeline(); + { + std::lock_guard lock(_graph->get_mutex()); 
- _pipelineIsCreated = true; + if (!_pipelineIsCreated) { + OV_ITT_TASK_NEXT(ZERO_INFER, "create_pipeline"); + create_pipeline(); + + _pipelineIsCreated = true; + } } - _executor->mutexUnlock(); size_t inputIndex = 0; for (const auto& userTensor : _userInputTensors) { @@ -740,12 +749,9 @@ std::vector ZeroInferRequest::get_profiling_info() const { if (compilerType == ov::intel_npu::CompilerType::MLIR) { // For plugin compiler retreive raw profiling data from backend and delegate // processing to the compiler - const auto& networkDesc = compiledModel.get_network_description(); - const auto& compiler = compiledModel.get_compiler(); - const auto& blob = networkDesc->compiledNetwork; auto profData = get_raw_profiling_data(); _logger.debug("InferRequest::get_profiling_info complete with compiler->process_profiling_output()."); - return compiler->process_profiling_output(profData, blob, compilerConfig); + return _graph->process_profiling_output(profData, compilerConfig); } else { auto proftype = _config.get(); if (proftype == ov::intel_npu::ProfilingType::INFER) { diff --git a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp index 009eee6541e8ef..34eb71eaf112f7 100644 --- a/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp +++ b/src/plugins/intel_npu/src/backend/src/zero_pipeline.cpp @@ -11,25 +11,25 @@ #include "intel_npu/prefix.hpp" #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_api.hpp" -#include "zero_types.hpp" +#include "intel_npu/utils/zero/zero_types.hpp" namespace intel_npu { Pipeline::Pipeline(const Config& config, - const std::shared_ptr& executorPtr, + const std::shared_ptr& initStructs, + const std::shared_ptr& graph, zeroProfiling::ProfilingPool& profiling_pool, zeroProfiling::ProfilingQuery& profiling_query, std::shared_ptr npu_profiling, const std::vector>>& inputTensorsData, const std::vector>& outputTensorsData, - const size_t numberOfCommandLists) + 
size_t numberOfCommandLists, + uint32_t group_ordinal) : _config(config), - _executor(static_cast(executorPtr.get())), - _command_queue(*_executor->getCommandQueue()), - _event_pool{_executor->getInitStructs()->getDevice(), - _executor->getInitStructs()->getContext(), - numberOfCommandLists ? static_cast(numberOfCommandLists) : 1, - _config}, + _command_queue(graph->get_command_queue()), + _event_pool{initStructs->getDevice(), + initStructs->getContext(), + numberOfCommandLists ? static_cast(numberOfCommandLists) : 1}, _npu_profiling(std::move(npu_profiling)), _logger("Pipeline", _config.get()) { OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Zero_infer_request::Pipeline::Pipeline"); @@ -45,38 +45,37 @@ Pipeline::Pipeline(const Config& config, _logger.debug("Pipeline - emplace_back _event_pool and _command_queue"); for (size_t i = 0; i < numberOfCommandLists; i++) { _command_lists.emplace_back( - std::make_unique(_executor->getInitStructs()->getDevice(), - _executor->getInitStructs()->getContext(), - _executor->getInitStructs()->getGraphDdiTable(), - _config, - _executor->get_group_ordinal(), - _executor->getInitStructs()->getMutableCommandListVersion() ? true : false)); - _events.emplace_back(std::make_unique(_event_pool.handle(), static_cast(i), _config)); - _fences.emplace_back(std::make_unique(_command_queue, _config)); + std::make_unique(initStructs->getDevice(), + initStructs->getContext(), + initStructs->getGraphDdiTable(), + group_ordinal, + initStructs->getMutableCommandListVersion() ? 
true : false)); + _events.emplace_back(std::make_unique(_event_pool.handle(), static_cast(i))); + _fences.emplace_back(std::make_unique(*_command_queue)); } for (size_t i = 0; i < numberOfCommandLists; i++) { size_t ioIndex = 0; - for (const auto& desc : _executor->get_input_descriptors()) { + for (const auto& desc : graph->get_input_descriptors()) { if (inputTensorsData.at(ioIndex).size() > 1) { - _executor->setArgumentValue(desc.idx, inputTensorsData.at(ioIndex).at(i)->mem); + graph->set_argument_value(desc.idx, inputTensorsData.at(ioIndex).at(i)->mem); ++ioIndex; continue; } - _executor->setArgumentValue(desc.idx, - static_cast(inputTensorsData.at(ioIndex).at(0)->mem) + - (i * inputTensorsData.at(ioIndex).at(0)->size) / numberOfCommandLists); + graph->set_argument_value(desc.idx, + static_cast(inputTensorsData.at(ioIndex).at(0)->mem) + + (i * inputTensorsData.at(ioIndex).at(0)->size) / numberOfCommandLists); ++ioIndex; } ioIndex = 0; - for (const auto& desc : _executor->get_output_descriptors()) { - _executor->setArgumentValue(desc.idx, - static_cast(outputTensorsData.at(ioIndex)->mem) + - (i * outputTensorsData.at(ioIndex)->size) / numberOfCommandLists); + for (const auto& desc : graph->get_output_descriptors()) { + graph->set_argument_value(desc.idx, + static_cast(outputTensorsData.at(ioIndex)->mem) + + (i * outputTensorsData.at(ioIndex)->size) / numberOfCommandLists); ++ioIndex; } @@ -86,7 +85,8 @@ Pipeline::Pipeline(const Config& config, _command_lists.at(i)->appendNpuTimestamp(reinterpret_cast(_npu_profiling->npu_ts_infer_start)); } - _command_lists.at(i)->appendGraphExecute(_executor->graph(), profiling_query.getHandle()); + _command_lists.at(i)->appendGraphExecute(static_cast(graph->get_handle()), + profiling_query.getHandle()); /// append timestamp command if feature was activated if (_npu_profiling != nullptr) { @@ -108,11 +108,11 @@ void Pipeline::push() { _logger.debug("Pipeline - push() started"); for (size_t i = 0; i < _command_lists.size(); ++i) { 
- OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); + OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PUSH, itt::domains::LevelZeroBackend, "Pipeline", "push"); if (sync_output_with_fences_) { - _command_queue.executeCommandList(*_command_lists.at(i), *_fences.at(i)); + _command_queue->executeCommandList(*_command_lists.at(i), *_fences.at(i)); } else { - _command_queue.executeCommandList(*_command_lists.at(i)); + _command_queue->executeCommandList(*_command_lists.at(i)); } } @@ -121,7 +121,7 @@ void Pipeline::push() { void Pipeline::pull() { _logger.debug("Pipeline - pull() started"); - OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_IP_PULL, itt::domains::LevelZeroBackend, "Pipeline", "pull"); + OV_ITT_TASK_CHAIN(ZERO_PIPELINE_IP_PULL, itt::domains::LevelZeroBackend, "Pipeline", "pull"); for (size_t i = 0; i < _command_lists.size(); ++i) { if (sync_output_with_fences_) { diff --git a/src/plugins/intel_npu/src/common/CMakeLists.txt b/src/plugins/intel_npu/src/common/CMakeLists.txt index 2d1f5d9cbb39ea..1aa93cce1bc291 100644 --- a/src/plugins/intel_npu/src/common/CMakeLists.txt +++ b/src/plugins/intel_npu/src/common/CMakeLists.txt @@ -20,7 +20,7 @@ target_link_libraries(${TARGET_NAME} PUBLIC openvino::npu_al openvino::npu_logger_utils - openvino::runtime::dev + openvino::npu_zero_utils ) set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO}) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiled_model.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiled_model.hpp index eb6a3de57e41fc..19023a1fca883f 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiled_model.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiled_model.hpp @@ -7,8 +7,8 @@ #include #include +#include "intel_npu/common/igraph.hpp" #include "intel_npu/config/common.hpp" -#include "intel_npu/icompiler.hpp" #include 
"openvino/runtime/icompiled_model.hpp" namespace intel_npu { @@ -17,17 +17,10 @@ class ICompiledModel : public ov::ICompiledModel { public: using ov::ICompiledModel::ICompiledModel; - virtual const std::shared_ptr& get_network_description() const = 0; + virtual const std::shared_ptr& get_graph() const = 0; virtual const Config& get_config() const = 0; - // Compiler is used for post-processing profiling data when using PERF_COUNT property - virtual const ov::SoPtr& get_compiler() const = 0; - - const NetworkMetadata& get_network_metadata() const { - return get_network_description()->metadata; - } - protected: std::shared_ptr shared_from_this() const { return std::dynamic_pointer_cast(ov::ICompiledModel::shared_from_this()); diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp new file mode 100644 index 00000000000000..51c4a4cf26eafd --- /dev/null +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/igraph.hpp @@ -0,0 +1,103 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include + +#include "intel_npu/network_metadata.hpp" +#include "intel_npu/utils/zero/zero_utils.hpp" +#include "intel_npu/utils/zero/zero_wrappers.hpp" +#include "openvino/runtime/profiling_info.hpp" + +namespace intel_npu { + +class IGraph : public std::enable_shared_from_this { +public: + IGraph(ze_graph_handle_t handle, NetworkMetadata metadata, std::optional> blob) + : _handle(handle), + _metadata(std::move(metadata)) { + if (blob.has_value()) { + _blob = std::move(*blob); + } + } + + virtual void export_blob(std::ostream& stream) const = 0; + + virtual std::vector process_profiling_output(const std::vector& profData, + const Config& config) const = 0; + + virtual void set_argument_value(uint32_t argi, const void* argv) const = 0; + + virtual void initialize(const Config& config) = 0; + + virtual 
~IGraph() = default; + + const NetworkMetadata& get_metadata() const { + return _metadata; + } + + ze_graph_handle_t get_handle() const { + return _handle; + } + + void update_network_name(std::string_view name) { + _metadata.name = name; + } + + inline const std::vector& get_input_descriptors() const { + return _input_descriptors; + } + + inline const std::vector& get_output_descriptors() const { + return _output_descriptors; + } + + inline const std::shared_ptr& get_command_queue() const { + return _command_queue; + } + + void set_workload_type(const ov::WorkloadType workloadType) const { + if (_command_queue == nullptr) { + return; + } + + ze_command_queue_workload_type_t zeWorkloadType; + switch (workloadType) { + case ov::WorkloadType::DEFAULT: + zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_DEFAULT; + break; + case ov::WorkloadType::EFFICIENT: + zeWorkloadType = ze_command_queue_workload_type_t::ZE_WORKLOAD_TYPE_BACKGROUND; + break; + default: + OPENVINO_THROW("Unknown value for WorkloadType!"); + } + + _command_queue->setWorkloadType(zeWorkloadType); + } + + std::mutex& get_mutex() { + return _mutex; + } + +protected: + ze_graph_handle_t _handle = nullptr; + NetworkMetadata _metadata; + + std::vector _input_descriptors; + std::vector _output_descriptors; + + std::shared_ptr _command_queue; + + // Used to protect zero pipeline creation in the graph. 
The pipeline should be created only once per graph when the + // first inference starts running + std::mutex _mutex; + + std::vector _blob; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp index 8c1eb57fe34fc3..b34f2deee6c61e 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/npu.hpp @@ -7,9 +7,9 @@ #include #include "intel_npu/common/icompiled_model.hpp" +#include "intel_npu/common/igraph.hpp" #include "intel_npu/common/sync_infer_request.hpp" #include "intel_npu/config/config.hpp" -#include "intel_npu/icompiler.hpp" #include "openvino/runtime/intel_npu/remote_properties.hpp" #include "openvino/runtime/iremote_context.hpp" #include "openvino/runtime/properties.hpp" @@ -54,11 +54,14 @@ class IEngineBackend : public std::enable_shared_from_this { //------------------------------------------------------------------------------ -class IExecutor { +class ICompilerAdapter { public: - virtual ~IExecutor() = default; + virtual std::shared_ptr compile(const std::shared_ptr& model, + const Config& config) const = 0; + virtual std::shared_ptr parse(std::vector network, const Config& config) const = 0; + virtual ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const = 0; - virtual void setWorkloadType(const ov::WorkloadType workloadType) const = 0; + virtual ~ICompilerAdapter() = default; }; //------------------------------------------------------------------------------ @@ -67,10 +70,6 @@ class IDevice : public std::enable_shared_from_this { public: using Uuid = ov::device::UUID; - virtual std::shared_ptr createExecutor( - const std::shared_ptr& networkDescription, - const Config& config) = 0; - virtual std::string getName() const = 0; virtual std::string getFullDeviceName() const = 0; virtual Uuid getUuid() const; @@ -85,7 
+84,6 @@ class IDevice : public std::enable_shared_from_this { virtual std::shared_ptr createInferRequest( const std::shared_ptr& compiledModel, - const std::shared_ptr& executor, const Config& config) = 0; virtual void updateInfo(const Config& config) = 0; diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp index 99f9ce7cb0eb28..788ce87136a04d 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/sync_infer_request.hpp @@ -5,8 +5,9 @@ #pragma once #include "intel_npu/common/icompiled_model.hpp" +#include "intel_npu/common/igraph.hpp" #include "intel_npu/common/variable_state.hpp" -#include "intel_npu/icompiler.hpp" +#include "intel_npu/network_metadata.hpp" #include "openvino/runtime/iinfer_request.hpp" #include "openvino/runtime/iplugin.hpp" diff --git a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp index 0ae0832fe29d72..0eeefccf43906d 100644 --- a/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/common/src/sync_infer_request.cpp @@ -21,7 +21,7 @@ namespace intel_npu { SyncInferRequest::SyncInferRequest(const std::shared_ptr& compiledModel, const Config& config) : _compiledModel(compiledModel), - _metadata(compiledModel->get_network_metadata()), + _metadata(compiledModel->get_graph()->get_metadata()), _logger("SyncInferRequest", config.get()), _userInputTensors(_metadata.inputs.size(), std::vector>(1, {nullptr})), _userOutputTensors(_metadata.outputs.size(), {nullptr}) { diff --git a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp deleted file mode 100644 index addd9ca5308c65..00000000000000 --- 
a/src/plugins/intel_npu/src/compiler/include/driver_compiler_adapter.hpp +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include "intel_npu/common/npu.hpp" -#include "intel_npu/icompiler.hpp" -#include "intel_npu/utils/logger/logger.hpp" - -namespace intel_npu { -namespace driverCompilerAdapter { - -/** - * @brief Adapter for Compiler in driver - * @details Wrap compiler in driver calls and do preliminary actions (like opset conversion) - */ -class LevelZeroCompilerAdapter final : public ICompiler { -public: - LevelZeroCompilerAdapter(std::shared_ptr iEngineBackend); - - uint32_t getSupportedOpsetVersion() const override final; - - NetworkDescription compile(const std::shared_ptr& model, - const Config& config) const override final; - - ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override final; - - NetworkMetadata parse(const std::vector& network, const Config& config) const override final; - - std::vector process_profiling_output(const std::vector& profData, - const std::vector& network, - const Config& config) const override final; - - void release(std::shared_ptr networkDescription) override; - - CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override; - -private: - /** - * @brief Separate externals calls to separate class - */ - std::shared_ptr apiAdapter; - Logger _logger; -}; - -} // namespace driverCompilerAdapter -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp b/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp deleted file mode 100644 index 5641408dffcac0..00000000000000 --- a/src/plugins/intel_npu/src/compiler/include/zero_compiler_in_driver.hpp +++ /dev/null @@ -1,201 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include -#include - 
-#include -#include - -#include "intel_npu/icompiler.hpp" -#include "intel_npu/utils/logger/logger.hpp" -#include "intel_npu/utils/zero/zero_api.hpp" -#include "zero_executor.hpp" - -namespace intel_npu { -namespace driverCompilerAdapter { - -using SerializedIR = std::pair>; - -#define NotSupportQuery(T) (T == ZE_GRAPH_EXT_VERSION_1_2) - -// ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy, -// pfnQueryNetworkGetSupportedLayers) -#define SupportAPIGraphQueryNetworkV1(T) (T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4) - -// ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) -#define SupportAPIGraphQueryNetworkV2(T) ((!NotSupportQuery(T) && !SupportAPIGraphQueryNetworkV1(T))) - -// For ext version >= 1.5, pfnCreate2 api is avaible -#define NotSupportGraph2(T) \ - (T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4) - -// A bug inside the driver makes the "pfnGraphGetArgumentMetadata" call not safe for use prior to -// "ze_graph_dditable_ext_1_6_t". 
-// See: E#117498 -#define NotSupportArgumentMetadata(T) \ - (T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4 || \ - T == ZE_GRAPH_EXT_VERSION_1_5) - -#define UseCopyForNativeBinary(T) \ - (T == ZE_GRAPH_EXT_VERSION_1_2 || T == ZE_GRAPH_EXT_VERSION_1_3 || T == ZE_GRAPH_EXT_VERSION_1_4 || \ - T == ZE_GRAPH_EXT_VERSION_1_5 || T == ZE_GRAPH_EXT_VERSION_1_6) - -/** - * Adapter to use CiD through ZeroAPI - */ -template -class LevelZeroCompilerInDriver final : public ICompiler { -public: - LevelZeroCompilerInDriver(ze_driver_handle_t driverHandle, - ze_device_handle_t deviceHandle, - ze_context_handle_t zeContext, - ze_graph_dditable_ext_curr_t& graph_ddi_table_ext); - LevelZeroCompilerInDriver(const LevelZeroCompilerInDriver&) = delete; - LevelZeroCompilerInDriver& operator=(const LevelZeroCompilerInDriver&) = delete; - ~LevelZeroCompilerInDriver() override; - - uint32_t getSupportedOpsetVersion() const override final; - - ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; - - NetworkDescription compile(const std::shared_ptr& model, - const Config& config) const override final; - - ze_result_t seriazlideIRModelAndCreateGraph(const std::shared_ptr& model, - const Config& config, - ze_device_graph_properties_t deviceGraphProperties, - ze_graph_handle_t& graphHandle) const; - - NetworkMetadata parse(const std::vector& network, const Config& config) const override final; - - std::vector process_profiling_output(const std::vector& profData, - const std::vector& network, - const Config& config) const override final { - OPENVINO_THROW("Profiling post-processing is not implemented."); - } - - template = true> - std::unordered_set getQueryResultFromSupportedLayers( - ze_result_t result, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - - /** - * @brief Serialize input / output information to string format. 
- * @details Format: - * --inputs_precisions="0: [1:]" - * --inputs_layouts="0: [1:]" - * --outputs_precisions="0:" - * --outputs_layouts="0:" - * - * For older compiler versions, the name of the inputs/outputs may be used instead of their indices. - * - * Since the layout information is no longer an important part of the metadata values when using the 2.0 OV - * API, the layout fields shall be filled with default values in order to assure the backward compatibility - * with the driver. - */ - static std::string serializeIOInfo(const std::shared_ptr& model, const bool useIndices); - - void release(std::shared_ptr networkDescription) override; - - CompiledNetwork getCompiledNetwork(const NetworkDescription& networkDescription) override; - -private: - NetworkMetadata getNetworkMeta(ze_graph_handle_t graphHandle) const; - - SerializedIR serializeIR(const std::shared_ptr& model, - ze_graph_compiler_version_info_t compilerVersion) const; - std::string serializeConfig(const Config& config, ze_graph_compiler_version_info_t& compilerVersion) const; - - template = true> - void getMetadata(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - uint32_t index, - std::vector& inputs, - std::vector& outputs) const; - - template = true> - void getMetadata(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - uint32_t index, - std::vector& inputs, - std::vector& outputs) const; - - template = true> - void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - std::vector& blob, - const uint8_t*& blobPtr, - size_t& blobSize) const; - - template = true> - void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt, - ze_graph_handle_t graphHandle, - std::vector& /* unusedBlob */, - const uint8_t*& blobPtr, - size_t& blobSize) const; - - template = true> - ze_result_t seriazlideIRModelAndQueryNetworkCreateV2(const std::shared_ptr& model, - const Config& config, - 
ze_device_graph_properties_t deviceGraphProperties, - const ze_device_handle_t& _deviceHandle, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - - // ext version >= 1.5, support API (pfnCreate2, pfnQueryNetworkCreate2, pfnQueryContextMemory) - template = true> - std::unordered_set queryImpl(const std::shared_ptr& model, - const Config& config) const; - - template = true> - ze_result_t seriazlideIRModelAndQueryNetworkCreateV1(const std::shared_ptr& model, - const Config& config, - ze_device_graph_properties_t deviceGraphProperties, - const ze_device_handle_t& _deviceHandle, - ze_graph_query_network_handle_t& hGraphQueryNetwork) const; - - // ext version == 1.3 && 1.4, support API (pfnQueryNetworkCreate, pfnQueryNetworkDestroy, - // pfnQueryNetworkGetSupportedLayers) - template = true> - std::unordered_set queryImpl(const std::shared_ptr& model, - const Config& config) const; - - // For ext version < 1.3 - template = true> - std::unordered_set queryImpl(const std::shared_ptr& model, - const Config& config) const; - - template = true> - ze_result_t createGraph(const ze_graph_format_t& format, - const SerializedIR& serializedIR, - const std::string& buildFlags, - const uint32_t& flags, - ze_graph_handle_t* graph) const; - - template = true> - ze_result_t createGraph(const ze_graph_format_t& format, - const SerializedIR& serializedIR, - const std::string& buildFlags, - const uint32_t& flags, - ze_graph_handle_t* graph) const; - -private: - ze_driver_handle_t _driverHandle = nullptr; - ze_device_handle_t _deviceHandle = nullptr; - ze_context_handle_t _context = nullptr; - - ze_graph_dditable_ext_curr_t& _graphDdiTableExt; - Logger _logger; -}; - -} // namespace driverCompilerAdapter -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp deleted file mode 100644 index 0406b375609044..00000000000000 --- 
a/src/plugins/intel_npu/src/compiler/src/driver_compiler_adapter.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "driver_compiler_adapter.hpp" - -#include "graph_transformations.hpp" -#include "intel_npu/config/common.hpp" -#include "intel_npu/utils/zero/zero_api.hpp" -#include "intel_npu/utils/zero/zero_result.hpp" -#include "ze_intel_npu_uuid.h" -#include "zero_backend.hpp" -#include "zero_compiler_in_driver.hpp" -#include "zero_init.hpp" - -namespace intel_npu { -namespace driverCompilerAdapter { - -LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr iEngineBackend) - : _logger("LevelZeroCompilerAdapter", Logger::global().level()) { - _logger.debug("initialize LevelZeroCompilerAdapter start"); - - std::shared_ptr zeroBackend = nullptr; - zeroBackend = std::dynamic_pointer_cast(iEngineBackend); - if (!zeroBackend) { - OPENVINO_THROW("LevelZeroCompilerAdapter init failed to cast zeroBackend, zeroBackend is a nullptr"); - } - - ze_context_handle_t zeContext = static_cast(zeroBackend->getContext()); - ze_driver_handle_t driverHandle = static_cast(zeroBackend->getDriverHandle()); - ze_device_handle_t deviceHandle = static_cast(zeroBackend->getDeviceHandle()); - ze_graph_dditable_ext_curr_t& graph_ddi_table_ext = zeroBackend->getGraphDdiTable(); - - uint32_t graphExtVersion = graph_ddi_table_ext.version(); - - if (driverHandle == nullptr) { - OPENVINO_THROW("LevelZeroCompilerAdapter failed to get properties about zeDriver"); - } - - _logger.info("LevelZeroCompilerAdapter creating adapter using graphExtVersion"); - - switch (graphExtVersion) { - case ZE_GRAPH_EXT_VERSION_1_3: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_4: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_5: - 
apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_6: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_7: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - case ZE_GRAPH_EXT_VERSION_1_8: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - default: - apiAdapter = std::make_shared>(driverHandle, - deviceHandle, - zeContext, - graph_ddi_table_ext); - break; - } - - _logger.info("initialize LevelZeroCompilerAdapter complete, using graphExtVersion: %d.%d", - ZE_MAJOR_VERSION(graphExtVersion), - ZE_MINOR_VERSION(graphExtVersion)); -} - -uint32_t LevelZeroCompilerAdapter::getSupportedOpsetVersion() const { - return apiAdapter->getSupportedOpsetVersion(); -} - -NetworkDescription LevelZeroCompilerAdapter::compile(const std::shared_ptr& model, - const Config& config) const { - _logger.debug("compile start"); - return apiAdapter->compile(model, config); -} - -ov::SupportedOpsMap LevelZeroCompilerAdapter::query(const std::shared_ptr& model, - const Config& config) const { - _logger.debug("query start"); - return apiAdapter->query(model, config); -} - -NetworkMetadata LevelZeroCompilerAdapter::parse(const std::vector& network, const Config& config) const { - _logger.debug("parse start"); - return apiAdapter->parse(network, config); -} - -std::vector LevelZeroCompilerAdapter::process_profiling_output(const std::vector&, - const std::vector&, - const Config&) const { - OPENVINO_THROW("Profiling post-processing is not implemented."); -} - -void LevelZeroCompilerAdapter::release(std::shared_ptr networkDescription) { - _logger.info("release - using adapter to release networkDescription"); - apiAdapter->release(std::move(networkDescription)); -} - -CompiledNetwork 
LevelZeroCompilerAdapter::getCompiledNetwork(const NetworkDescription& networkDescription) { - _logger.info("getCompiledNetwork - using adapter to perform getCompiledNetwork(networkDescription)"); - return apiAdapter->getCompiledNetwork(networkDescription); -} - -} // namespace driverCompilerAdapter -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp b/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp deleted file mode 100644 index 8f7ac4198bb0a4..00000000000000 --- a/src/plugins/intel_npu/src/compiler/src/zero_compiler_in_driver.cpp +++ /dev/null @@ -1,1081 +0,0 @@ -// Copyright (C) 2018-2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "zero_compiler_in_driver.hpp" - -#include -#include - -#include "graph_transformations.hpp" -#include "intel_npu/common/itt.hpp" -#include "intel_npu/config/common.hpp" -#include "intel_npu/config/compiler.hpp" -#include "intel_npu/config/runtime.hpp" -#include "intel_npu/prefix.hpp" -#include "intel_npu/utils/zero/zero_result.hpp" -#include "openvino/core/model.hpp" - -namespace { - -constexpr std::string_view INPUTS_PRECISIONS_KEY = "--inputs_precisions"; -constexpr std::string_view INPUTS_LAYOUTS_KEY = "--inputs_layouts"; -constexpr std::string_view OUTPUTS_PRECISIONS_KEY = "--outputs_precisions"; -constexpr std::string_view OUTPUTS_LAYOUTS_KEY = "--outputs_layouts"; - -//