From 9128e5d3296bbfe30838d028fb51f8cee7a702a4 Mon Sep 17 00:00:00 2001 From: Shivam Thakkar <66198514+shivam5522@users.noreply.github.com> Date: Thu, 12 Dec 2024 23:32:05 -0500 Subject: [PATCH 1/4] [ARM]: Implement CPU plugin just-in-time emitter for LogicalOr operation #27504 (#27987) ### Details: - Added a jit_logical_or_emitter derived class in aarch64/jit_eltwise_emitters - Created entry Algorithm::EltwiseLogicalOr in the get_supported_precisions in nodes/kernels/aarch64 - Add the EltwiseLogicalOr entry in the aarch64 executors supported algorithms ### Tickets: - #27504 --- .../plugin/aarch64/jit_eltwise_emitters.cpp | 52 +++++++++++++++++++ .../plugin/aarch64/jit_eltwise_emitters.hpp | 28 ++++++++++ .../nodes/executors/aarch64/jit_eltwise.cpp | 1 + .../aarch64/jit_uni_eltwise_generic.cpp | 2 + 4 files changed, 83 insertions(+) diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp index 39a2d20c092835..05a0e0a2cf6a0e 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp @@ -1329,6 +1329,58 @@ std::set> jit_logical_and_emitter::get_supported_prec return {{element::f32, element::f32}}; } +/// LOGICAL_OR /// +jit_logical_or_emitter::jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& node) + : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) { + prepare_table(); +} + +jit_logical_or_emitter::jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc) + : jit_emitter(host, host_isa, exec_prc) { + prepare_table(); +} + +size_t jit_logical_or_emitter::get_inputs_count() const { return 2; } + +size_t jit_logical_or_emitter::get_aux_vecs_count() const { return 1; } + +size_t jit_logical_or_emitter::get_aux_gprs_count() const { return 1; } + +void jit_logical_or_emitter::emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) { + emit_isa(in_vec_idxs, out_vec_idxs); + } else { + OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel"); + } +} + +template +void jit_logical_or_emitter::emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const { + OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string()); + + using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits::TReg; + const TReg src1 = TReg(in_vec_idxs[0]); + const TReg src2 = TReg(in_vec_idxs[1]); + const TReg dst = TReg(out_vec_idxs[0]); + const TReg aux = TReg(aux_vec_idxs[0]); + + h->orr(dst.b16, src1.b16, src2.b16); + h->ld1r(aux.s, table_val2("one")); + h->and_(dst.b16, dst.b16, aux.b16); +} + +void jit_logical_or_emitter::register_table_entries() { + push_arg_entry_of("one", 0x3f800000, true); +} + +std::set> jit_logical_or_emitter::get_supported_precisions(const std::shared_ptr& node) { + return {{element::f32, element::f32}}; +} + /// LOGICAL_NOT /// jit_logical_not_emitter::jit_logical_not_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp 
b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp index 2173a1487f1057..be4e51cd0b759d 100644 --- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp +++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp @@ -628,6 +628,34 @@ class jit_logical_and_emitter : public jit_emitter { void register_table_entries() override; }; +class jit_logical_or_emitter : public jit_emitter { +public: + jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const ov::element::Type exec_prc = ov::element::f32); + + jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator *host, + dnnl::impl::cpu::aarch64::cpu_isa_t host_isa, + const std::shared_ptr& n); + + size_t get_inputs_count() const override; + + size_t get_aux_vecs_count() const override; + + size_t get_aux_gprs_count() const override; + + static std::set> get_supported_precisions( + const std::shared_ptr& node = nullptr); + +private: + void emit_impl(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const override; + + template + void emit_isa(const std::vector &in_vec_idxs, const std::vector &out_vec_idxs) const; + + void register_table_entries() override; +}; + class jit_logical_not_emitter : public jit_emitter { public: jit_logical_not_emitter(dnnl::impl::cpu::aarch64::jit_generator* host, diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp index 0374888e3d7fcb..912fe23fcd1fcf 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp @@ -38,6 +38,7 @@ bool JitEltwiseExecutor::isSupported( Algorithm::EltwiseIsNaN, Algorithm::EltwiseLessEqual, Algorithm::EltwiseLogicalAnd, + Algorithm::EltwiseLogicalOr, Algorithm::EltwiseLogicalNot, Algorithm::EltwiseLogicalXor, Algorithm::EltwiseMaximum, diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp index cfe36f78cc40f9..b3fe7018d23677 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp @@ -655,6 +655,7 @@ std::shared_ptr jit_uni_eltwise_generic::create_eltwise_emitte OV_CASE(Algorithm::EltwiseIsInf, ov::intel_cpu::aarch64::jit_is_inf_emitter), OV_CASE(Algorithm::EltwiseLessEqual, ov::intel_cpu::aarch64::jit_less_equal_emitter), OV_CASE(Algorithm::EltwiseLogicalAnd, ov::intel_cpu::aarch64::jit_logical_and_emitter), + OV_CASE(Algorithm::EltwiseLogicalOr, ov::intel_cpu::aarch64::jit_logical_or_emitter), OV_CASE(Algorithm::EltwiseLogicalNot, ov::intel_cpu::aarch64::jit_logical_not_emitter), OV_CASE(Algorithm::EltwiseLogicalXor, ov::intel_cpu::aarch64::jit_logical_xor_emitter), OV_CASE(Algorithm::EltwiseIsNaN, ov::intel_cpu::aarch64::jit_is_nan_emitter), @@ -845,6 +846,7 @@ std::set> eltwise_precision_helper::get_supported_pre OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter), OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter), OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter), + OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter), OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter), OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter), OV_CASE(Algorithm::EltwiseMaximum, 
jit_maximum_emitter), From 6b2f8ec39e9db2affabbfc64f226590755e35787 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Fri, 13 Dec 2024 06:03:16 +0100 Subject: [PATCH 2/4] [core] Extend model serialize API to support std::filesystem::path (#28043) ### Details: - Extend `Serialize` and `save_model` API to support cpp17 path class ### Tickets: - CVS-158889 --------- Signed-off-by: Raasz, Pawel --- src/core/include/openvino/core/graph_util.hpp | 30 ++++++++++++-- src/core/include/openvino/pass/serialize.hpp | 11 +++++ .../pass/serialization/deterministicity.cpp | 41 +++++++++++++++++++ .../tests/pass/serialization/serialize.cpp | 17 ++++++++ 4 files changed, 95 insertions(+), 4 deletions(-) diff --git a/src/core/include/openvino/core/graph_util.hpp b/src/core/include/openvino/core/graph_util.hpp index 66c640a62314df..f5694ca89fee51 100644 --- a/src/core/include/openvino/core/graph_util.hpp +++ b/src/core/include/openvino/core/graph_util.hpp @@ -21,6 +21,10 @@ #include "openvino/op/parameter.hpp" #include "openvino/pass/serialize.hpp" +#ifdef OPENVINO_CPP_VER_17 +# include +#endif + namespace ov { OPENVINO_API @@ -288,27 +292,45 @@ bool replace_node_update_name(const std::shared_ptr& target, const std::sh /// \param bin_path Path where .bin file will be saved (optional). /// The same name as for xml_path will be used by default. /// \param version Version of the generated IR (optional). +/// \{ OPENVINO_API void serialize(const std::shared_ptr& m, const std::string& xml_path, const std::string& bin_path = "", ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED); +#ifdef OPENVINO_CPP_VER_17 +template >* = nullptr> +void serialize(const std::shared_ptr& m, + const Path& xml_path, + const Path& bin_path = {""}, + ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED) { + serialize(m, xml_path.string(), bin_path.string(), version); +} +#endif +/// \} + /// \brief Save given model into IR. Floating point weights are compressed to FP16 by default. /// This method saves a model to IR applying all necessary transformations that usually applied -/// in model conversion flow provided by mo tool. Paricularly, floatting point weights are compressed to FP16. +/// in model conversion flow provided by mo tool. Particularly, floating point weights are compressed to FP16. /// \param model Model which will be converted to IR representation. 
/// \param output_model Path to the output model file, must have extension .xml -/// \param compress_to_fp16 Whether to compress floatting point weights to FP16 (true by default) +/// \param compress_to_fp16 Whether to compress floating point weights to FP16 (true by default) OPENVINO_API void save_model(const std::shared_ptr& model, const std::string& output_model, bool compress_to_fp16 = true); - #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) OPENVINO_API void save_model(const std::shared_ptr& model, const std::wstring& output_model, bool compress_to_fp16 = true); #endif -} // namespace ov \ No newline at end of file + +#ifdef OPENVINO_CPP_VER_17 +template >* = nullptr> +void save_model(const std::shared_ptr& model, const Path& output_model, bool compress_to_fp16 = true) { + save_model(model, output_model.string(), compress_to_fp16); +} +#endif +} // namespace ov diff --git a/src/core/include/openvino/pass/serialize.hpp b/src/core/include/openvino/pass/serialize.hpp index fc3e743d4005dc..d0eaadde346bf6 100644 --- a/src/core/include/openvino/pass/serialize.hpp +++ b/src/core/include/openvino/pass/serialize.hpp @@ -11,6 +11,10 @@ #include "openvino/opsets/opset.hpp" #include "openvino/pass/pass.hpp" +#ifdef OPENVINO_CPP_VER_17 +# include +#endif + namespace ov { namespace pass { @@ -35,6 +39,13 @@ class OPENVINO_API Serialize : public ov::pass::ModelPass { Serialize(const std::string& xmlPath, const std::string& binPath, Version version = Version::UNSPECIFIED); +#ifdef OPENVINO_CPP_VER_17 + Serialize(const std::filesystem::path& xmlPath, + const std::filesystem::path& binPath, + Version version = Version::UNSPECIFIED) + : Serialize(xmlPath.string(), binPath.string(), version) {} +#endif + private: std::ostream* m_xmlFile; std::ostream* m_binFile; diff --git a/src/core/tests/pass/serialization/deterministicity.cpp b/src/core/tests/pass/serialization/deterministicity.cpp index 8441da501eb9bf..a93f092889d2a1 100644 --- a/src/core/tests/pass/serialization/deterministicity.cpp +++ b/src/core/tests/pass/serialization/deterministicity.cpp @@ -296,6 +296,47 @@ TEST_P(SerializationDeterministicityInputOutputTest, FromIrModel) { EXPECT_TRUE(files_equal(xml_2, xml_1)); } +#ifdef OPENVINO_CPP_VER_17 +TEST_P(SerializationDeterministicityInputOutputTest, FromOvModelBybPath) { + auto irVersion = GetParam(); + + std::shared_ptr modelRef; + { + auto parameter0 = std::make_shared(ov::element::f32, ov::Shape{1, 3, 22, 22}); + parameter0->set_friendly_name("input0"); + auto result0 = std::make_shared(parameter0); + result0->set_friendly_name("output0"); + auto parameter1 = std::make_shared(ov::element::f32, ov::Shape{1, 3, 22, 22}); + parameter1->set_friendly_name("input1"); + auto result1 = std::make_shared(parameter1); + result1->set_friendly_name("output1"); + modelRef = + std::make_shared(ov::NodeVector{result0, result1}, ov::ParameterVector{parameter0, parameter1}); + } + + auto& expected1 = modelRef; + const auto out_xml_path = std::filesystem::path(m_out_xml_path_1); + const auto out_bin_path = std::filesystem::path(m_out_bin_path_1); + ov::pass::Serialize(out_xml_path, out_bin_path, irVersion).run_on_model(modelRef); + auto expected2 = ov::test::readModel(m_out_xml_path_1, m_out_bin_path_1); + + ov::pass::Serialize(m_out_xml_path_2, m_out_bin_path_2, irVersion).run_on_model(expected2); + + EXPECT_EQ(input0Name, expected1->input(0).get_node()->get_friendly_name()); + EXPECT_EQ(input1Name, expected1->input(1).get_node()->get_friendly_name()); + EXPECT_EQ(output0Name, 
expected1->output(0).get_node()->get_friendly_name()); + EXPECT_EQ(output1Name, expected1->output(1).get_node()->get_friendly_name()); + EXPECT_EQ(input0Name, expected2->input(0).get_node()->get_friendly_name()); + EXPECT_EQ(input1Name, expected2->input(1).get_node()->get_friendly_name()); + EXPECT_EQ(output0Name, expected2->output(0).get_node()->get_friendly_name()); + EXPECT_EQ(output1Name, expected2->output(1).get_node()->get_friendly_name()); + + std::ifstream xml_1(m_out_xml_path_1, std::ios::in | std::ios::binary); + std::ifstream xml_2(m_out_xml_path_2, std::ios::in | std::ios::binary); + EXPECT_TRUE(files_equal(xml_1, xml_2)); +} +#endif + INSTANTIATE_TEST_SUITE_P(DeterministicityInputOutput, SerializationDeterministicityInputOutputTest, ::testing::Values(ov::pass::Serialize::Version::IR_V10, ov::pass::Serialize::Version::IR_V11)); diff --git a/src/core/tests/pass/serialization/serialize.cpp b/src/core/tests/pass/serialization/serialize.cpp index e45d5d1d1434ff..5cb1965feebdd7 100644 --- a/src/core/tests/pass/serialization/serialize.cpp +++ b/src/core/tests/pass/serialization/serialize.cpp @@ -74,6 +74,23 @@ TEST_P(SerializationTest, SaveModel) { }); } +#ifdef OPENVINO_CPP_VER_17 +TEST_P(SerializationTest, CompareFunctionsByPath) { + const auto out_xml_path = std::filesystem::path(m_out_xml_path); + const auto out_bin_path = std::filesystem::path(m_out_bin_path); + CompareSerialized([&out_xml_path, &out_bin_path](const auto& m) { + ov::pass::Serialize(out_xml_path, out_bin_path).run_on_model(m); + }); +} + +TEST_P(SerializationTest, SaveModelByPath) { + const auto out_xml_path = std::filesystem::path(m_out_xml_path); + CompareSerialized([&out_xml_path](const auto& m) { + ov::save_model(m, out_xml_path, false); + }); +} +#endif + INSTANTIATE_TEST_SUITE_P( IRSerialization, SerializationTest, From 8e7ff7b4c0f31e912e243affc2c3b9df56c5970f Mon Sep 17 00:00:00 2001 From: Tatiana Savina Date: Fri, 13 Dec 2024 06:58:37 +0100 Subject: [PATCH 3/4] [DOCS] fix link (#28048) ### Details: - *item1* - *...* ### Tickets: - *ticket-id* --- .../learn-openvino/llm_inference_guide/genai-guide.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst index 42c1c3fb47aa42..172586831252a9 100644 --- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst +++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst @@ -130,7 +130,7 @@ make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvi image_write("baseline.bmp", image) For more information, refer to the - `Python sample `__ + `Python sample `__ .. tab-item:: C++ :sync: cpp From 59984e969cfe4083a317314a1bdb4d44640a4eab Mon Sep 17 00:00:00 2001 From: Tomasz Krupa Date: Fri, 13 Dec 2024 06:10:50 +0000 Subject: [PATCH 4/4] [GPU] Enable weightless cache with precision conversion (#27742) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Details: This change makes constants which undergo precision conversion during transformation pipeline or graph optimization eligible for weightless caching. Information about precision conversion which happened before export to cache is recorded in the cache file. During the import from cache, functionally equivalent conversions are performed. Besides the unit tests in model_cache.cpp I tested accuracy and performance of llama-2-7b-chat with FP16 inference mode. 
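For context, a minimal sketch of how a user opts into the weightless cache from the runtime API. This is not part of the patch: the file names and `main` wrapper are placeholders, and the properties are the ones exercised by the updated model_cache.cpp test below.

```cpp
#include <fstream>

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;

    // OPTIMIZE_SIZE selects the weightless cache: eligible constants are stored in the
    // blob as offset/size/dtype records pointing into the original .bin instead of raw data.
    auto compiled = core.compile_model("model.xml",
                                       "GPU",
                                       ov::cache_dir("gpu_cache"),
                                       ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                       ov::hint::inference_precision(ov::element::f16));

    // Explicit export/import path: on import, the original weights file is needed
    // so the constants (and any recorded precision conversion) can be reconstructed.
    std::ofstream out("model.blob", std::ofstream::binary);
    compiled.export_model(out);
    out.close();

    ov::AnyMap import_cfg = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                             ov::weights_path("model.bin")};
    std::ifstream in("model.blob", std::ifstream::binary);
    auto imported = core.import_model(in, "GPU", import_cfg);
    return 0;
}
```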
Performance impact (weightless caching is OPTIMIZE_SIZE):

|   | OPTIMIZE_SPEED | OPTIMIZE_SIZE |
| -- | -- | -- |
| FP16 model import, no cache | 25.4 s | 13.6 s |
| FP16 model import, cache exists | 6.2 s | 6.4 s |
| FP32 model import, no cache | 57.6 s | 45.8 s |
| FP32 model import, cache exists | 8.5 s | 15.2 s |

|   | OPTIMIZE_SPEED | OPTIMIZE_SIZE |
| -- | -- | -- |
| FP16 model cache size | 13 GB | 6.1 MB |
| FP32 model cache size | 13 GB | 6.2 MB |

Model import time is measured as the duration of the from_pretrained() call when running the llama model with the openvino.genai/tools/llm_bench tool.

Question to reviewers: I'm unsure whether the condition in ov::WeightlessCacheAttribute::is_copyable() is too lenient. Specifically, I'm thinking of a scenario where a single complex transformation changes a constant's data type AND something else at the same time. This would render the constant eligible for weightless caching even though the reconstruction of transformations during the cache load is not aware of anything besides the data type change (which would break the feature). Does such a complex transformation exist?

### Tickets:
 - CVS-157081
---
 .../src/transformations/convert_precision.cpp |   8 +
 .../tests/utils/convert_precision.cpp         |  36 +++
 .../rt_info/weightless_caching_attributes.hpp |   7 +-
 src/frontends/ir/src/ir_deserializer.cpp      |   6 +-
 .../include/intel_gpu/primitives/data.hpp     | 219 +++++++++++++++---
 .../graph_optimizer/propagate_constants.cpp   |  67 +++++-
 .../src/graph/include/pass_manager.h          |   7 +-
 .../intel_gpu/src/plugin/program_builder.cpp  |  11 +-
 .../tests/functional/behavior/model_cache.cpp | 116 ++++++----
 .../unit/shape_infer/eltwise_si_test.cpp      |   6 +-
 10 files changed, 375 insertions(+), 108 deletions(-)

diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp
index 8a2985a284769a..aa067da4f360fd 100644
--- a/src/common/transformations/src/transformations/convert_precision.cpp
+++ b/src/common/transformations/src/transformations/convert_precision.cpp
@@ -8,6 +8,7 @@
 #include

 #include "itt.hpp"
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
 #include "openvino/op/ops.hpp"
 #include "openvino/pass/constant_folding.hpp"
 #include "openvino/pass/manager.hpp"
@@ -1405,6 +1406,13 @@ bool fuse_type_to_constant(const std::shared_ptr<ov::Node>& node,
         new_const->validate_and_infer_types();
         new_const->set_friendly_name(constant->get_friendly_name());
         ov::copy_runtime_info(constant, new_const);
+
+        const auto& rt_info = node->get_rt_info();
+        auto weightless_caching_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
+        if (weightless_caching_attr != rt_info.end()) {
+            new_const->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] =
+                weightless_caching_attr->second;
+        }
         return true;
     }
     return false;
diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp
index 318f15ab1a64dc..c2b7133506aebe 100644
--- a/src/common/transformations/tests/utils/convert_precision.cpp
+++ b/src/common/transformations/tests/utils/convert_precision.cpp
@@ -13,6 +13,7 @@
 #include "common_test_utils/ov_test_utils.hpp"
 #include "openvino/core/model.hpp"
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
 #include "openvino/opsets/opset1.hpp"
 #include "openvino/opsets/opset10.hpp"
 #include "openvino/opsets/opset15.hpp"
@@ -2702,3 +2703,38 @@ TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_orig_types
FunctionsComparator::Result result = func_comparator(model_ref, model); ASSERT_TRUE(result.valid) << result.message; } + +TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_weightless_cache_info_as_rt_attribute) { + pass::Manager manager; + + auto some_value = opset10::Constant::create(element::f32, Shape{1}, {2}); + auto& node_rt_info = some_value->get_rt_info(); + ov::WeightlessCacheAttribute attr(element::f32.size(), 0, element::f32); + node_rt_info[ov::WeightlessCacheAttribute::get_type_info_static()] = attr; + + ov::ParameterVector inputParams; + ov::ResultVector results; + results.push_back(std::make_shared(some_value->output(0))); + auto model = std::make_shared(results, inputParams); + + type_to_fuse_map empty_type_to_fuse_map = {}; + bool keep_precision_sensitive_in_fp32 = false; + bool convert_input_output_precision = false; + bool store_original_precision_as_rt_attribute = true; + manager.register_pass(precisions_map{{element::f32, element::f16}}, + empty_type_to_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision, + store_original_precision_as_rt_attribute); + manager.run_passes(model); + + const auto& ops = model->get_ops(); + auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr& node) { + return ov::op::util::is_constant(node); + }); + + ASSERT_TRUE(it != ops.end()); + const auto& new_rt_info = (*it)->get_rt_info(); + auto weightless_caching_attr_it = new_rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); + ASSERT_TRUE(weightless_caching_attr_it != new_rt_info.end()); +} diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp index fedcb030fb52cf..e3cf2609b26c8d 100644 --- a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp +++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp @@ -5,6 +5,7 @@ #pragma once #include "openvino/core/core_visibility.hpp" +#include "openvino/core/node.hpp" #include "openvino/core/runtime_attribute.hpp" namespace ov { @@ -25,14 +26,16 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute { WeightlessCacheAttribute() = delete; - WeightlessCacheAttribute(size_t original_size, size_t bin_offset) + WeightlessCacheAttribute(size_t original_size, size_t bin_offset, ov::element::Type original_dtype) : original_size(original_size), - bin_offset(bin_offset) {} + bin_offset(bin_offset), + original_dtype(original_dtype) {} bool is_copyable() const override; size_t original_size; size_t bin_offset; + ov::element::Type original_dtype; }; } // namespace ov diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp index 2d1dfba956ea72..d7e250f9916302 100644 --- a/src/frontends/ir/src/ir_deserializer.cpp +++ b/src/frontends/ir/src/ir_deserializer.cpp @@ -950,10 +950,12 @@ std::shared_ptr ov::XmlDeserializer::create_node(const std::vector(pugixml::get_uint64_attr(dn, "size")), - static_cast(pugixml::get_uint64_attr(dn, "offset"))); + static_cast(pugixml::get_uint64_attr(dn, "offset")), + ov::element::Type(pugixml::get_str_attr(dn, "element_type"))); } } diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp index 461f063ec26bc5..8a9a35b1e92fe9 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp @@ -4,15 +4,170 @@ 
#pragma once #include +#include #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/runtime/memory.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/util/op_types.hpp" +#include "openvino/pass/manager.hpp" #include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/mmap_object.hpp" #include "primitive.hpp" +#include "transformations/convert_precision.hpp" namespace cldnn { +struct weights_mem { + std::shared_ptr>> shared_buf = nullptr; + std::shared_ptr transformed_constant = nullptr; + + const uint8_t* get_loaded_data() { + if (transformed_constant) { + return reinterpret_cast(transformed_constant->get_data_ptr()); + } + OPENVINO_ASSERT(shared_buf); + return shared_buf->get_ptr(); + } +}; + +struct weightless_cache_manager { + void set_constant_info(size_t bin_offset, + size_t original_size, + ov::element::Type original_dtype, + ov::element::Type curr_dtype, + ov::Shape shape) { + this->bin_offset = bin_offset; + this->original_size = original_size; + this->original_dtype = original_dtype; + this->curr_dtype = curr_dtype; + this->shape = shape; + do_weightless_caching = true; + + if (original_dtype != curr_dtype) { + do_precision_conversion = true; + } + } + + void invalidate() { + do_weightless_caching = false; + } + + void set_new_dtype(ov::element::Type curr_dtype) { + this->curr_dtype = curr_dtype; + do_precision_conversion = original_dtype != curr_dtype; + } + + bool save(BinaryOutputBuffer& ob, size_t data_size) const { + if (!do_weightless_caching) { + ob << false; + return false; + } + + ob << true; + ob << bin_offset; + ob << do_precision_conversion; + if (do_precision_conversion) { + ob << original_size; + ob << make_data(&original_dtype, sizeof(ov::element::Type)); + ob << make_data(&curr_dtype, sizeof(ov::element::Type)); + + size_t num_dims = shape.size(); + ob << make_data(&num_dims, sizeof(size_t)); + ob << make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type)); + } + return true; + } + + std::shared_ptr load(BinaryInputBuffer& ib, + std::shared_ptr mapped_weights, + size_t data_size) { + ib >> do_weightless_caching; + if (!do_weightless_caching) { + return nullptr; + } + + OPENVINO_ASSERT(mapped_weights != nullptr, "mmap object is null"); + + ib >> bin_offset; + ib >> do_precision_conversion; + if (do_precision_conversion) { + ib >> original_size; + ib >> make_data(&original_dtype, sizeof(ov::element::Type)); + ib >> make_data(&curr_dtype, sizeof(ov::element::Type)); + + size_t num_dims = 0; + ib >> make_data(&num_dims, sizeof(size_t)); + shape.resize(num_dims); + ib >> make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type)); + } else { + original_size = data_size; + } + + auto mem_obj = std::make_shared(); + mem_obj->shared_buf = std::make_shared>>( + mapped_weights->data() + bin_offset, + original_size, + mapped_weights); + + if (should_run_transformations()) { + run_transformations(mem_obj); + } + return mem_obj; + } + +private: + bool do_weightless_caching = false; + bool do_precision_conversion = false; + + size_t bin_offset = SIZE_MAX; + size_t original_size = SIZE_MAX; + ov::element::Type original_dtype = ov::element::Type_t::undefined; + ov::element::Type curr_dtype = ov::element::Type_t::undefined; + ov::Shape shape; + + bool should_run_transformations() { + return do_precision_conversion; + } + + void run_transformations(std::shared_ptr mem_obj) { + auto orig_constant = std::make_shared(original_dtype, + shape, + mem_obj->shared_buf->get_ptr(), + mem_obj->shared_buf); + + ov::ParameterVector inputParams; + 
ov::ResultVector results; + results.push_back(std::make_shared(orig_constant->output(0))); + auto model = std::make_shared(results, inputParams, "aux"); + + ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations"); + + if (do_precision_conversion) { + precisions_map fp_convert_precision_map = { + {original_dtype, curr_dtype}}; + type_to_fuse_map empty_fuse_map = {}; + const bool keep_precision_sensitive_in_fp32 = false; + const bool convert_input_output_precision = false; + const bool store_original_precision_as_rt_attribute = true; + manager.register_pass(fp_convert_precision_map, + empty_fuse_map, + keep_precision_sensitive_in_fp32, + convert_input_output_precision, + store_original_precision_as_rt_attribute); + } + + manager.run_passes(model); + const auto& ops = model->get_ops(); + auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr& node) { + return ov::op::util::is_constant(node); + }); + OPENVINO_ASSERT(it != ops.end()); + mem_obj->transformed_constant = std::dynamic_pointer_cast(*it); + OPENVINO_ASSERT(mem_obj->transformed_constant->get_element_type() == curr_dtype); + } +}; + /// @brief Provides input data to topology. /// @details This primitive allows to pass data which is known at topology creation. /// For example, weights and biases for scoring networks. @@ -20,21 +175,32 @@ namespace cldnn { struct data : public primitive_base { CLDNN_DECLARE_PRIMITIVE(data) - data() : primitive_base("", {}) {} + data() : primitive_base("", {}) { + cache_info = std::make_shared(); + } /// @brief Constructs data primitive. /// @param id This primitive id. /// @param mem @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. - data(const primitive_id& id, memory::ptr mem) - : primitive_base(id, {}), mem(std::move(mem)) {} + data(const primitive_id& id, memory::ptr mem) : primitive_base(id, {}), mem(std::move(mem)) { + cache_info = std::make_shared(); + } + + data(const primitive_id& id, memory::ptr mem, std::shared_ptr cache_info) + : primitive_base(id, {}), + mem(std::move(mem)), + cache_info(cache_info) { + if (!cache_info) { + this->cache_info = std::make_shared(); + } + } /// @brief @ref memory object which contains data. /// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build. 
memory::ptr mem; - size_t original_size = SIZE_MAX; - size_t bin_offset = SIZE_MAX; + std::shared_ptr cache_info; size_t hash() const override { size_t seed = primitive::hash(); @@ -53,13 +219,8 @@ struct data : public primitive_base { size_t data_size = mem->size(); ob << make_data(&data_size, sizeof(size_t)); - bool is_cache_without_weights = bin_offset != SIZE_MAX && data_size == original_size; - - if (is_cache_without_weights) { - ob << true; - ob << bin_offset; - } else { - ob << false; + bool do_weightless_caching = cache_info->save(ob, data_size); + if (!do_weightless_caching) { if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { ob << make_data(mem->buffer_ptr(), data_size); } else { @@ -88,26 +249,12 @@ struct data : public primitive_base { mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false); - bool is_cache_without_weights; - ib >> is_cache_without_weights; - if (is_cache_without_weights && mapped_weights == nullptr) { - OPENVINO_THROW("mmap object is null"); - } - - std::shared_ptr>> shared_buf; - if (is_cache_without_weights) { - ib >> bin_offset; - original_size = data_size; - - shared_buf = std::make_shared>>( - mapped_weights->data() + bin_offset, - data_size, - mapped_weights); - } + auto mem_obj = cache_info->load(ib, mapped_weights, data_size); + bool is_weightless_caching_enabled = mem_obj != nullptr; if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) { - if (is_cache_without_weights) { - std::memcpy(reinterpret_cast(mem->buffer_ptr()), shared_buf->get_ptr(), data_size); + if (is_weightless_caching_enabled) { + std::memcpy(reinterpret_cast(mem->buffer_ptr()), mem_obj->get_loaded_data(), data_size); } else { ib >> make_data(mem->buffer_ptr(), data_size); } @@ -116,8 +263,8 @@ struct data : public primitive_base { auto& strm = ib.get_engine().get_service_stream(); if (data_size < DATA_BLOCK_SIZE || output_layout.format.is_image_2d()) { std::vector _buf(data_size); - if (is_cache_without_weights) { - std::memcpy(reinterpret_cast(_buf.data()), shared_buf->get_ptr(), data_size); + if (is_weightless_caching_enabled) { + std::memcpy(reinterpret_cast(_buf.data()), mem_obj->get_loaded_data(), data_size); } else { ib >> make_data(_buf.data(), data_size); } @@ -135,9 +282,9 @@ struct data : public primitive_base { size_t copy_size = (data_size > (dst_offset + DATA_BLOCK_SIZE)) ? 
DATA_BLOCK_SIZE : (data_size - dst_offset); if (buf_flag) { - if (is_cache_without_weights) { + if (is_weightless_caching_enabled) { std::memcpy(reinterpret_cast(_buf1.data()), - shared_buf->get_ptr() + dst_offset, + mem_obj->get_loaded_data() + dst_offset, copy_size); } else { ib >> make_data(_buf1.data(), copy_size); @@ -148,9 +295,9 @@ struct data : public primitive_base { } ev1 = mem->copy_from(strm, _buf1.data(), src_offset, dst_offset, copy_size, is_blocking); } else { - if (is_cache_without_weights) { + if (is_weightless_caching_enabled) { std::memcpy(reinterpret_cast(_buf2.data()), - shared_buf->get_ptr() + dst_offset, + mem_obj->get_loaded_data() + dst_offset, copy_size); } else { ib >> make_data(_buf2.data(), copy_size); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index 85173e9eb33e7c..a4129800733875 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -74,11 +74,14 @@ void propagate_constants::run(program& p) { // replace all constant nodes which are relevant for inference (either used by non-const user or marked as output) // with recomputed cldnn::data for (auto& cout : to_replace) { - auto& id_to_replace = cout.first; - auto mem_impl = cout.second; - - auto const_data = - std::make_shared("_cldnn_const_prop_" + id_to_replace, mem_impl /* <<< REMOVE ME WHEN POSSIBLE */); + auto& id_to_replace = std::get<0>(cout); + auto mem_impl = std::get<1>(cout); + auto cache_info = std::get<2>(cout); + auto in_layout = std::get<3>(cout); + + auto const_data = std::make_shared("_cldnn_const_prop_" + id_to_replace, + mem_impl, /* <<< REMOVE ME WHEN POSSIBLE */ + cache_info); auto& new_node = p.get_or_create(const_data); auto& curr_node = p.get_node(id_to_replace); @@ -92,6 +95,25 @@ void propagate_constants::run(program& p) { } } + auto is_reorder_with_only_dtype_change = [&](program_node& dst) { + if (!in_layout) { + return false; + } + auto& dst_layout = dst.get_output_layout(); + if (in_layout->data_type == dst_layout.data_type) { + return false; + } + + auto aux_layout = dst_layout; + aux_layout.data_type = in_layout->data_type; + return aux_layout == *in_layout.get(); + }; + if (is_reorder_with_only_dtype_change(new_node)) { + new_node.as().get_primitive()->cache_info->set_new_dtype(new_node.get_output_layout().data_type); + } else { + new_node.as().get_primitive()->cache_info->invalidate(); + } + curr_node.dependencies.clear(); // remove all constant users (as they will be either removed or replaced by cldnn::data which does not have any // dependencies) @@ -113,9 +135,10 @@ bool propagate_constants::has_non_const_user(program_node& node) const { return false; } -std::list> propagate_constants::calculate(engine& engine, - const ExecutionConfig& config, - std::shared_ptr task_executor) { +std::list, std::shared_ptr>> +propagate_constants::calculate(engine& engine, + const ExecutionConfig& config, + std::shared_ptr task_executor) { if (!has_non_trivial_constants) return {}; @@ -123,15 +146,37 @@ std::list> propagate_constants::calculate(e cf_config.set_property(ov::intel_gpu::optimize_data(false)); cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs)); network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true); - for (auto& cin : const_inputs) + std::map, std::shared_ptr>> + weightless_cache_map; + for (auto& cin : 
const_inputs) { net->set_input_data(cin->id(), cin->get_attached_memory_ptr()); + auto users = cin->get_users(); + if (users.size() == 1 && users.front()->is_type()) { + auto rprim = users.front()->as().get_primitive(); + auto id = rprim->id; + auto cache_ptr = cin->as().get_primitive()->cache_info; + auto layout_ptr = std::make_shared(cin->get_output_layout()); + weightless_cache_map.emplace(id, std::make_pair(cache_ptr, layout_ptr)); + } + } + net->execute({}); net->reset_execution(true); // wait for computations to complete auto outputs = net->get_outputs(); - std::list> ret; - for (auto& out : outputs) ret.push_back({out->id(), out->output_memory_ptr()}); + std::list, std::shared_ptr>> + ret; + for (auto& out : outputs) { + std::shared_ptr cache_ptr = nullptr; + std::shared_ptr layout_ptr = nullptr; + auto it = weightless_cache_map.find(out->id()); + if (it != weightless_cache_map.end()) { + cache_ptr = it->second.first; + layout_ptr = it->second.second; + } + ret.push_back({out->id(), out->output_memory_ptr(), cache_ptr, layout_ptr}); + } return ret; } diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index 490076a37f788e..0b7c3d85c37e27 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -211,9 +211,10 @@ class propagate_constants : public base_pass { private: void run(program& p) override; - std::list> calculate(engine& engine, - const ExecutionConfig& config, - std::shared_ptr task_executor); + std::list, std::shared_ptr>> + calculate(engine& engine, + const ExecutionConfig& config, + std::shared_ptr task_executor); bool has_non_const_user(program_node& node) const; void handle_constant(program& prog, program_node& node); void add_constant(program& prog, program_node& node); diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp index 368e25abe2ddac..a9bb813d0ce587 100644 --- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp +++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp @@ -14,6 +14,7 @@ #include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/plugin/program_builder.hpp" +#include "intel_gpu/primitives/data.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/primitives/mutable_data.hpp" @@ -311,11 +312,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptrm_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) { if (auto data_prim = dynamic_cast(prim.get())) { auto rt_info = op.get_rt_info(); + auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static()); if (weightless_cache_attr != rt_info.end()) { - data_prim->bin_offset = weightless_cache_attr->second.as().bin_offset; - data_prim->original_size = - weightless_cache_attr->second.as().original_size; + auto& attr = weightless_cache_attr->second.as(); + data_prim->cache_info->set_constant_info(attr.bin_offset, + attr.original_size, + attr.original_dtype, + op.get_output_element_type(0), + op.get_output_shape(0)); } } } diff --git a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp index 839b2640ca180c..17e1ed6d0a9bbe 100644 --- a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp +++ b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp @@ -8,48 +8,40 @@ 
#include "common_test_utils/common_utils.hpp" #include "common_test_utils/file_utils.hpp" #include "common_test_utils/ov_tensor_utils.hpp" -#include "common_test_utils/subgraph_builders/2_input_subtract.hpp" -#include "common_test_utils/subgraph_builders/concat_with_params.hpp" -#include "common_test_utils/subgraph_builders/conv_bias.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu_no_reshapes.hpp" -#include "common_test_utils/subgraph_builders/conv_pool_relu_non_zero.hpp" -#include "common_test_utils/subgraph_builders/convert_transpose.hpp" -#include "common_test_utils/subgraph_builders/detection_output.hpp" -#include "common_test_utils/subgraph_builders/kso_func.hpp" -#include "common_test_utils/subgraph_builders/matmul_bias.hpp" -#include "common_test_utils/subgraph_builders/multi_single_conv.hpp" -#include "common_test_utils/subgraph_builders/multiple_input_outpput_double_concat.hpp" -#include "common_test_utils/subgraph_builders/nested_branch_conv_concat.hpp" -#include "common_test_utils/subgraph_builders/nested_split_conv_concat.hpp" #include "common_test_utils/subgraph_builders/read_concat_split_assign.hpp" #include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp" -#include "common_test_utils/subgraph_builders/single_conv.hpp" -#include "common_test_utils/subgraph_builders/single_split.hpp" -#include "common_test_utils/subgraph_builders/split_concat.hpp" -#include "common_test_utils/subgraph_builders/split_conv_concat.hpp" -#include "common_test_utils/subgraph_builders/split_multi_conv_concat.hpp" #include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp" #include "common_test_utils/test_common.hpp" #include "openvino/pass/serialize.hpp" namespace { -class CheckWeightlessCacheAccuracy : public ::testing::Test, - public ::testing::WithParamInterface { +typedef std::tuple testParams; + +class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface { public: - static std::string get_test_case_name(::testing::TestParamInfo obj) { - bool use_compile_model_api = obj.param; + static std::string get_test_case_name(::testing::TestParamInfo obj) { + bool use_compile_model_api_; + ov::element::Type inference_mode_; + ov::element::Type model_dtype_; + std::tie(use_compile_model_api_, inference_mode_, model_dtype_) = obj.param; std::ostringstream result; - result << "use_compile_model_api=" << use_compile_model_api; + const char separator = '_'; + result << "use_compile_model_api=" << use_compile_model_api_ << separator; + result << "inference_mode=" << inference_mode_ << separator; + result << "model_dtype=" << model_dtype_; return result.str(); } + protected: std::shared_ptr model; std::string xml_path; std::string bin_path; std::string cache_path; - bool use_compile_model_api; // for loading from cache + std::string cache_dir; + bool use_compile_model_api; // for loading from cache + ov::element::Type inference_mode; + ov::element::Type model_dtype; void SetUp() override; void TearDown() override; @@ -61,36 +53,46 @@ void CheckWeightlessCacheAccuracy::SetUp() { xml_path = filePrefix + ".xml"; bin_path = filePrefix + ".bin"; cache_path = filePrefix + ".blob"; - use_compile_model_api = GetParam(); + cache_dir = filePrefix + "_cache_dir"; + + std::tie(use_compile_model_api, inference_mode, model_dtype) = GetParam(); } void CheckWeightlessCacheAccuracy::TearDown() { std::remove(xml_path.c_str()); std::remove(bin_path.c_str()); 
std::remove(cache_path.c_str()); + + ov::test::utils::removeFilesWithExt(cache_dir, "blob"); + ov::test::utils::removeFilesWithExt(cache_dir, "cl_cache"); + ov::test::utils::removeDir(cache_dir); } void CheckWeightlessCacheAccuracy::run() { - ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE) }; - ov::AnyMap config_with_weights_path = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) }; + ov::AnyMap config = {ov::cache_dir(cache_dir), + ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), + ov::hint::inference_precision(inference_mode)}; + ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), + ov::weights_path(bin_path), + ov::hint::inference_precision(inference_mode)}; auto core = ov::test::utils::PluginCache::get().core(); ov::pass::Serialize(xml_path, bin_path).run_on_model(model); ov::CompiledModel compiled_model; - OV_ASSERT_NO_THROW(compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config)); + compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config); - auto ofstr = std::ofstream(cache_path, std::ofstream::binary); - OV_ASSERT_NO_THROW(compiled_model.export_model(ofstr)); - ofstr.close(); + if (!use_compile_model_api) { + auto ofstr = std::ofstream(cache_path, std::ofstream::binary); + compiled_model.export_model(ofstr); + ofstr.close(); + } auto ifstr = std::ifstream(cache_path, std::ifstream::binary); ov::CompiledModel imported_model; if (use_compile_model_api) { - OV_ASSERT_NO_THROW(imported_model = - core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config)); + imported_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config); } else { - OV_ASSERT_NO_THROW(imported_model = - core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path)); + imported_model = core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path); } ifstr.close(); @@ -99,39 +101,57 @@ void CheckWeightlessCacheAccuracy::run() { for (size_t param_idx = 0; param_idx < model->get_parameters().size(); ++param_idx) { auto input = model->get_parameters().at(param_idx); - auto tensor = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input->get_shape()); + auto tensor = ov::test::utils::create_and_fill_tensor_real_distribution(input->get_element_type(), + input->get_shape(), + -100, + 100, + param_idx); orig_req.set_tensor(input, tensor); new_req.set_tensor(input, tensor); } - OV_ASSERT_NO_THROW(orig_req.infer()); - OV_ASSERT_NO_THROW(new_req.infer()); + orig_req.infer(); + new_req.infer(); auto result_vector = model->get_results(); for (auto& res : result_vector) { auto orig_out = orig_req.get_tensor(res); auto new_out = new_req.get_tensor(res); - ov::test::utils::compare(orig_out, new_out); + ov::test::utils::compare(orig_out, new_out, inference_mode); } } TEST_P(CheckWeightlessCacheAccuracy, ReadConcatSplitAssign) { - model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, model_dtype)); + OV_ASSERT_NO_THROW(run()); } TEST_P(CheckWeightlessCacheAccuracy, SingleConcatWithConstant) { - model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, model_dtype)); + OV_ASSERT_NO_THROW(run()); } TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) { - model = 
ov::test::utils::make_ti_with_lstm_cell(ov::element::f16); - run(); + OV_ASSERT_NO_THROW(model = ov::test::utils::make_ti_with_lstm_cell(model_dtype)); + OV_ASSERT_NO_THROW(run()); } -INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy, - ::testing::Bool(), +const std::vector inference_modes = { + ov::element::f32, + ov::element::f16, +}; + +const std::vector model_dtypes = { + ov::element::f32, + ov::element::f16, + ov::element::bf16, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, + CheckWeightlessCacheAccuracy, + ::testing::Combine(::testing::Bool(), + ::testing::ValuesIn(inference_modes), + ::testing::ValuesIn(model_dtypes)), CheckWeightlessCacheAccuracy::get_test_case_name); } // namespace diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp index 7abdbcb8c2fc52..7b4f27b5af05b4 100644 --- a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp @@ -23,11 +23,11 @@ using namespace ov; namespace shape_infer_tests { struct eltwise_test_params { - layout input1_layout; - layout input2_layout; + cldnn::layout input1_layout; + cldnn::layout input2_layout; eltwise_mode mode; AutoBroadcastSpec auto_broadcast_spec; - layout expected_layout; + cldnn::layout expected_layout; std::vector stride; };
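Finally, as a usage note for the serialization API extended in PATCH 2: the new `std::filesystem::path` overloads are drop-in replacements for the `std::string` ones. A minimal sketch, not taken from the patch — the paths and function name are placeholders, and a C++17 toolchain is assumed since the overloads are guarded by `OPENVINO_CPP_VER_17`:

```cpp
#include <filesystem>
#include <memory>

#include "openvino/core/graph_util.hpp"
#include "openvino/core/model.hpp"
#include "openvino/pass/serialize.hpp"

void serialize_with_paths(const std::shared_ptr<ov::Model>& model) {
    const std::filesystem::path xml_path{"model.xml"};
    const std::filesystem::path bin_path{"model.bin"};

    // Free-function overload: forwards to the std::string version via path::string().
    ov::serialize(model, xml_path, bin_path);

    // Equivalent pass-based route, using the new ov::pass::Serialize constructor.
    ov::pass::Serialize(xml_path, bin_path).run_on_model(model);

    // save_model overload; floating point weights are compressed to FP16 by default.
    ov::save_model(model, std::filesystem::path{"saved.xml"});
}
```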