diff --git a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
index 42c1c3fb47aa42..172586831252a9 100644
--- a/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
+++ b/docs/articles_en/learn-openvino/llm_inference_guide/genai-guide.rst
@@ -130,7 +130,7 @@ make sure to :doc:`install OpenVINO with GenAI <../../get-started/install-openvi
image_write("baseline.bmp", image)
For more information, refer to the
- `Python sample `__
+ `Python sample `__
.. tab-item:: C++
:sync: cpp
diff --git a/src/common/transformations/src/transformations/convert_precision.cpp b/src/common/transformations/src/transformations/convert_precision.cpp
index 8a2985a284769a..aa067da4f360fd 100644
--- a/src/common/transformations/src/transformations/convert_precision.cpp
+++ b/src/common/transformations/src/transformations/convert_precision.cpp
@@ -8,6 +8,7 @@
#include
#include "itt.hpp"
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
#include "openvino/op/ops.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "openvino/pass/manager.hpp"
@@ -1405,6 +1406,13 @@ bool fuse_type_to_constant(const std::shared_ptr<ov::Node>& node,
new_const->validate_and_infer_types();
new_const->set_friendly_name(constant->get_friendly_name());
ov::copy_runtime_info(constant, new_const);
+
+ const auto& rt_info = node->get_rt_info();
+ auto weightless_caching_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
+ if (weightless_caching_attr != rt_info.end()) {
+ new_const->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] =
+ weightless_caching_attr->second;
+ }
return true;
}
return false;
diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp
index 318f15ab1a64dc..c2b7133506aebe 100644
--- a/src/common/transformations/tests/utils/convert_precision.cpp
+++ b/src/common/transformations/tests/utils/convert_precision.cpp
@@ -13,6 +13,7 @@
#include "common_test_utils/ov_test_utils.hpp"
#include "openvino/core/model.hpp"
+#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/opsets/opset10.hpp"
#include "openvino/opsets/opset15.hpp"
@@ -2702,3 +2703,38 @@ TEST(TransformationTests, ConvertPrecision_assign_read_value_preserve_orig_types
FunctionsComparator::Result result = func_comparator(model_ref, model);
ASSERT_TRUE(result.valid) << result.message;
}
+
+TEST(TransformationTests, ConvertPrecision_preserve_weightless_cache_info_as_rt_attribute) {
+ pass::Manager manager;
+
+ auto some_value = opset10::Constant::create(element::f32, Shape{1}, {2});
+ auto& node_rt_info = some_value->get_rt_info();
+ ov::WeightlessCacheAttribute attr(element::f32.size(), 0, element::f32);
+ node_rt_info[ov::WeightlessCacheAttribute::get_type_info_static()] = attr;
+
+ ov::ParameterVector inputParams;
+ ov::ResultVector results;
+ results.push_back(std::make_shared<ov::op::v0::Result>(some_value->output(0)));
+ auto model = std::make_shared<ov::Model>(results, inputParams);
+
+ type_to_fuse_map empty_type_to_fuse_map = {};
+ bool keep_precision_sensitive_in_fp32 = false;
+ bool convert_input_output_precision = false;
+ bool store_original_precision_as_rt_attribute = true;
+ manager.register_pass<pass::ConvertPrecision>(precisions_map{{element::f32, element::f16}},
+ empty_type_to_fuse_map,
+ keep_precision_sensitive_in_fp32,
+ convert_input_output_precision,
+ store_original_precision_as_rt_attribute);
+ manager.run_passes(model);
+
+ const auto& ops = model->get_ops();
+ auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr<ov::Node>& node) {
+ return ov::op::util::is_constant(node);
+ });
+
+ ASSERT_TRUE(it != ops.end());
+ const auto& new_rt_info = (*it)->get_rt_info();
+ auto weightless_caching_attr_it = new_rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
+ ASSERT_TRUE(weightless_caching_attr_it != new_rt_info.end());
+}
diff --git a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
index fedcb030fb52cf..e3cf2609b26c8d 100644
--- a/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
+++ b/src/core/dev_api/openvino/core/rt_info/weightless_caching_attributes.hpp
@@ -5,6 +5,7 @@
#pragma once
#include "openvino/core/core_visibility.hpp"
+#include "openvino/core/node.hpp"
#include "openvino/core/runtime_attribute.hpp"
namespace ov {
@@ -25,14 +26,16 @@ class OPENVINO_API WeightlessCacheAttribute : public RuntimeAttribute {
WeightlessCacheAttribute() = delete;
- WeightlessCacheAttribute(size_t original_size, size_t bin_offset)
+ WeightlessCacheAttribute(size_t original_size, size_t bin_offset, ov::element::Type original_dtype)
: original_size(original_size),
- bin_offset(bin_offset) {}
+ bin_offset(bin_offset),
+ original_dtype(original_dtype) {}
bool is_copyable() const override;
size_t original_size;
size_t bin_offset;
+ ov::element::Type original_dtype;
};
} // namespace ov
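Note: `WeightlessCacheAttribute` now records the constant's original element type next to its size and offset in the `.bin` file, so a consumer can detect that the weights were precision-converted after loading. A minimal producer-side sketch of attaching the extended attribute (the helper name is hypothetical):

```cpp
#include "openvino/core/rt_info/weightless_caching_attributes.hpp"
#include "openvino/op/constant.hpp"

// Hypothetical helper: record where this constant's bytes live in the
// original .bin file and which dtype they were stored with there.
void tag_for_weightless_cache(const std::shared_ptr<ov::op::v0::Constant>& constant, size_t bin_offset) {
    constant->get_rt_info()[ov::WeightlessCacheAttribute::get_type_info_static()] =
        ov::WeightlessCacheAttribute(constant->get_byte_size(), bin_offset, constant->get_element_type());
}
```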
diff --git a/src/core/include/openvino/core/graph_util.hpp b/src/core/include/openvino/core/graph_util.hpp
index 66c640a62314df..f5694ca89fee51 100644
--- a/src/core/include/openvino/core/graph_util.hpp
+++ b/src/core/include/openvino/core/graph_util.hpp
@@ -21,6 +21,10 @@
#include "openvino/op/parameter.hpp"
#include "openvino/pass/serialize.hpp"
+#ifdef OPENVINO_CPP_VER_17
+# include <filesystem>
+#endif
+
namespace ov {
OPENVINO_API
@@ -288,27 +292,45 @@ bool replace_node_update_name(const std::shared_ptr<Node>& target, const std::sh
/// \param bin_path Path where .bin file will be saved (optional).
/// The same name as for xml_path will be used by default.
/// \param version Version of the generated IR (optional).
+/// \{
OPENVINO_API
void serialize(const std::shared_ptr<const ov::Model>& m,
const std::string& xml_path,
const std::string& bin_path = "",
ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED);
+#ifdef OPENVINO_CPP_VER_17
+template <class Path, std::enable_if_t<std::is_same_v<Path, std::filesystem::path>>* = nullptr>
+void serialize(const std::shared_ptr<const ov::Model>& m,
+ const Path& xml_path,
+ const Path& bin_path = {""},
+ ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED) {
+ serialize(m, xml_path.string(), bin_path.string(), version);
+}
+#endif
+/// \}
+
/// \brief Save given model into IR. Floating point weights are compressed to FP16 by default.
/// This method saves a model to IR applying all necessary transformations that usually applied
-/// in model conversion flow provided by mo tool. Paricularly, floatting point weights are compressed to FP16.
+/// in model conversion flow provided by mo tool. Particularly, floating point weights are compressed to FP16.
/// \param model Model which will be converted to IR representation.
/// \param output_model Path to the output model file, must have extension .xml
-/// \param compress_to_fp16 Whether to compress floatting point weights to FP16 (true by default)
+/// \param compress_to_fp16 Whether to compress floating point weights to FP16 (true by default)
OPENVINO_API
void save_model(const std::shared_ptr<const ov::Model>& model,
const std::string& output_model,
bool compress_to_fp16 = true);
-
#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT)
OPENVINO_API
void save_model(const std::shared_ptr<const ov::Model>& model,
const std::wstring& output_model,
bool compress_to_fp16 = true);
#endif
-} // namespace ov
\ No newline at end of file
+
+#ifdef OPENVINO_CPP_VER_17
+template <class Path, std::enable_if_t<std::is_same_v<Path, std::filesystem::path>>* = nullptr>
+void save_model(const std::shared_ptr<const ov::Model>& model, const Path& output_model, bool compress_to_fp16 = true) {
+ save_model(model, output_model.string(), compress_to_fp16);
+}
+#endif
+} // namespace ov
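With C++17 enabled, the new overloads accept `std::filesystem::path` directly and simply forward to the string-based entry points. A usage sketch (paths hypothetical):

```cpp
#include <filesystem>

#include "openvino/core/graph_util.hpp"

void dump_model(const std::shared_ptr<const ov::Model>& model) {
    const std::filesystem::path out_dir{"exported"};  // hypothetical output directory
    // Both calls resolve to the std::filesystem::path overloads added above.
    ov::serialize(model, out_dir / "model.xml", out_dir / "model.bin");
    ov::save_model(model, out_dir / "model_fp16.xml");  // FP16 weight compression on by default
}
```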
diff --git a/src/core/include/openvino/pass/serialize.hpp b/src/core/include/openvino/pass/serialize.hpp
index fc3e743d4005dc..d0eaadde346bf6 100644
--- a/src/core/include/openvino/pass/serialize.hpp
+++ b/src/core/include/openvino/pass/serialize.hpp
@@ -11,6 +11,10 @@
#include "openvino/opsets/opset.hpp"
#include "openvino/pass/pass.hpp"
+#ifdef OPENVINO_CPP_VER_17
+# include <filesystem>
+#endif
+
namespace ov {
namespace pass {
@@ -35,6 +39,13 @@ class OPENVINO_API Serialize : public ov::pass::ModelPass {
Serialize(const std::string& xmlPath, const std::string& binPath, Version version = Version::UNSPECIFIED);
+#ifdef OPENVINO_CPP_VER_17
+ Serialize(const std::filesystem::path& xmlPath,
+ const std::filesystem::path& binPath,
+ Version version = Version::UNSPECIFIED)
+ : Serialize(xmlPath.string(), binPath.string(), version) {}
+#endif
+
private:
std::ostream* m_xmlFile;
std::ostream* m_binFile;
diff --git a/src/core/tests/pass/serialization/deterministicity.cpp b/src/core/tests/pass/serialization/deterministicity.cpp
index 8441da501eb9bf..a93f092889d2a1 100644
--- a/src/core/tests/pass/serialization/deterministicity.cpp
+++ b/src/core/tests/pass/serialization/deterministicity.cpp
@@ -296,6 +296,47 @@ TEST_P(SerializationDeterministicityInputOutputTest, FromIrModel) {
EXPECT_TRUE(files_equal(xml_2, xml_1));
}
+#ifdef OPENVINO_CPP_VER_17
+TEST_P(SerializationDeterministicityInputOutputTest, FromOvModelByPath) {
+ auto irVersion = GetParam();
+
+ std::shared_ptr<ov::Model> modelRef;
+ {
+ auto parameter0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 3, 22, 22});
+ parameter0->set_friendly_name("input0");
+ auto result0 = std::make_shared<ov::op::v0::Result>(parameter0);
+ result0->set_friendly_name("output0");
+ auto parameter1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 3, 22, 22});
+ parameter1->set_friendly_name("input1");
+ auto result1 = std::make_shared<ov::op::v0::Result>(parameter1);
+ result1->set_friendly_name("output1");
+ modelRef =
+ std::make_shared<ov::Model>(ov::NodeVector{result0, result1}, ov::ParameterVector{parameter0, parameter1});
+ }
+
+ auto& expected1 = modelRef;
+ const auto out_xml_path = std::filesystem::path(m_out_xml_path_1);
+ const auto out_bin_path = std::filesystem::path(m_out_bin_path_1);
+ ov::pass::Serialize(out_xml_path, out_bin_path, irVersion).run_on_model(modelRef);
+ auto expected2 = ov::test::readModel(m_out_xml_path_1, m_out_bin_path_1);
+
+ ov::pass::Serialize(m_out_xml_path_2, m_out_bin_path_2, irVersion).run_on_model(expected2);
+
+ EXPECT_EQ(input0Name, expected1->input(0).get_node()->get_friendly_name());
+ EXPECT_EQ(input1Name, expected1->input(1).get_node()->get_friendly_name());
+ EXPECT_EQ(output0Name, expected1->output(0).get_node()->get_friendly_name());
+ EXPECT_EQ(output1Name, expected1->output(1).get_node()->get_friendly_name());
+ EXPECT_EQ(input0Name, expected2->input(0).get_node()->get_friendly_name());
+ EXPECT_EQ(input1Name, expected2->input(1).get_node()->get_friendly_name());
+ EXPECT_EQ(output0Name, expected2->output(0).get_node()->get_friendly_name());
+ EXPECT_EQ(output1Name, expected2->output(1).get_node()->get_friendly_name());
+
+ std::ifstream xml_1(m_out_xml_path_1, std::ios::in | std::ios::binary);
+ std::ifstream xml_2(m_out_xml_path_2, std::ios::in | std::ios::binary);
+ EXPECT_TRUE(files_equal(xml_1, xml_2));
+}
+#endif
+
INSTANTIATE_TEST_SUITE_P(DeterministicityInputOutput,
SerializationDeterministicityInputOutputTest,
::testing::Values(ov::pass::Serialize::Version::IR_V10, ov::pass::Serialize::Version::IR_V11));
diff --git a/src/core/tests/pass/serialization/serialize.cpp b/src/core/tests/pass/serialization/serialize.cpp
index e45d5d1d1434ff..5cb1965feebdd7 100644
--- a/src/core/tests/pass/serialization/serialize.cpp
+++ b/src/core/tests/pass/serialization/serialize.cpp
@@ -74,6 +74,23 @@ TEST_P(SerializationTest, SaveModel) {
});
}
+#ifdef OPENVINO_CPP_VER_17
+TEST_P(SerializationTest, CompareFunctionsByPath) {
+ const auto out_xml_path = std::filesystem::path(m_out_xml_path);
+ const auto out_bin_path = std::filesystem::path(m_out_bin_path);
+ CompareSerialized([&out_xml_path, &out_bin_path](const auto& m) {
+ ov::pass::Serialize(out_xml_path, out_bin_path).run_on_model(m);
+ });
+}
+
+TEST_P(SerializationTest, SaveModelByPath) {
+ const auto out_xml_path = std::filesystem::path(m_out_xml_path);
+ CompareSerialized([&out_xml_path](const auto& m) {
+ ov::save_model(m, out_xml_path, false);
+ });
+}
+#endif
+
INSTANTIATE_TEST_SUITE_P(
IRSerialization,
SerializationTest,
diff --git a/src/frontends/ir/src/ir_deserializer.cpp b/src/frontends/ir/src/ir_deserializer.cpp
index 2d1dfba956ea72..d7e250f9916302 100644
--- a/src/frontends/ir/src/ir_deserializer.cpp
+++ b/src/frontends/ir/src/ir_deserializer.cpp
@@ -950,10 +950,12 @@ std::shared_ptr<ov::Node> ov::XmlDeserializer::create_node(const std::vector<ov::Output<ov::Node>>& inputs,
static_cast<size_t>(pugixml::get_uint64_attr(dn, "size")),
- static_cast<size_t>(pugixml::get_uint64_attr(dn, "offset")));
+ static_cast<size_t>(pugixml::get_uint64_attr(dn, "offset")),
+ ov::element::Type(pugixml::get_str_attr(dn, "element_type")));
}
}
diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp
index 39a2d20c092835..05a0e0a2cf6a0e 100644
--- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp
+++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.cpp
@@ -1329,6 +1329,58 @@ std::set<std::vector<element::Type>> jit_logical_and_emitter::get_supported_prec
return {{element::f32, element::f32}};
}
+/// LOGICAL_OR ///
+jit_logical_or_emitter::jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
+ dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
+ const std::shared_ptr<ov::Node>& node)
+ : jit_emitter(host, host_isa, node, get_arithmetic_binary_exec_precision(node)) {
+ prepare_table();
+}
+
+jit_logical_or_emitter::jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
+ dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
+ const ov::element::Type exec_prc)
+ : jit_emitter(host, host_isa, exec_prc) {
+ prepare_table();
+}
+
+size_t jit_logical_or_emitter::get_inputs_count() const { return 2; }
+
+size_t jit_logical_or_emitter::get_aux_vecs_count() const { return 1; }
+
+size_t jit_logical_or_emitter::get_aux_gprs_count() const { return 1; }
+
+void jit_logical_or_emitter::emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
+ if (host_isa_ == dnnl::impl::cpu::aarch64::asimd) {
+ emit_isa<dnnl::impl::cpu::aarch64::asimd>(in_vec_idxs, out_vec_idxs);
+ } else {
+ OV_CPU_JIT_EMITTER_THROW("Can't create jit eltwise kernel");
+ }
+}
+
+template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
+void jit_logical_or_emitter::emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const {
+ OV_CPU_JIT_EMITTER_ASSERT(exec_prc_ == ov::element::f32, "unsupported precision: " + exec_prc_.to_string());
+
+ using TReg = typename dnnl::impl::cpu::aarch64::cpu_isa_traits<isa>::TReg;
+ const TReg src1 = TReg(in_vec_idxs[0]);
+ const TReg src2 = TReg(in_vec_idxs[1]);
+ const TReg dst = TReg(out_vec_idxs[0]);
+ const TReg aux = TReg(aux_vec_idxs[0]);
+
+ h->orr(dst.b16, src1.b16, src2.b16);
+ h->ld1r(aux.s, table_val2("one"));
+ h->and_(dst.b16, dst.b16, aux.b16);
+}
+
+void jit_logical_or_emitter::register_table_entries() {
+ push_arg_entry_of("one", 0x3f800000, true);
+}
+
+std::set<std::vector<element::Type>> jit_logical_or_emitter::get_supported_precisions(const std::shared_ptr<ov::Node>& node) {
+ return {{element::f32, element::f32}};
+}
+
/// LOGICAL_NOT ///
jit_logical_not_emitter::jit_logical_not_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
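The new emitter relies on the eltwise convention that boolean operands arrive as f32 values of exactly 0.0f or 1.0f: `orr` on the raw lane bits produces the 1.0f bit pattern (0x3f800000) whenever either input is 1.0f, and the final `and_` with the broadcast "one" table constant normalizes the result back to a clean 0.0f/1.0f. A scalar C++ model of what the three instructions compute per lane, assuming that input convention:

```cpp
#include <cstdint>
#include <cstring>

// Per-lane model of: orr dst, src1, src2; and dst, dst, broadcast(0x3f800000).
float logical_or_lane(float a, float b) {
    uint32_t ua, ub;
    std::memcpy(&ua, &a, sizeof(ua));
    std::memcpy(&ub, &b, sizeof(ub));
    const uint32_t r = (ua | ub) & 0x3f800000u;  // 0x3f800000 == bit pattern of 1.0f
    float out;
    std::memcpy(&out, &r, sizeof(out));
    return out;  // exactly 0.0f or 1.0f, given inputs restricted to {0.0f, 1.0f}
}
```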
diff --git a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp
index 2173a1487f1057..be4e51cd0b759d 100644
--- a/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp
+++ b/src/plugins/intel_cpu/src/emitters/plugin/aarch64/jit_eltwise_emitters.hpp
@@ -628,6 +628,34 @@ class jit_logical_and_emitter : public jit_emitter {
void register_table_entries() override;
};
+class jit_logical_or_emitter : public jit_emitter {
+public:
+ jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator *host,
+ dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
+ const ov::element::Type exec_prc = ov::element::f32);
+
+ jit_logical_or_emitter(dnnl::impl::cpu::aarch64::jit_generator *host,
+ dnnl::impl::cpu::aarch64::cpu_isa_t host_isa,
+ const std::shared_ptr<ov::Node>& n);
+
+ size_t get_inputs_count() const override;
+
+ size_t get_aux_vecs_count() const override;
+
+ size_t get_aux_gprs_count() const override;
+
+ static std::set<std::vector<element::Type>> get_supported_precisions(
+ const std::shared_ptr<ov::Node>& node = nullptr);
+
+private:
+ void emit_impl(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const override;
+
+ template <dnnl::impl::cpu::aarch64::cpu_isa_t isa>
+ void emit_isa(const std::vector<size_t> &in_vec_idxs, const std::vector<size_t> &out_vec_idxs) const;
+
+ void register_table_entries() override;
+};
+
class jit_logical_not_emitter : public jit_emitter {
public:
jit_logical_not_emitter(dnnl::impl::cpu::aarch64::jit_generator* host,
diff --git a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp
index 0374888e3d7fcb..912fe23fcd1fcf 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/aarch64/jit_eltwise.cpp
@@ -38,6 +38,7 @@ bool JitEltwiseExecutor::isSupported(
Algorithm::EltwiseIsNaN,
Algorithm::EltwiseLessEqual,
Algorithm::EltwiseLogicalAnd,
+ Algorithm::EltwiseLogicalOr,
Algorithm::EltwiseLogicalNot,
Algorithm::EltwiseLogicalXor,
Algorithm::EltwiseMaximum,
diff --git a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp
index cfe36f78cc40f9..b3fe7018d23677 100644
--- a/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp
+++ b/src/plugins/intel_cpu/src/nodes/kernels/aarch64/jit_uni_eltwise_generic.cpp
@@ -655,6 +655,7 @@ std::shared_ptr<jit_emitter> jit_uni_eltwise_generic<isa>::create_eltwise_emitte
OV_CASE(Algorithm::EltwiseIsInf, ov::intel_cpu::aarch64::jit_is_inf_emitter),
OV_CASE(Algorithm::EltwiseLessEqual, ov::intel_cpu::aarch64::jit_less_equal_emitter),
OV_CASE(Algorithm::EltwiseLogicalAnd, ov::intel_cpu::aarch64::jit_logical_and_emitter),
+ OV_CASE(Algorithm::EltwiseLogicalOr, ov::intel_cpu::aarch64::jit_logical_or_emitter),
OV_CASE(Algorithm::EltwiseLogicalNot, ov::intel_cpu::aarch64::jit_logical_not_emitter),
OV_CASE(Algorithm::EltwiseLogicalXor, ov::intel_cpu::aarch64::jit_logical_xor_emitter),
OV_CASE(Algorithm::EltwiseIsNaN, ov::intel_cpu::aarch64::jit_is_nan_emitter),
@@ -845,6 +846,7 @@ std::set<std::vector<element::Type>> eltwise_precision_helper::get_supported_pre
OV_CASE(Algorithm::EltwiseIsNaN, jit_is_nan_emitter),
OV_CASE(Algorithm::EltwiseLessEqual, jit_less_equal_emitter),
OV_CASE(Algorithm::EltwiseLogicalAnd, jit_logical_and_emitter),
+ OV_CASE(Algorithm::EltwiseLogicalOr, jit_logical_or_emitter),
OV_CASE(Algorithm::EltwiseLogicalNot, jit_logical_not_emitter),
OV_CASE(Algorithm::EltwiseLogicalXor, jit_logical_xor_emitter),
OV_CASE(Algorithm::EltwiseMaximum, jit_maximum_emitter),
diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
index 461f063ec26bc5..8a9a35b1e92fe9 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/data.hpp
@@ -4,15 +4,170 @@
#pragma once
#include
+#include
#include "intel_gpu/runtime/engine.hpp"
#include "intel_gpu/runtime/memory.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/util/op_types.hpp"
+#include "openvino/pass/manager.hpp"
#include "openvino/runtime/shared_buffer.hpp"
#include "openvino/util/mmap_object.hpp"
#include "primitive.hpp"
+#include "transformations/convert_precision.hpp"
namespace cldnn {
+struct weights_mem {
+ std::shared_ptr<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>> shared_buf = nullptr;
+ std::shared_ptr<ov::op::v0::Constant> transformed_constant = nullptr;
+
+ const uint8_t* get_loaded_data() {
+ if (transformed_constant) {
+ return reinterpret_cast<const uint8_t*>(transformed_constant->get_data_ptr());
+ }
+ OPENVINO_ASSERT(shared_buf);
+ return shared_buf->get_ptr();
+ }
+};
+
+struct weightless_cache_manager {
+ void set_constant_info(size_t bin_offset,
+ size_t original_size,
+ ov::element::Type original_dtype,
+ ov::element::Type curr_dtype,
+ ov::Shape shape) {
+ this->bin_offset = bin_offset;
+ this->original_size = original_size;
+ this->original_dtype = original_dtype;
+ this->curr_dtype = curr_dtype;
+ this->shape = shape;
+ do_weightless_caching = true;
+
+ if (original_dtype != curr_dtype) {
+ do_precision_conversion = true;
+ }
+ }
+
+ void invalidate() {
+ do_weightless_caching = false;
+ }
+
+ void set_new_dtype(ov::element::Type curr_dtype) {
+ this->curr_dtype = curr_dtype;
+ do_precision_conversion = original_dtype != curr_dtype;
+ }
+
+ bool save(BinaryOutputBuffer& ob, size_t data_size) const {
+ if (!do_weightless_caching) {
+ ob << false;
+ return false;
+ }
+
+ ob << true;
+ ob << bin_offset;
+ ob << do_precision_conversion;
+ if (do_precision_conversion) {
+ ob << original_size;
+ ob << make_data(&original_dtype, sizeof(ov::element::Type));
+ ob << make_data(&curr_dtype, sizeof(ov::element::Type));
+
+ size_t num_dims = shape.size();
+ ob << make_data(&num_dims, sizeof(size_t));
+ ob << make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type));
+ }
+ return true;
+ }
+
+ std::shared_ptr<weights_mem> load(BinaryInputBuffer& ib,
+ std::shared_ptr<ov::MappedMemory> mapped_weights,
+ size_t data_size) {
+ ib >> do_weightless_caching;
+ if (!do_weightless_caching) {
+ return nullptr;
+ }
+
+ OPENVINO_ASSERT(mapped_weights != nullptr, "mmap object is null");
+
+ ib >> bin_offset;
+ ib >> do_precision_conversion;
+ if (do_precision_conversion) {
+ ib >> original_size;
+ ib >> make_data(&original_dtype, sizeof(ov::element::Type));
+ ib >> make_data(&curr_dtype, sizeof(ov::element::Type));
+
+ size_t num_dims = 0;
+ ib >> make_data(&num_dims, sizeof(size_t));
+ shape.resize(num_dims);
+ ib >> make_data(shape.data(), num_dims * sizeof(ov::Shape::value_type));
+ } else {
+ original_size = data_size;
+ }
+
+ auto mem_obj = std::make_shared<weights_mem>();
+ mem_obj->shared_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(
+ mapped_weights->data() + bin_offset,
+ original_size,
+ mapped_weights);
+
+ if (should_run_transformations()) {
+ run_transformations(mem_obj);
+ }
+ return mem_obj;
+ }
+
+private:
+ bool do_weightless_caching = false;
+ bool do_precision_conversion = false;
+
+ size_t bin_offset = SIZE_MAX;
+ size_t original_size = SIZE_MAX;
+ ov::element::Type original_dtype = ov::element::Type_t::undefined;
+ ov::element::Type curr_dtype = ov::element::Type_t::undefined;
+ ov::Shape shape;
+
+ bool should_run_transformations() {
+ return do_precision_conversion;
+ }
+
+ void run_transformations(std::shared_ptr<weights_mem> mem_obj) {
+ auto orig_constant = std::make_shared<ov::op::v0::Constant>(original_dtype,
+ shape,
+ mem_obj->shared_buf->get_ptr(),
+ mem_obj->shared_buf);
+
+ ov::ParameterVector inputParams;
+ ov::ResultVector results;
+ results.push_back(std::make_shared<ov::op::v0::Result>(orig_constant->output(0)));
+ auto model = std::make_shared<ov::Model>(results, inputParams, "aux");
+
+ ov::pass::Manager manager("Plugin:GPU:weightless_cache_transformations");
+
+ if (do_precision_conversion) {
+ precisions_map fp_convert_precision_map = {
+ {original_dtype, curr_dtype}};
+ type_to_fuse_map empty_fuse_map = {};
+ const bool keep_precision_sensitive_in_fp32 = false;
+ const bool convert_input_output_precision = false;
+ const bool store_original_precision_as_rt_attribute = true;
+ manager.register_pass<ov::pass::ConvertPrecision>(fp_convert_precision_map,
+ empty_fuse_map,
+ keep_precision_sensitive_in_fp32,
+ convert_input_output_precision,
+ store_original_precision_as_rt_attribute);
+ }
+
+ manager.run_passes(model);
+ const auto& ops = model->get_ops();
+ auto it = std::find_if(ops.begin(), ops.end(), [](const std::shared_ptr<ov::Node>& node) {
+ return ov::op::util::is_constant(node);
+ });
+ OPENVINO_ASSERT(it != ops.end());
+ mem_obj->transformed_constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(*it);
+ OPENVINO_ASSERT(mem_obj->transformed_constant->get_element_type() == curr_dtype);
+ }
+};
+
/// @brief Provides input data to topology.
/// @details This primitive allows to pass data which is known at topology creation.
/// For example, weights and biases for scoring networks.
@@ -20,21 +175,32 @@ namespace cldnn {
struct data : public primitive_base<data> {
CLDNN_DECLARE_PRIMITIVE(data)
- data() : primitive_base("", {}) {}
+ data() : primitive_base("", {}) {
+ cache_info = std::make_shared<weightless_cache_manager>();
+ }
/// @brief Constructs data primitive.
/// @param id This primitive id.
/// @param mem @ref memory object which contains data.
/// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build.
- data(const primitive_id& id, memory::ptr mem)
- : primitive_base(id, {}), mem(std::move(mem)) {}
+ data(const primitive_id& id, memory::ptr mem) : primitive_base(id, {}), mem(std::move(mem)) {
+ cache_info = std::make_shared<weightless_cache_manager>();
+ }
+
+ data(const primitive_id& id, memory::ptr mem, std::shared_ptr<weightless_cache_manager> cache_info)
+ : primitive_base(id, {}),
+ mem(std::move(mem)),
+ cache_info(cache_info) {
+ if (!cache_info) {
+ this->cache_info = std::make_shared<weightless_cache_manager>();
+ }
+ }
/// @brief @ref memory object which contains data.
/// @note If memory is attached by memory::attach(), the attached buffer should be valid till network build.
memory::ptr mem;
- size_t original_size = SIZE_MAX;
- size_t bin_offset = SIZE_MAX;
+ std::shared_ptr<weightless_cache_manager> cache_info;
size_t hash() const override {
size_t seed = primitive::hash();
@@ -53,13 +219,8 @@ struct data : public primitive_base<data> {
size_t data_size = mem->size();
ob << make_data(&data_size, sizeof(size_t));
- bool is_cache_without_weights = bin_offset != SIZE_MAX && data_size == original_size;
-
- if (is_cache_without_weights) {
- ob << true;
- ob << bin_offset;
- } else {
- ob << false;
+ bool do_weightless_caching = cache_info->save(ob, data_size);
+ if (!do_weightless_caching) {
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
ob << make_data(mem->buffer_ptr(), data_size);
} else {
@@ -88,26 +249,12 @@ struct data : public primitive_base<data> {
mem = ib.get_engine().allocate_memory(output_layout, _allocation_type, false);
- bool is_cache_without_weights;
- ib >> is_cache_without_weights;
- if (is_cache_without_weights && mapped_weights == nullptr) {
- OPENVINO_THROW("mmap object is null");
- }
-
- std::shared_ptr<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>> shared_buf;
- if (is_cache_without_weights) {
- ib >> bin_offset;
- original_size = data_size;
-
- shared_buf = std::make_shared<ov::SharedBuffer<std::shared_ptr<ov::MappedMemory>>>(
- mapped_weights->data() + bin_offset,
- data_size,
- mapped_weights);
- }
+ auto mem_obj = cache_info->load(ib, mapped_weights, data_size);
+ bool is_weightless_caching_enabled = mem_obj != nullptr;
if (_allocation_type == allocation_type::usm_host || _allocation_type == allocation_type::usm_shared) {
- if (is_cache_without_weights) {
- std::memcpy(reinterpret_cast<uint8_t*>(mem->buffer_ptr()), shared_buf->get_ptr(), data_size);
+ if (is_weightless_caching_enabled) {
+ std::memcpy(reinterpret_cast<uint8_t*>(mem->buffer_ptr()), mem_obj->get_loaded_data(), data_size);
} else {
ib >> make_data(mem->buffer_ptr(), data_size);
}
@@ -116,8 +263,8 @@ struct data : public primitive_base<data> {
auto& strm = ib.get_engine().get_service_stream();
if (data_size < DATA_BLOCK_SIZE || output_layout.format.is_image_2d()) {
std::vector<uint8_t> _buf(data_size);
- if (is_cache_without_weights) {
- std::memcpy(reinterpret_cast<uint8_t*>(_buf.data()), shared_buf->get_ptr(), data_size);
+ if (is_weightless_caching_enabled) {
+ std::memcpy(reinterpret_cast<uint8_t*>(_buf.data()), mem_obj->get_loaded_data(), data_size);
} else {
ib >> make_data(_buf.data(), data_size);
}
@@ -135,9 +282,9 @@ struct data : public primitive_base<data> {
size_t copy_size =
(data_size > (dst_offset + DATA_BLOCK_SIZE)) ? DATA_BLOCK_SIZE : (data_size - dst_offset);
if (buf_flag) {
- if (is_cache_without_weights) {
+ if (is_weightless_caching_enabled) {
std::memcpy(reinterpret_cast<uint8_t*>(_buf1.data()),
- shared_buf->get_ptr() + dst_offset,
+ mem_obj->get_loaded_data() + dst_offset,
copy_size);
} else {
ib >> make_data(_buf1.data(), copy_size);
@@ -148,9 +295,9 @@ struct data : public primitive_base {
}
ev1 = mem->copy_from(strm, _buf1.data(), src_offset, dst_offset, copy_size, is_blocking);
} else {
- if (is_cache_without_weights) {
+ if (is_weightless_caching_enabled) {
std::memcpy(reinterpret_cast<uint8_t*>(_buf2.data()),
- shared_buf->get_ptr() + dst_offset,
+ mem_obj->get_loaded_data() + dst_offset,
copy_size);
} else {
ib >> make_data(_buf2.data(), copy_size);
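With this refactor the serialized blob for a weightless-cached constant carries only metadata; the bytes themselves are re-read (and, if the dtype changed, re-converted) from the original `.bin` file via mmap at import time. A sketch of the logical stream layout produced by `data::save()` together with `weightless_cache_manager::save()` (field order as in the code above; the byte-level encoding is whatever `BinaryOutputBuffer` emits):

```cpp
// Logical layout per data primitive (sketch, not a literal on-disk struct):
//
//   size_t data_size;                  // always written by data::save()
//   bool   weightless;                 // weightless_cache_manager::save() header
//   if (weightless) {
//       size_t bin_offset;             // where the bytes live in the original .bin
//       bool   precision_converted;    // original_dtype != curr_dtype
//       if (precision_converted) {
//           size_t            original_size;
//           ov::element::Type original_dtype, curr_dtype;  // written as raw bytes
//           size_t            num_dims;
//           ov::Shape::value_type dims[num_dims];          // needed to rebuild the Constant
//       }
//   } else {
//       uint8_t raw[data_size];        // weights embedded in the blob, as before
//   }
```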
diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
index 85173e9eb33e7c..a4129800733875 100644
--- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
+++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp
@@ -74,11 +74,14 @@ void propagate_constants::run(program& p) {
// replace all constant nodes which are relevant for inference (either used by non-const user or marked as output)
// with recomputed cldnn::data
for (auto& cout : to_replace) {
- auto& id_to_replace = cout.first;
- auto mem_impl = cout.second;
-
- auto const_data =
- std::make_shared("_cldnn_const_prop_" + id_to_replace, mem_impl /* <<< REMOVE ME WHEN POSSIBLE */);
+ auto& id_to_replace = std::get<0>(cout);
+ auto mem_impl = std::get<1>(cout);
+ auto cache_info = std::get<2>(cout);
+ auto in_layout = std::get<3>(cout);
+
+ auto const_data = std::make_shared("_cldnn_const_prop_" + id_to_replace,
+ mem_impl, /* <<< REMOVE ME WHEN POSSIBLE */
+ cache_info);
auto& new_node = p.get_or_create(const_data);
auto& curr_node = p.get_node(id_to_replace);
@@ -92,6 +95,25 @@ void propagate_constants::run(program& p) {
}
}
+ auto is_reorder_with_only_dtype_change = [&](program_node& dst) {
+ if (!in_layout) {
+ return false;
+ }
+ auto& dst_layout = dst.get_output_layout();
+ if (in_layout->data_type == dst_layout.data_type) {
+ return false;
+ }
+
+ auto aux_layout = dst_layout;
+ aux_layout.data_type = in_layout->data_type;
+ return aux_layout == *in_layout.get();
+ };
+ if (is_reorder_with_only_dtype_change(new_node)) {
+ new_node.as<data>().get_primitive()->cache_info->set_new_dtype(new_node.get_output_layout().data_type);
+ } else {
+ new_node.as<data>().get_primitive()->cache_info->invalidate();
+ }
+
curr_node.dependencies.clear();
// remove all constant users (as they will be either removed or replaced by cldnn::data which does not have any
// dependencies)
@@ -113,9 +135,10 @@ bool propagate_constants::has_non_const_user(program_node& node) const {
return false;
}
-std::list<std::pair<primitive_id, memory::ptr>> propagate_constants::calculate(engine& engine,
- const ExecutionConfig& config,
- std::shared_ptr<ov::threading::IStreamsExecutor> task_executor) {
+std::list<std::tuple<primitive_id, memory::ptr, std::shared_ptr<weightless_cache_manager>, std::shared_ptr<layout>>>
+propagate_constants::calculate(engine& engine,
+ const ExecutionConfig& config,
+ std::shared_ptr<ov::threading::IStreamsExecutor> task_executor) {
if (!has_non_trivial_constants)
return {};
@@ -123,15 +146,37 @@ std::list<std::pair<primitive_id, memory::ptr>> propagate_constants::calculate(e
cf_config.set_property(ov::intel_gpu::optimize_data(false));
cf_config.set_property(ov::intel_gpu::custom_outputs(const_outputs));
network::ptr net = network::build_network(engine, nodes, cf_config, task_executor, true);
- for (auto& cin : const_inputs)
+ std::map<primitive_id, std::pair<std::shared_ptr<weightless_cache_manager>, std::shared_ptr<layout>>>
+ weightless_cache_map;
+ for (auto& cin : const_inputs) {
net->set_input_data(cin->id(), cin->get_attached_memory_ptr());
+ auto users = cin->get_users();
+ if (users.size() == 1 && users.front()->is_type<reorder>()) {
+ auto rprim = users.front()->as<reorder>().get_primitive();
+ auto id = rprim->id;
+ auto cache_ptr = cin->as<data>().get_primitive()->cache_info;
+ auto layout_ptr = std::make_shared<layout>(cin->get_output_layout());
+ weightless_cache_map.emplace(id, std::make_pair(cache_ptr, layout_ptr));
+ }
+ }
+
net->execute({});
net->reset_execution(true); // wait for computations to complete
auto outputs = net->get_outputs();
- std::list<std::pair<primitive_id, memory::ptr>> ret;
- for (auto& out : outputs) ret.push_back({out->id(), out->output_memory_ptr()});
+ std::list<std::tuple<primitive_id, memory::ptr, std::shared_ptr<weightless_cache_manager>, std::shared_ptr<layout>>>
+ ret;
+ for (auto& out : outputs) {
+ std::shared_ptr<weightless_cache_manager> cache_ptr = nullptr;
+ std::shared_ptr<layout> layout_ptr = nullptr;
+ auto it = weightless_cache_map.find(out->id());
+ if (it != weightless_cache_map.end()) {
+ cache_ptr = it->second.first;
+ layout_ptr = it->second.second;
+ }
+ ret.push_back({out->id(), out->output_memory_ptr(), cache_ptr, layout_ptr});
+ }
return ret;
}
diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h
index 490076a37f788e..0b7c3d85c37e27 100644
--- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h
+++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h
@@ -211,9 +211,10 @@ class propagate_constants : public base_pass {
private:
void run(program& p) override;
- std::list<std::pair<primitive_id, memory::ptr>> calculate(engine& engine,
- const ExecutionConfig& config,
- std::shared_ptr<ov::threading::IStreamsExecutor> task_executor);
+ std::list<std::tuple<primitive_id, memory::ptr, std::shared_ptr<weightless_cache_manager>, std::shared_ptr<layout>>>
+ calculate(engine& engine,
+ const ExecutionConfig& config,
+ std::shared_ptr<ov::threading::IStreamsExecutor> task_executor);
bool has_non_const_user(program_node& node) const;
void handle_constant(program& prog, program_node& node);
void add_constant(program& prog, program_node& node);
diff --git a/src/plugins/intel_gpu/src/plugin/program_builder.cpp b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
index 368e25abe2ddac..a9bb813d0ce587 100644
--- a/src/plugins/intel_gpu/src/plugin/program_builder.cpp
+++ b/src/plugins/intel_gpu/src/plugin/program_builder.cpp
@@ -14,6 +14,7 @@
#include "intel_gpu/plugin/common_utils.hpp"
#include "intel_gpu/plugin/program_builder.hpp"
+#include "intel_gpu/primitives/data.hpp"
#include "intel_gpu/runtime/itt.hpp"
#include "intel_gpu/runtime/debug_configuration.hpp"
#include "intel_gpu/primitives/mutable_data.hpp"
@@ -311,11 +312,15 @@ void ProgramBuilder::add_primitive(const ov::Node& op, std::shared_ptr<cldnn::primitive> prim,
if (this->m_config.get_property(ov::cache_mode) == ov::CacheMode::OPTIMIZE_SIZE) {
if (auto data_prim = dynamic_cast<cldnn::data*>(prim.get())) {
auto rt_info = op.get_rt_info();
+
auto weightless_cache_attr = rt_info.find(ov::WeightlessCacheAttribute::get_type_info_static());
if (weightless_cache_attr != rt_info.end()) {
- data_prim->bin_offset = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().bin_offset;
- data_prim->original_size =
- weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>().original_size;
+ auto& attr = weightless_cache_attr->second.as<ov::WeightlessCacheAttribute>();
+ data_prim->cache_info->set_constant_info(attr.bin_offset,
+ attr.original_size,
+ attr.original_dtype,
+ op.get_output_element_type(0),
+ op.get_output_shape(0));
}
}
}
diff --git a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
index 839b2640ca180c..17e1ed6d0a9bbe 100644
--- a/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
+++ b/src/plugins/intel_gpu/tests/functional/behavior/model_cache.cpp
@@ -8,48 +8,40 @@
#include "common_test_utils/common_utils.hpp"
#include "common_test_utils/file_utils.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"
-#include "common_test_utils/subgraph_builders/2_input_subtract.hpp"
-#include "common_test_utils/subgraph_builders/concat_with_params.hpp"
-#include "common_test_utils/subgraph_builders/conv_bias.hpp"
-#include "common_test_utils/subgraph_builders/conv_pool_relu.hpp"
-#include "common_test_utils/subgraph_builders/conv_pool_relu_no_reshapes.hpp"
-#include "common_test_utils/subgraph_builders/conv_pool_relu_non_zero.hpp"
-#include "common_test_utils/subgraph_builders/convert_transpose.hpp"
-#include "common_test_utils/subgraph_builders/detection_output.hpp"
-#include "common_test_utils/subgraph_builders/kso_func.hpp"
-#include "common_test_utils/subgraph_builders/matmul_bias.hpp"
-#include "common_test_utils/subgraph_builders/multi_single_conv.hpp"
-#include "common_test_utils/subgraph_builders/multiple_input_outpput_double_concat.hpp"
-#include "common_test_utils/subgraph_builders/nested_branch_conv_concat.hpp"
-#include "common_test_utils/subgraph_builders/nested_split_conv_concat.hpp"
#include "common_test_utils/subgraph_builders/read_concat_split_assign.hpp"
#include "common_test_utils/subgraph_builders/single_concat_with_constant.hpp"
-#include "common_test_utils/subgraph_builders/single_conv.hpp"
-#include "common_test_utils/subgraph_builders/single_split.hpp"
-#include "common_test_utils/subgraph_builders/split_concat.hpp"
-#include "common_test_utils/subgraph_builders/split_conv_concat.hpp"
-#include "common_test_utils/subgraph_builders/split_multi_conv_concat.hpp"
#include "common_test_utils/subgraph_builders/ti_with_lstm_cell.hpp"
#include "common_test_utils/test_common.hpp"
#include "openvino/pass/serialize.hpp"
namespace {
-class CheckWeightlessCacheAccuracy : public ::testing::Test,
- public ::testing::WithParamInterface<bool> {
+typedef std::tuple<bool, ov::element::Type, ov::element::Type> testParams;
+
+class CheckWeightlessCacheAccuracy : public ::testing::Test, public ::testing::WithParamInterface<testParams> {
public:
- static std::string get_test_case_name(::testing::TestParamInfo<bool> obj) {
- bool use_compile_model_api = obj.param;
+ static std::string get_test_case_name(::testing::TestParamInfo<testParams> obj) {
+ bool use_compile_model_api_;
+ ov::element::Type inference_mode_;
+ ov::element::Type model_dtype_;
+ std::tie(use_compile_model_api_, inference_mode_, model_dtype_) = obj.param;
std::ostringstream result;
- result << "use_compile_model_api=" << use_compile_model_api;
+ const char separator = '_';
+ result << "use_compile_model_api=" << use_compile_model_api_ << separator;
+ result << "inference_mode=" << inference_mode_ << separator;
+ result << "model_dtype=" << model_dtype_;
return result.str();
}
+
protected:
std::shared_ptr<ov::Model> model;
std::string xml_path;
std::string bin_path;
std::string cache_path;
- bool use_compile_model_api; // for loading from cache
+ std::string cache_dir;
+ bool use_compile_model_api; // for loading from cache
+ ov::element::Type inference_mode;
+ ov::element::Type model_dtype;
void SetUp() override;
void TearDown() override;
@@ -61,36 +53,46 @@ void CheckWeightlessCacheAccuracy::SetUp() {
xml_path = filePrefix + ".xml";
bin_path = filePrefix + ".bin";
cache_path = filePrefix + ".blob";
- use_compile_model_api = GetParam();
+ cache_dir = filePrefix + "_cache_dir";
+
+ std::tie(use_compile_model_api, inference_mode, model_dtype) = GetParam();
}
void CheckWeightlessCacheAccuracy::TearDown() {
std::remove(xml_path.c_str());
std::remove(bin_path.c_str());
std::remove(cache_path.c_str());
+
+ ov::test::utils::removeFilesWithExt(cache_dir, "blob");
+ ov::test::utils::removeFilesWithExt(cache_dir, "cl_cache");
+ ov::test::utils::removeDir(cache_dir);
}
void CheckWeightlessCacheAccuracy::run() {
- ov::AnyMap config = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE) };
- ov::AnyMap config_with_weights_path = { ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE), ov::weights_path(bin_path) };
+ ov::AnyMap config = {ov::cache_dir(cache_dir),
+ ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
+ ov::hint::inference_precision(inference_mode)};
+ ov::AnyMap config_with_weights_path = {ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
+ ov::weights_path(bin_path),
+ ov::hint::inference_precision(inference_mode)};
auto core = ov::test::utils::PluginCache::get().core();
ov::pass::Serialize(xml_path, bin_path).run_on_model(model);
ov::CompiledModel compiled_model;
- OV_ASSERT_NO_THROW(compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config));
+ compiled_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config);
- auto ofstr = std::ofstream(cache_path, std::ofstream::binary);
- OV_ASSERT_NO_THROW(compiled_model.export_model(ofstr));
- ofstr.close();
+ if (!use_compile_model_api) {
+ auto ofstr = std::ofstream(cache_path, std::ofstream::binary);
+ compiled_model.export_model(ofstr);
+ ofstr.close();
+ }
auto ifstr = std::ifstream(cache_path, std::ifstream::binary);
ov::CompiledModel imported_model;
if (use_compile_model_api) {
- OV_ASSERT_NO_THROW(imported_model =
- core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config));
+ imported_model = core->compile_model(xml_path, ov::test::utils::DEVICE_GPU, config);
} else {
- OV_ASSERT_NO_THROW(imported_model =
- core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path));
+ imported_model = core->import_model(ifstr, ov::test::utils::DEVICE_GPU, config_with_weights_path);
}
ifstr.close();
@@ -99,39 +101,57 @@ void CheckWeightlessCacheAccuracy::run() {
for (size_t param_idx = 0; param_idx < model->get_parameters().size(); ++param_idx) {
auto input = model->get_parameters().at(param_idx);
- auto tensor = ov::test::utils::create_and_fill_tensor(input->get_element_type(), input->get_shape());
+ auto tensor = ov::test::utils::create_and_fill_tensor_real_distribution(input->get_element_type(),
+ input->get_shape(),
+ -100,
+ 100,
+ param_idx);
orig_req.set_tensor(input, tensor);
new_req.set_tensor(input, tensor);
}
- OV_ASSERT_NO_THROW(orig_req.infer());
- OV_ASSERT_NO_THROW(new_req.infer());
+ orig_req.infer();
+ new_req.infer();
auto result_vector = model->get_results();
for (auto& res : result_vector) {
auto orig_out = orig_req.get_tensor(res);
auto new_out = new_req.get_tensor(res);
- ov::test::utils::compare(orig_out, new_out);
+ ov::test::utils::compare(orig_out, new_out, inference_mode);
}
}
TEST_P(CheckWeightlessCacheAccuracy, ReadConcatSplitAssign) {
- model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, ov::element::f16);
- run();
+ OV_ASSERT_NO_THROW(model = ov::test::utils::make_read_concat_split_assign({1, 1, 2, 4}, model_dtype));
+ OV_ASSERT_NO_THROW(run());
}
TEST_P(CheckWeightlessCacheAccuracy, SingleConcatWithConstant) {
- model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, ov::element::f16);
- run();
+ OV_ASSERT_NO_THROW(model = ov::test::utils::make_single_concat_with_constant({1, 1, 2, 4}, model_dtype));
+ OV_ASSERT_NO_THROW(run());
}
TEST_P(CheckWeightlessCacheAccuracy, TiWithLstmCell) {
- model = ov::test::utils::make_ti_with_lstm_cell(ov::element::f16);
- run();
+ OV_ASSERT_NO_THROW(model = ov::test::utils::make_ti_with_lstm_cell(model_dtype));
+ OV_ASSERT_NO_THROW(run());
}
-INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy, CheckWeightlessCacheAccuracy,
- ::testing::Bool(),
+const std::vector<ov::element::Type> inference_modes = {
+ ov::element::f32,
+ ov::element::f16,
+};
+
+const std::vector<ov::element::Type> model_dtypes = {
+ ov::element::f32,
+ ov::element::f16,
+ ov::element::bf16,
+};
+
+INSTANTIATE_TEST_SUITE_P(smoke_CheckWeightlessCacheAccuracy,
+ CheckWeightlessCacheAccuracy,
+ ::testing::Combine(::testing::Bool(),
+ ::testing::ValuesIn(inference_modes),
+ ::testing::ValuesIn(model_dtypes)),
CheckWeightlessCacheAccuracy::get_test_case_name);
} // namespace
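For reference, the user-visible flow these tests exercise looks roughly like this (paths hypothetical). With `ov::CacheMode::OPTIMIZE_SIZE` the exported blob contains no weights, so the import side needs the original weights, either implicitly via `compile_model` with `ov::cache_dir`, or explicitly via `ov::weights_path` when importing a blob:

```cpp
#include <fstream>

#include "openvino/runtime/core.hpp"

int main() {
    ov::Core core;

    // Compile and export a weightless blob.
    auto compiled = core.compile_model("model.xml", "GPU",
                                       ov::AnyMap{ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE)});
    std::ofstream out("model.blob", std::ios::binary);
    compiled.export_model(out);
    out.close();

    // Import it later: the weights are re-read from the original .bin file.
    std::ifstream in("model.blob", std::ios::binary);
    auto imported = core.import_model(in, "GPU",
                                      ov::AnyMap{ov::cache_mode(ov::CacheMode::OPTIMIZE_SIZE),
                                                 ov::weights_path("model.bin")});
    return 0;
}
```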
diff --git a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp
index 7abdbcb8c2fc52..7b4f27b5af05b4 100644
--- a/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/shape_infer/eltwise_si_test.cpp
@@ -23,11 +23,11 @@ using namespace ov;
namespace shape_infer_tests {
struct eltwise_test_params {
- layout input1_layout;
- layout input2_layout;
+ cldnn::layout input1_layout;
+ cldnn::layout input2_layout;
eltwise_mode mode;
AutoBroadcastSpec auto_broadcast_spec;
- layout expected_layout;
+ cldnn::layout expected_layout;
std::vector stride;
};