diff --git a/src/core/include/openvino/op/fake_convert.hpp b/src/core/include/openvino/op/fake_convert.hpp
index c3eaa43b98a51b..93eca1dbaf58de 100644
--- a/src/core/include/openvino/op/fake_convert.hpp
+++ b/src/core/include/openvino/op/fake_convert.hpp
@@ -70,6 +70,10 @@ class OPENVINO_API FakeConvert : public Op {
     std::string get_destination_type() const;
     const ov::element::Type& get_destination_element_type() const;
 
+    void set_destination_type(ov::element::Type destination_type) {
+        m_destination_type = destination_type;
+    }
+
 private:
     void validate_destination_type() const;
 
diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
index e234bc68de0750..ecec9429aacfb1 100644
--- a/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
+++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/primitives_list.hpp
@@ -267,6 +267,7 @@ REGISTER_FACTORY(v13, ScaledDotProductAttention);
 REGISTER_FACTORY(v13, BitwiseAnd);
 REGISTER_FACTORY(v13, BitwiseOr);
 REGISTER_FACTORY(v13, BitwiseXor);
+REGISTER_FACTORY(v13, FakeConvert);
 
 // ------------------------------ Supported v15 ops ----------------------------- //
 REGISTER_FACTORY(v15, ROIAlignRotated);
diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp
new file mode 100644
index 00000000000000..4743bbec9cca78
--- /dev/null
+++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/fake_convert.hpp
@@ -0,0 +1,59 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+#include "primitive.hpp"
+#include <string>
+
+namespace cldnn {
+
+/// @brief Performs fake conversion of the input through a low-precision destination type using scale and optional shift inputs.
+struct fake_convert : public primitive_base<fake_convert> {
+    CLDNN_DECLARE_PRIMITIVE(fake_convert)
+
+    fake_convert() : primitive_base("", {}) {}
+
+    /// @brief Constructs fake_convert primitive.
+    /// @param id This primitive id.
+    /// @param input Input primitive id.
+    /// @param scale Scale primitive id.
+    /// @param shift Shift primitive id (optional).
+    /// @param destination_type Destination type of the fake conversion, "f8e4m3" or "f8e5m2".
+    fake_convert(const primitive_id& id,
+                 const input_info& input,
+                 const input_info& scale,
+                 const input_info& shift,
+                 std::string destination_type = "f8e4m3")
+        : primitive_base(id, {input, scale, shift}, 1), destination_type(destination_type) {}
+
+    fake_convert(const primitive_id& id,
+                 const input_info& input,
+                 const input_info& scale,
+                 std::string destination_type = "f8e4m3")
+        : primitive_base(id, {input, scale}, 1), destination_type(destination_type) {}
+
+    std::string destination_type;
+
+    size_t hash() const override {
+        size_t seed = primitive::hash();
+        seed = hash_combine(seed, destination_type);
+        return seed;
+    }
+
+    bool operator==(const primitive& rhs) const override {
+        if (!compare_common_params(rhs))
+            return false;
+
+        auto rhs_casted = downcast<const fake_convert>(rhs);
+        return (destination_type == rhs_casted.destination_type);
+    }
+
+    void save(BinaryOutputBuffer& ob) const override {
+        primitive_base<fake_convert>::save(ob);
+        ob << destination_type;
+    }
+
+    void load(BinaryInputBuffer& ib) override {
+        primitive_base<fake_convert>::load(ib);
+        ib >> destination_type;
+    }
+};
+}  // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/graph/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/fake_convert.cpp
new file mode 100644
index 00000000000000..8dec5da3129741
--- /dev/null
+++ b/src/plugins/intel_gpu/src/graph/fake_convert.cpp
@@ -0,0 +1,70 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "fake_convert_inst.h"
+#include "fake_convert_shape_inference.hpp"
+
+#include "primitive_type_base.h"
+#include "intel_gpu/runtime/error_handler.hpp"
+#include "json_object.h"
+#include <string>
+
+namespace cldnn {
+GPU_DEFINE_PRIMITIVE_TYPE_ID(fake_convert)
+
+layout fake_convert_inst::calc_output_layout(fake_convert_node const& node, kernel_impl_params const& impl_param) {
+    return calc_output_layouts<ov::PartialShape>(node, impl_param)[0];
+}
+
+template<typename ShapeType>
+std::vector<layout> fake_convert_inst::calc_output_layouts(fake_convert_node const& node, kernel_impl_params const& impl_param) {
+    auto desc = impl_param.typed_desc<fake_convert>();
+    auto input_layout = impl_param.get_input_layout(0);
+    auto scale_layout = impl_param.get_input_layout(1);
+    auto output_type = input_layout.data_type;
+    auto output_format = input_layout.format;
+
+    ov::op::v13::FakeConvert op;
+
+    std::vector<ShapeType> input_shapes = {
+        input_layout.get<ShapeType>(),
+        scale_layout.get<ShapeType>()
+    };
+    if (impl_param.input_layouts.size() == 3) {
+        auto shift_layout = impl_param.get_input_layout(2);
+        input_shapes.push_back(shift_layout.get<ShapeType>());
+    }
+    std::vector<ShapeType> output_shapes = ov::op::v13::shape_infer(&op, input_shapes);
+
+    return { layout{output_shapes[0], output_type, output_format} };
+}
+
+template std::vector<layout> fake_convert_inst::calc_output_layouts<ov::PartialShape>(fake_convert_node const& node, const kernel_impl_params& impl_param);
+
+std::string fake_convert_inst::to_string(fake_convert_node const& node) {
+    auto desc = node.get_primitive();
+    auto node_info = node.desc_to_json();
+    auto& input = node.input();
+    auto& scale = node.scale();
+
+    std::stringstream primitive_description;
+
+    json_composite fake_convert_info;
+    fake_convert_info.add("input id", input.id());
+    fake_convert_info.add("scale id", scale.id());
+    if (node.has_shift()) {
+        fake_convert_info.add("shift id", node.shift().id());
+    }
+    fake_convert_info.add("destination_type", node.get_destination_type());
+
+    node_info->add("fake_convert info", fake_convert_info);
+    node_info->dump(primitive_description);
+
+    return primitive_description.str();
+}
+
+fake_convert_inst::typed_primitive_inst(network& network, fake_convert_node const& node)
+    : parent(network, node) {}
+
+}  // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp
new file mode 100644
index 00000000000000..d89e910eb8bdf4
--- /dev/null
+++ b/src/plugins/intel_gpu/src/graph/impls/cpu/fake_convert.cpp
@@ -0,0 +1,134 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "impls/cpu/cpu_impl_helpers.hpp"
+#include "register.hpp"
+#include "fake_convert_inst.h"
+#include "impls/registry/implementation_map.hpp"
+
+#include "openvino/op/fake_convert.hpp"
+
+namespace cldnn {
+namespace cpu {
+
+struct fake_convert_impl : public typed_primitive_impl<fake_convert> {
+    using parent = typed_primitive_impl<fake_convert>;
+    using parent::parent;
+
+    std::string destination_type;
+
+    std::shared_ptr<ov::op::v13::FakeConvert> op;
+
+    DECLARE_OBJECT_TYPE_SERIALIZATION(cldnn::cpu::fake_convert_impl)
+
+    std::unique_ptr<primitive_impl> clone() const override {
+        return make_unique<fake_convert_impl>(*this);
+    }
+
+    fake_convert_impl() : parent("fake_convert_cpu_impl") {}
+
+    explicit fake_convert_impl(const fake_convert_node& outer) {
+        set_node_params(outer);
+    }
+
+    void set_node_params(const program_node& arg) override {
+        OPENVINO_ASSERT(arg.is_type<fake_convert>(), "[GPU] Incorrect program_node type");
+        const auto& node = arg.as<fake_convert>();
+        destination_type = node.get_destination_type();
+    }
+
+    void save(BinaryOutputBuffer& ob) const override {
+        parent::save(ob);
+        ob << destination_type;
+    }
+
+    void load(BinaryInputBuffer& ib) override {
+        parent::load(ib);
+        ib >> destination_type;
+    }
+
+    event::ptr execute_impl(const std::vector<event::ptr>& events, fake_convert_inst& instance) override {
+        OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "fake_convert::execute_impl");
+        auto& stream = instance.get_network().get_stream();
+
+        const bool pass_through_events = (stream.get_queue_type() == QueueTypes::out_of_order) && instance.all_dependencies_cpu_impl();
+
+        if (!pass_through_events) {
+            stream.wait_for_events(events);
+        }
+
+        auto params = instance.get_impl_params();
+
+        ov::TensorVector input_host_tensors;
+        ov::TensorVector output_host_tensors;
+
+        if (!op) {
+            op = std::make_shared<ov::op::v13::FakeConvert>();
+            op->set_destination_type(ov::element::Type(destination_type));
+        }
+
+        std::vector<memory::ptr> input_mem_ptrs;
+        for (size_t i = 0; i < instance.dependencies().size(); i++)
+            input_mem_ptrs.push_back(instance.dep_memory_ptr(i));
+
+        auto output_mem_ptr = instance.output_memory_ptr();
+
+        cldnn::mem_lock<uint8_t, mem_lock_type::write> output_lock(output_mem_ptr, stream);
+
+        for (size_t i = 0; i < input_mem_ptrs.size(); i++)
+            input_host_tensors.push_back(make_tensor(params->input_layouts[i], input_mem_ptrs[i]->lock(stream, mem_lock_type::read)));
+
+        output_host_tensors.push_back(make_tensor(params->output_layouts[0], output_lock.data()));
+
+        OPENVINO_ASSERT(op->evaluate(output_host_tensors, input_host_tensors),
+                        "[GPU] Couldn't execute fake_convert primitive with id ", instance.id());
+
+        if (pass_through_events) {
+            return stream.group_events(events);
+        }
+
+        return make_output_event(stream, instance.is_output());
+    }
+
+    void init_kernels(const kernels_cache&, const kernel_impl_params&) override {}
+
+    void update(primitive_inst& inst, const kernel_impl_params& impl_param) override {}
+
+public:
+    static std::unique_ptr<primitive_impl> create(const fake_convert_node& arg, const kernel_impl_params& impl_param) {
+        return make_unique<fake_convert_impl>();
+    }
+};
+
+
+namespace detail {
+
+attach_fake_convert_impl::attach_fake_convert_impl() {
+    auto formats = {
+        format::bfyx,
+        format::bfzyx,
+        format::bfwzyx,
+        format::bfuwzyx,
+        format::bfvuwzyx,
+    };
+
+    auto types = {
+        data_types::f32,
+        data_types::f16,
+        data_types::i32,
+        data_types::i64,
+        data_types::i8,
+        data_types::u8,
+    };
+
+    implementation_map<fake_convert>::add(impl_types::cpu, shape_types::static_shape, fake_convert_impl::create, types, formats);
+    implementation_map<fake_convert>::add(impl_types::cpu, shape_types::dynamic_shape, fake_convert_impl::create, types, formats);
+}
+
+}  // namespace detail
+}  // namespace cpu
+}  // namespace cldnn
+
+BIND_BINARY_BUFFER_WITH_TYPE(cldnn::cpu::fake_convert_impl)
+BIND_BINARY_BUFFER_WITH_TYPE(cldnn::fake_convert)
diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp
index 2b0dc5b212158c..e86628444de439 100644
--- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp
+++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.cpp
@@ -31,6 +31,7 @@ void register_implementations() {
     REGISTER_CPU(tile);
     REGISTER_CPU(select);
    REGISTER_CPU(reduce);
+    REGISTER_CPU(fake_convert);
 }
 
 }  // namespace cpu
diff --git a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp
index cb89eae29d8c56..15cc4b11c077eb 100644
--- a/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp
+++ b/src/plugins/intel_gpu/src/graph/impls/cpu/register.hpp
@@ -56,6 +56,7 @@ REGISTER_CPU(broadcast);
 REGISTER_CPU(tile);
 REGISTER_CPU(select);
 REGISTER_CPU(reduce);
+REGISTER_CPU(fake_convert);
 
 #undef REGISTER_CPU
 
diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp b/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp
new file mode 100644
index 00000000000000..991ab5aa12657a
--- /dev/null
+++ b/src/plugins/intel_gpu/src/graph/impls/registry/fake_convert_impls.cpp
@@ -0,0 +1,24 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "registry.hpp"
+#include "intel_gpu/primitives/fake_convert.hpp"
+#include "primitive_inst.h"
+
+namespace ov {
+namespace intel_gpu {
+
+using namespace cldnn;
+
+const std::vector<std::shared_ptr<cldnn::ImplementationManager>>& Registry<fake_convert>::get_implementations() {
+    static const std::vector<std::shared_ptr<ImplementationManager>> impls = {
+        OV_GPU_GET_INSTANCE_CPU(fake_convert, shape_types::static_shape)
+        OV_GPU_GET_INSTANCE_CPU(fake_convert, shape_types::dynamic_shape)
+    };
+
+    return impls;
+}
+
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp
index b7dbbaef6e64f1..4af6c79220533e 100644
--- a/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp
+++ b/src/plugins/intel_gpu/src/graph/impls/registry/registry.hpp
@@ -145,6 +145,7 @@ REGISTER_IMPLS(scatter_elements_update);
 REGISTER_IMPLS(shape_of);
 REGISTER_IMPLS(strided_slice);
 REGISTER_IMPLS(tile);
+REGISTER_IMPLS(fake_convert);
 
 REGISTER_DEFAULT_IMPLS(assign, CPU_S, CPU_D);
 REGISTER_DEFAULT_IMPLS(read_value, CPU_S, CPU_D);
diff --git a/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h b/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h
new file mode 100644
index 00000000000000..fc2fd41e65fa12
--- /dev/null
+++ b/src/plugins/intel_gpu/src/graph/include/fake_convert_inst.h
@@ -0,0 +1,55 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+#include "intel_gpu/primitives/fake_convert.hpp"
+#include "primitive_inst.h"
+
+#include <string>
+#include <memory>
+
+namespace cldnn {
+
+template <>
+struct typed_program_node<fake_convert> : public typed_program_node_base<fake_convert> {
+    using parent = typed_program_node_base<fake_convert>;
+    typed_program_node(const std::shared_ptr<fake_convert> prim, program& prog)
+        : parent(prim, prog), destination_type(prim->destination_type) {
+        support_padding_all(true);
+    }
+
+public:
+    using parent::parent;
+
+    program_node& input() const { return get_dependency(0); }
+    program_node& scale() const { return get_dependency(1); }
+    program_node& shift() const { return get_dependency(2); }
+    bool has_shift() const { return (get_dependencies().size() == 3); }
+
+    std::string get_destination_type() const { return destination_type; }
+
+    std::vector<size_t> get_shape_infer_dependencies() const override { return {}; }
+
+private:
+    std::string destination_type;
+};
+
+using fake_convert_node = typed_program_node<fake_convert>;
+
+template <>
+class typed_primitive_inst<fake_convert> : public typed_primitive_inst_base<fake_convert> {
+    using parent = typed_primitive_inst_base<fake_convert>;
+    using parent::parent;
+
+public:
+    template<typename ShapeType>
+    static std::vector<layout> calc_output_layouts(fake_convert_node const& /*node*/, const kernel_impl_params& impl_param);
+    static layout calc_output_layout(fake_convert_node const& node, kernel_impl_params const& impl_param);
+    static std::string to_string(fake_convert_node const& node);
+
+    typed_primitive_inst(network& network, fake_convert_node const& node);
+};
+
+using fake_convert_inst = typed_primitive_inst<fake_convert>;
+}  // namespace cldnn
diff --git a/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp b/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp
new file mode 100644
index 00000000000000..d17c574f042947
--- /dev/null
+++ b/src/plugins/intel_gpu/src/plugin/ops/fake_convert.cpp
@@ -0,0 +1,45 @@
+// Copyright (C) 2018-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "intel_gpu/plugin/program_builder.hpp"
+#include "intel_gpu/plugin/common_utils.hpp"
+
+#include "openvino/op/fake_convert.hpp"
+
+#include "intel_gpu/primitives/fake_convert.hpp"
+
+namespace ov {
+namespace intel_gpu {
+
+static void CreateFakeConvertOp(ProgramBuilder& p, const std::shared_ptr<ov::op::v13::FakeConvert>& op) {
+    validate_inputs_count(op, {2, 3});
+    const auto inputs = p.GetInputInfo(op);
+    const std::string layerName = layer_type_name_ID(op);
+    std::string destination_type = "";
+    if (auto fake_convert_v13 = std::dynamic_pointer_cast<ov::op::v13::FakeConvert>(op)) {
+        destination_type = fake_convert_v13->get_destination_type();
+    } else {
+        OPENVINO_THROW("[GPU] Can't cast FakeConvert operation to any supported version");
+    }
+    std::shared_ptr<cldnn::fake_convert> fake_convert_prim = nullptr;
+    if (inputs.size() == 2) {
+        fake_convert_prim = std::make_shared<cldnn::fake_convert>(layerName,
+                                                                  inputs[0],
+                                                                  inputs[1],
+                                                                  destination_type);
+    } else {
+        fake_convert_prim = std::make_shared<cldnn::fake_convert>(layerName,
+                                                                  inputs[0],
+                                                                  inputs[1],
+                                                                  inputs[2],
+                                                                  destination_type);
+    }
+
+    p.add_primitive(*op, fake_convert_prim);
+}
+
+REGISTER_FACTORY_IMPL(v13, FakeConvert);
+
+}  // namespace intel_gpu
+}  // namespace ov
diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp
new file mode 100644
index 00000000000000..f63e0a646d2ef3
--- /dev/null
+++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/fake_convert.cpp
@@ -0,0 +1,136 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_test_utils/ov_tensor_utils.hpp"
+#include "common_test_utils/file_utils.hpp"
+#include "shared_test_classes/base/ov_subgraph.hpp"
+
+#include "openvino/op/parameter.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/gather.hpp"
"openvino/op/gather.hpp" +#include "openvino/op/fake_convert.hpp" + +namespace { + +namespace fp8 { +constexpr float MAX_F8E4M3 = 448.f; +constexpr float MIN_F8E4M3 = 0.001953125f; + +constexpr float MAX_F8E5M2 = 57344.f; +constexpr float MIN_F8E5M2 = 0.0000152587890625f; +} // namespace fp8 + +using namespace std; +using namespace ov; +using namespace testing; +using ov::test::InputShape; + +using FakeConvertTestParams = std::tuple< + ov::Shape, // Input shapes + ov::Shape, // Scale shape + ov::Shape, // Shift shape + ov::element::Type, // input precision + std::string >; // device name + +class FakeConvertTest : public testing::WithParamInterface, + virtual public ov::test::SubgraphBaseStaticTest { +public: + static std::string getTestCaseName(testing::TestParamInfo obj) { + ov::Shape input_shape; + ov::Shape scale_shape; + ov::Shape shift_shape; + ov::element::Type prec; + std::string target_device; + + std::tie(input_shape, scale_shape, shift_shape, prec, target_device) = obj.param; + + std::ostringstream result; + result << "IS=("; + result << ov::test::utils::vec2str(input_shape) << "_"; + result << "scale_shape=" << ov::test::utils::vec2str(scale_shape) << "_"; + result << "shift_shape=" << ov::test::utils::vec2str(shift_shape) << "_"; + result << "input_precision=" << prec; + result << "device_type=" << target_device; + return result.str(); + } + +protected: + ov::Shape input_shape, scale_shape, shift_shape; + + void SetUp() override { + ov::element::Type prec; + std::tie(input_shape, scale_shape, shift_shape, prec, targetDevice) = GetParam(); + if (shift_shape.empty()) { + auto data = make_shared(prec, input_shape); + auto scale = op::v0::Constant::create(prec, + scale_shape, + {fp8::MAX_F8E4M3 / (fp8::MAX_F8E4M3 / 2.f), + 1.0f, + fp8::MAX_F8E4M3 / (fp8::MAX_F8E4M3 * 3.5f), + fp8::MAX_F8E4M3 / (fp8::MAX_F8E4M3 * 4.f)}); + + auto op = make_shared(data, scale, "f8e4m3"); + + function = make_shared(OutputVector{op}, ParameterVector{data}); + } else { + auto data = make_shared(prec, input_shape); + auto scale = op::v0::Constant::create(prec, + scale_shape, + {fp8::MAX_F8E4M3 / (fp8::MAX_F8E4M3 / 2.f), + 1.0f, + fp8::MAX_F8E4M3 / (fp8::MAX_F8E4M3 * 3.5f), + fp8::MAX_F8E4M3 / (fp8::MAX_F8E4M3 * 4.f)}); + auto shift = op::v0::Constant::create(prec, shift_shape, {0.f, 0.f, 0.f, 0.f}); + + auto op = make_shared(data, scale, shift, "f8e4m3"); + + function = make_shared(OutputVector{op}, ParameterVector{data}); + } + } + + void generate_inputs(const std::vector& target_shapes) override { + inputs.clear(); + const auto& func_inputs = function->inputs(); + auto& data_input = func_inputs[0]; + ov::Tensor tensor = ov::Tensor(data_input.get_element_type(), target_shapes[0]); + std::vector input_data{fp8::MAX_F8E4M3 / 4.f, + fp8::MAX_F8E4M3 / 3.f, + fp8::MAX_F8E4M3 / 2.f, + fp8::MAX_F8E4M3, + fp8::MAX_F8E4M3, + fp8::MAX_F8E4M3, + fp8::MAX_F8E4M3 * 1.2f, + fp8::MAX_F8E4M3 * 2.3f, + fp8::MAX_F8E4M3 * 3.4f, + fp8::MAX_F8E4M3 * 2.f, + fp8::MAX_F8E4M3 * 3.f, + fp8::MAX_F8E4M3 * 4.f}; + auto* data_ptr = tensor.data(); + for (size_t i = 0; i < input_data.size(); i++) { + data_ptr[i] = input_data[i]; + } + inputs.insert({data_input.get_node_shared_ptr(), tensor}); + } +}; + +TEST_P(FakeConvertTest, Inference) { + run(); +} + +const std::vector input_precisions = {ov::element::f32}; + +const std::vector input_shapes = {{4, 3}}; + +const ov::Shape scale_shape = {4, 1}; +const std::vector shift_shapes = {{4, 1}, {}}; + +INSTANTIATE_TEST_SUITE_P(Smoke_FakeConvertTest, + FakeConvertTest, + 
+                         ::testing::Combine(::testing::ValuesIn(input_shapes),
+                                            ::testing::Values(scale_shape),
+                                            ::testing::ValuesIn(shift_shapes),
+                                            ::testing::ValuesIn(input_precisions),
+                                            ::testing::Values(ov::test::utils::DEVICE_GPU)),
+                         FakeConvertTest::getTestCaseName);
+}  // namespace
diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp
index a16cd20846a1c7..5dfc450e43905a 100644
--- a/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/module_tests/impls_registry_test.cpp
@@ -85,6 +85,7 @@
 #include "intel_gpu/primitives/swiglu.hpp"
 #include "intel_gpu/primitives/tile.hpp"
 #include "intel_gpu/primitives/unique.hpp"
+#include "intel_gpu/primitives/fake_convert.hpp"
 #include "primitive_inst.h"
 #include "test_utils.h"
 
@@ -226,5 +227,6 @@ TEST(registry_test, no_null_impls) {
                cldnn::unique_count,
                cldnn::unique_gather,
                cldnn::scaled_dot_product_attention,
-               cldnn::rope>();
+               cldnn::rope,
+               cldnn::fake_convert>();
 }
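
Usage note (not part of the patch): the sketch below shows, end to end, what this change enables — an ov::Model containing ov::op::v13::FakeConvert compiled for the GPU device, where the op is now mapped to the new cldnn::fake_convert primitive and executed through the registered CPU implementation. It mirrors the functional test above; the shapes, scale/shift values, and the standalone main() are illustrative assumptions, not part of the PR.

// Minimal sketch, assuming the public OpenVINO 2024.x C++ API.
#include <memory>
#include <vector>

#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/fake_convert.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/runtime/core.hpp"

int main() {
    // Build a tiny model: data -> FakeConvert(scale, shift, "f8e4m3").
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{4, 3});
    auto scale = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 1}, {2.f, 1.f, 0.5f, 0.25f});
    auto shift = ov::op::v0::Constant::create(ov::element::f32, ov::Shape{4, 1}, {0.f, 0.f, 0.f, 0.f});
    auto fc = std::make_shared<ov::op::v13::FakeConvert>(data, scale, shift, "f8e4m3");
    auto model = std::make_shared<ov::Model>(ov::OutputVector{fc}, ov::ParameterVector{data});

    // With this patch, the GPU plugin no longer rejects FakeConvert: it is translated
    // to cldnn::fake_convert and evaluated via ov::op::v13::FakeConvert::evaluate().
    ov::Core core;
    auto compiled = core.compile_model(model, "GPU");
    auto request = compiled.create_infer_request();

    std::vector<float> input(4 * 3, 100.f);
    request.set_input_tensor(ov::Tensor(ov::element::f32, ov::Shape{4, 3}, input.data()));
    request.infer();
    auto out = request.get_output_tensor();  // fake-converted values, still f32
    return out.get_size() == 12 ? 0 : 1;
}

The numerical behaviour is defined entirely by the core reference implementation, since the CPU impl in this patch calls op->evaluate() on host tensors rather than running a dedicated GPU kernel.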