diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
index cf6b0e502f92ed..272a0863ae1dd1 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -146,6 +146,35 @@ static bool disable_reduce_decomposition(const std::shared_ptr n
     }
     return false;
 }
+// Predicate handed to pass_config->set_callback() in TransformationsPipeline::apply: false only when the single-consumer chain below `node` matches; true otherwise.
+static bool is_non_decompression_multiply(const std::shared_ptr node) {
+    auto get_single_consumer = [](const std::shared_ptr node) -> std::shared_ptr {  // node fed by output 0, or nullptr unless output 0 has exactly one target input
+        const auto consumers = node->get_output_target_inputs(0);
+        if (consumers.size() != 1)
+            return nullptr;
+        return consumers.begin()->get_node()->shared_from_this();
+    };
+    // Without exactly one consumer on output 0 there is no chain to inspect, so the answer is true.
+    auto consumer = get_single_consumer(node);
+    if (!consumer)
+        return true;
+    // Check the direct consumer first, then nodes reached through one or two single-consumer hops; any match yields false.
+    if (ov::is_type(consumer)) {  // NOTE(review): the ov::is_type template arguments are absent from this text - confirm the matched node types against the real file
+        return false;
+    } else if (ov::is_type(consumer)) {  // pass-through candidate: look one hop further
+        consumer = get_single_consumer(consumer);
+        if (consumer != nullptr && ov::is_type(consumer)) {
+            return false;
+        }
+    }
+    if (consumer != nullptr && ov::is_type(consumer)) {  // also runs on the node reached by the hop above, which may be null
+        consumer = get_single_consumer(consumer);
+        if (consumer != nullptr && ov::is_type(consumer)) {
+            return false;
+        }
+    }
+    return true;  // no pattern matched
+}
 } // namespace
 
 namespace ov {
@@ -247,6 +276,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) {
         });
 
         manager.register_pass(ov::element::TypeVector{ov::element::u8, ov::element::u4, ov::element::i4}, true);
+        // Ignore nodes that are not related to FullyConnected and allow ConstantFolding to be applied to them
+        pass_config->set_callback(is_non_decompression_multiply);
 
         const bool keep_precision_sensitive_in_fp32_1 = true;
         const bool convert_input_output_precision = false;