diff --git a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp index 662d32be4a10fb..76e6d60b8e3e90 100644 --- a/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp +++ b/docs/articles_en/assets/snippets/lpt_intel_cpu_plugin.cpp @@ -38,7 +38,7 @@ auto defaultPrecisions = useLpt ? ov::pass::low_precision::precision_set::get_int8_support() : std::vector{}; if (useLpt) { // disable constant folding on dequantization subgraphs so they can be processed by LPT - manager.register_pass(defaultPrecisions); + manager.register_pass(defaultPrecisions); } // OpenVINO common transformations happen here diff --git a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp index 89fb71f1a8f0f1..b8c5ad1177d2d5 100644 --- a/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp +++ b/src/common/low_precision_transformations/tests/mark_dequantization_subgraph_transformation.cpp @@ -31,7 +31,9 @@ TEST_F(TransformationTestsF, KeepConstPrecision) { model = std::make_shared(stub_op, ParameterVector{}); } - manager.register_pass(element::TypeVector{element::u4}); + manager.register_pass(element::TypeVector{element::u4}); + manager.register_pass(); + manager.register_pass(element::TypeVector{element::u4}); manager.register_pass(ov::element::u4, ov::element::u8, type_to_fuse_map{}, false, false); { @@ -46,7 +48,7 @@ TEST_F(TransformationTestsF, KeepConstPrecision) { } } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { +TEST_F(TransformationTestsF, MarkDequantizationTransformation) { // Input graph: // // Parameter @@ -69,7 +71,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { // \ / // Convolution // - // After MarkDequantizationAndDecompression all Subtract and Multiply nodes 
from above graph + // After MarkDequantization all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. // All 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute // Weights and zero points are marked with 'KeepConstPrecision' attribute @@ -114,7 +116,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -170,7 +173,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformation) { comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZeroPoint) { +TEST_F(TransformationTestsF, MarkDequantizationTransformationNoZeroPoint) { // Input graph: // // Parameter @@ -190,7 +193,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ // \ / // Convolution // - // After MarkDequantizationAndDecompression all Multiply nodes from above graph + // After MarkDequantization all Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute // Weights node is marked with 'KeepConstPrecision' attribute @@ -229,7 +232,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -274,7 +278,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZeroPointFP16) { +TEST_F(TransformationTestsF, MarkDequantizationTransformationNoZeroPointFP16) { // Input graph: // // Parameter @@ -294,7 +298,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ // \ / // Convolution // - // After MarkDequantizationAndDecompression all Multiply nodes from above graph + // After MarkDequantization all Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also 'Convert(DCF)' node from above graph is marked with 'DisableConstantFolding' attribute // Weights node is marked with 'KeepConstPrecision' attribute @@ -337,7 +341,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); { auto parameter = std::make_shared(element::f32, Shape{1, 16, 14, 14}); @@ -385,7 +390,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNoZ comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNotConstantWeights) { +TEST_F(TransformationTestsF, MarkDequantizationTransformationNotConstantWeights) { // Input graph: // // Parameter @@ -408,7 +413,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNot // \ / // Convolution // - // After MarkDequantizationAndDecompression all Subtract and Multiply nodes from above graph + // After MarkDequantization all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also all 'Convert(DCF)' nodes from above graph are marked with 'DisableConstantFolding' attribute // Weights and zero point nodes are marked with 'KeepConstPrecision' attribute @@ -456,7 +461,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNot model = std::make_shared(conv, ParameterVector{parameter}); } - manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); + manager.register_pass(element::TypeVector{element::u8, element::i8}); manager.register_pass(); { @@ -511,7 +517,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationNot comparator.enable(FunctionsComparator::CmpValues::RUNTIME_KEYS); } -TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationFoldSubConst) { +TEST_F(TransformationTestsF, MarkDequantizationTransformationFoldSubConst) { // Input graph: After transformation: // // Constant Constant Constant @@ -525,7 +531,7 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationFol // | / \ / // Multiply Multiply // - // After MarkDequantizationAndDecompression all Subtract and Multiply nodes from above graph + // After MarkDequantization all Subtract and Multiply nodes from above graph // are marked with 'DequantizationNode' attribute. 
// Also all 'Convert(DCF)' node before weights is marked with 'DisableConstantFolding' attribute // but Convert before Dequantization Sub const isn't because fold_subtract_const is set to true @@ -542,7 +548,8 @@ TEST_F(TransformationTestsF, MarkDequantizationAndDecompressionTransformationFol model = std::make_shared(ov::OutputVector{multiply}); } - manager.register_pass(element::TypeVector{element::u8}, true); + manager.register_pass(element::TypeVector{element::u8}, true); + manager.register_pass(element::TypeVector{element::u8}, true); manager.register_pass(); { diff --git a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp index c60d9ca5d3659c..6cbd8d990ac73e 100644 --- a/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp +++ b/src/common/transformations/include/transformations/low_precision/mark_dequantization_subgraph.hpp @@ -13,26 +13,67 @@ namespace ov { namespace pass { /** * @ingroup ov_transformation_common_api - * @brief MarkDequantizationAndDecompression is a set of transformation which mark - * Dequantization and Decompression patterns with the keep_const_precision, disable_const_folding and - * dequantization attributes. Also it calls ConstantFolding. + * + * @brief MarkDequantization matches Dequantization subgraphs and marks Subtract and Multiply nodes + * with the dequantization attribute. Also if Convert nodes are part of the subgraph they might be marked + * with the disable_const_folding attribute. + * + * If Convert -> Reshape/Unsqueeze are part of the Dequantization subgraph, Convert and Reshape/Unsqueeze + * nodes will be swapped to eliminate Reshape/Unsqueeze in the next ConstantFolding. + * + * Dequantization subgraph may have two forms: with and without Subtract. + * ZeroPoints and Scale might be present as subgraphs and include Convert ops. 
+ * + * Input ZeroPoints + * │ │ + * ▼ ▼ + * Convert (opt) Reshape/Unsqueeze + * │ │ + * ▼ ▼ Scale Input Scale + * Subtract │ │ │ + * │ ▼ ▼ ▼ + * │ (opt) Reshape/Unsqueeze Convert (opt) Reshape/Unsqueeze + * │ │ │ │ + * ▼ ▼ ▼ ▼ + * Multiply Multiply + * */ -class TRANSFORMATIONS_API MarkDequantizationAndDecompression : public ModelPass { +class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { public: - OPENVINO_RTTI("MarkDequantizationAndDecompression", "0"); - explicit MarkDequantizationAndDecompression(element::TypeVector precisions, - const bool fold_subtract_const = false, - const bool fold_multiply_const = true) - : m_fold_subtract_const(fold_subtract_const), - m_fold_multiply_const(fold_multiply_const), - m_precisions(std::move(precisions)) {} - - bool run_on_model(const std::shared_ptr& m) override; + OPENVINO_RTTI("MarkDequantization", "0"); + explicit MarkDequantization(const element::TypeVector& precisions, + bool fold_subtract_const = false, + bool fold_multiply_const = true); +}; -private: - bool m_fold_subtract_const = false; - bool m_fold_multiply_const = true; - element::TypeVector m_precisions; +/** + * @ingroup ov_transformation_common_api + * + * @brief KeepConstsPrecision matches Dequantization subgraphs and if Input/ZeroPoints/Scale are Constants + * they might be marked with keep_const_precision attribute. + * + * Dequantization subgraph may have two forms: with and without Subtract. 
+ * + * Input + * │ + * ▼ + * Convert ZeroPoints + * │ │ + * ▼ ▼ Input + * Subtract │ + * │ ▼ + * │ Scale Convert Scale + * │ │ │ │ + * ▼ ▼ ▼ ▼ + * Multiply Multiply + * + */ +class TRANSFORMATIONS_API KeepConstsPrecision : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("KeepConstsPrecision", "0"); + explicit KeepConstsPrecision(const element::TypeVector& precisions, + bool fold_subtract_const = false, + bool fold_multiply_const = true); }; } // namespace pass diff --git a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp index fea833171fb1ee..37ee2d12d9aebb 100644 --- a/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/common_optimizations.cpp @@ -128,6 +128,7 @@ bool ov::pass::CommonOptimizations::run_on_model(const std::shared_ptr manager.register_pass(); } + if (m_low_precision_enabled) { + manager.register_pass( + element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4}); + } + // RemoveConcatZeroDimInput and RemoveMultiSubGraphOpDanglingParamsResults // should be performed before first !ConstantFolding! call. // The passes can deteach graph branches where zero dimesion is calculated. @@ -145,12 +150,6 @@ bool ov::pass::MOCTransformations::run_on_model(const std::shared_ptr REGISTER_PASS(manager, EliminateLoopInputsOutputs); REGISTER_PASS(manager, Validate) - if (m_low_precision_enabled) { - // includes ConstantFolding call - manager.register_pass( - element::TypeVector{ov::element::i8, ov::element::u8, ov::element::i4, ov::element::u4}); - } - // todo: ticket 96960 // the order EliminateDuplicateTIInputs and RemoveMultiSubGraphOpDanglingParamsResults is important // it looks like we need to combine these transformations into one. 
diff --git a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp index 3e742ff305c68c..bddaf81e31a067 100644 --- a/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp +++ b/src/common/transformations/src/transformations/low_precision/mark_dequantization_subgraph.cpp @@ -22,71 +22,6 @@ using namespace ov; using namespace ov::op; using namespace ov::pass::pattern; -/** - * @ingroup ov_transformation_common_api - * - * @brief MarkDequantization matches Dequantization subgraphs and marks Subtract and Multiply nodes - * with the dequantization attribute. Also if Convert nodes are part of the subgraph they might be marked - * with the disable_const_folding attribute. - * - * If Convert -> Reshape/Unsqueeze are part of the Dequantization subraph, Convert and Reshape/Unsqueeze - * nodes will be swapped to eliminate Reshape/Unsqueeze in the next ConstantFolding. - * - * Dequantization subgraph may have two forms: with and without Subtract. - * ZeroPoints and Scale might be present as subgraphs and include Convert ops. - * - * Input ZeroPoints - * │ │ - * ▼ ▼ - * Convert (opt) Reshape/Unsqueeze - * │ │ - * ▼ ▼ Scale Input Scale - * Subtract │ │ │ - * │ ▼ ▼ ▼ - * │ (opt) Reshape/Unsqueeze Convert (opt) Reshape/Unsqueeze - * │ │ │ │ - * ▼ ▼ ▼ ▼ - * Multiply Multiply - * - */ -class TRANSFORMATIONS_API MarkDequantization : public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("MarkDequantization", "0"); - explicit MarkDequantization(const element::TypeVector& precisions, - bool fold_subtract_const, - bool fold_multiply_const); -}; - -/** - * @ingroup ov_transformation_common_api - * - * @brief KeepConstsPrecision matches Dequantization subgraphs and if Input/ZeroPoints/Scale are Constants - * they might be marked with keep_const_precision attribute. 
- * - * Dequantization subgraph may have two forms: with and without Subtract. - * - * Input - * │ - * ▼ - * Convert ZeroPoints - * │ │ - * ▼ ▼ Input - * Subtract │ - * │ ▼ - * │ Scale Convert Scale - * │ │ │ │ - * ▼ ▼ ▼ ▼ - * Multiply Multiply - * - */ -class TRANSFORMATIONS_API KeepConstsPrecision : public ov::pass::MatcherPass { -public: - OPENVINO_RTTI("KeepConstsPrecision", "0"); - explicit KeepConstsPrecision(const element::TypeVector& precisions, - bool fold_subtract_const, - bool fold_multiply_const); -}; - namespace { bool check_precision(const ov::element::Type_t type_to_check, const ov::element::TypeVector& precisions) { @@ -129,9 +64,9 @@ void swap_nodes(const PatternValueMap& pt_map, } // namespace -MarkDequantization::MarkDequantization(const element::TypeVector& precisions, - const bool fold_subtract_const, - const bool fold_multiply_const) { +ov::pass::MarkDequantization::MarkDequantization(const element::TypeVector& precisions, + const bool fold_subtract_const, + const bool fold_multiply_const) { // data input: auto input_pattern = any_input(); auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); @@ -191,9 +126,9 @@ MarkDequantization::MarkDequantization(const element::TypeVector& precisions, this->register_matcher(m, callback); } -KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, - bool fold_subtract_const, - bool fold_multiply_const) { +ov::pass::KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, + bool fold_subtract_const, + bool fold_multiply_const) { // data input: auto input_pattern = any_input(); auto convert_pattern = wrap_type({input_pattern}, consumers_count(1)); @@ -239,17 +174,3 @@ KeepConstsPrecision::KeepConstsPrecision(const element::TypeVector& precisions, auto m = std::make_shared(multiply_pattern, "KeepConstsPrecision"); this->register_matcher(m, callback); } - -bool pass::MarkDequantizationAndDecompression::run_on_model(const std::shared_ptr& m) { - 
const auto& pass_config = get_pass_config(); - auto callback = pass_config->get_callback(); - pass_config->set_callback(callback); - pass_config->set_callback(callback); - - ov::pass::Manager manager(pass_config, "MarkDequantizationAndDecompressionManager"); - manager.register_pass(); - manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); - manager.register_pass(); - manager.register_pass(m_precisions, m_fold_subtract_const, m_fold_multiply_const); - return manager.run_passes(m); -} diff --git a/src/common/transformations/tests/op_conversions/convert_subtract.cpp b/src/common/transformations/tests/op_conversions/convert_subtract.cpp index 93b5c223345d03..1a1d6d8b5c83bb 100644 --- a/src/common/transformations/tests/op_conversions/convert_subtract.cpp +++ b/src/common/transformations/tests/op_conversions/convert_subtract.cpp @@ -77,7 +77,7 @@ TEST_F(TransformationTestsF, ConvertSubtractDequantizationSubgraph) { model = std::make_shared(mul, ParameterVector{data}); - manager.register_pass(element::TypeVector{element::u8}); + manager.register_pass(element::TypeVector{element::u8}); manager.register_pass(); } diff --git a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp index 2e79d066b069ff..8daf8d81704301 100644 --- a/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp +++ b/src/plugins/intel_cpu/src/transformations/transformation_pipeline.cpp @@ -342,10 +342,10 @@ void Transformations::PreLpt(const std::vector& defaultPrecis ov::element::i4, ov::element::nf4, ov::element::f4e2m1}; - CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantizationAndDecompression, decompression_precisions, false, true); + CPU_REGISTER_PASS_X64(decompression_handling_manager, ov::pass::MarkDequantization, decompression_precisions, false, true); CPU_SET_CALLBACK_X64(decompression_handling_manager, [&](const_node_ptr &node) -> bool 
{ return !is_decompression_multiply(node); - }, ov::pass::MarkDequantizationAndDecompression); + }, ov::pass::MarkDequantization); CPU_SET_CALLBACK_COMMON( decompression_handling_manager, @@ -371,7 +371,7 @@ void Transformations::PreLpt(const std::vector& defaultPrecis ov::pass::Manager manager("Plugin:CPU"); manager.set_per_pass_validation(false); if (useLpt) - CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantizationAndDecompression, defaultPrecisions); + CPU_REGISTER_PASS_COMMON(manager, ov::pass::MarkDequantization, defaultPrecisions); auto get_convert_precisions = [&]() { precisions_map map = { @@ -427,6 +427,10 @@ void Transformations::PreLpt(const std::vector& defaultPrecis CPU_REGISTER_PASS_COMMON(manager, ov::pass::AUGRUCellFusion); CPU_REGISTER_PASS_COMMON(manager, ov::pass::CommonOptimizations); + CPU_REGISTER_PASS_X64(manager, ov::pass::KeepConstsPrecision, decompression_precisions, false, true); + CPU_SET_CALLBACK_X64(manager, [&](const_node_ptr &node) -> bool { + return !is_decompression_multiply(node); + }, ov::pass::KeepConstsPrecision); CPU_REGISTER_PASS_COMMON(manager, ov::pass::WrapInterpolateIntoTransposes); CPU_REGISTER_PASS_COMMON(manager, ov::pass::TransposeSinking); CPU_REGISTER_PASS_COMMON(manager, ov::pass::ConvertSequenceToTensorIterator); diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp index f4a1014ee879df..b0a767eec013bb 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp @@ -292,7 +292,7 @@ void TransformationsPipeline::apply(std::shared_ptr func) { auto is_model_quantized = ov::pass::low_precision::LowPrecision::isFunctionQuantized(func); enableInt8 = config.get_property(ov::intel_gpu::enable_lp_transformations) && is_model_quantized; if (enableInt8) { - manager.register_pass( + manager.register_pass( std::vector{ ov::element::i8, ov::element::u8, 
ov::element::i4, ov::element::u4 }); } @@ -372,8 +372,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { // Disable subtract folding only for the dGPUs to meet the requirements of oneDNN: // it expects to have the same data type for weights and zero points (apply it only for u8 data type, since other compression // types are not supported by oneDNN) - manager.register_pass(supported_woq_types, !device_info.supports_immad); - pass_config->set_callback([&](const std::shared_ptr node) { + manager.register_pass(supported_woq_types, !device_info.supports_immad); + pass_config->set_callback([&](const std::shared_ptr node) { return !is_decompression_multiply(node, device_info.supports_immad); }); @@ -911,8 +911,8 @@ void TransformationsPipeline::apply(std::shared_ptr func) { manager.register_pass(); // ZP should not be folded for FC. But still, ZP should be folded for Gather. - // Therefore, run MarkDequantizationAndDecompression again to fold ZP constant. - manager.register_pass(supported_woq_types, true); + // Therefore, run MarkDequantization again to fold ZP constant. + manager.register_pass(supported_woq_types, true); if (device_info.supports_immad) { if (disable_horizontal_fc_fusion) manager.register_pass();