From d4cd1e26f7db7c637f79fe392087a9ebdb2b0066 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Tue, 19 Nov 2024 15:39:20 +0000 Subject: [PATCH 1/4] Add a new compute pattern --- .../npuw/partitioning/online/compiler.cpp | 1 + .../npuw/partitioning/online/snapshot.cpp | 3 ++ .../npuw/partitioning/patterns/compute.cpp | 46 +++++++++++++++++++ .../npuw/partitioning/patterns/compute.hpp | 5 ++ 4 files changed, 55 insertions(+) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index 89a0e0d2da9b23..2951ea353968da 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -25,6 +25,7 @@ namespace { static const std::map ISOL_PRESETS = {{"COMPUTE", "P:DQMatMulGQu4/compute,P:DQMatMulCWu4/compute," "P:DQMatMulGQi4/compute,P:DQMatMulCWi4/compute," + "P:DQMatMulConv/compute," "P:VocabMatMul/compute," "P:RMSNorm/compute"}}; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index c8a27c47665021..d8fbad01b1f775 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -481,6 +481,9 @@ void Snapshot::earlyRegroup() { } else if (isolate.pattern == "DQMatMulGQi4") { rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; + } else if (isolate.pattern == "DQMatMulConv") { + rewr.add_matcher(shared_from_this(), isolate.tag); + handle_patterns = true; } else if (isolate.pattern == "VocabMatMul") { rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp index b082d67037db7d..14c91b70e279c2 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp @@ -226,6 +226,52 @@ DQMatMulCWi4::DQMatMulCWi4(const std::shared_ptr& sn register_matcher(std::make_shared(qmm, "TagDQMatMulCWi4"), std::move(callback)); } +// Pattern: +// -> Transpose ------------------------------> +// Param/Const --> Convert(f32) --> Multiply -> Convolution -> Transpose -> +// Param/Const -> (Convert(f32)) -> + +DQMatMulConv::DQMatMulConv(const std::shared_ptr& snapshot, const std::string& isol_tag) { + auto param = opp::any_input(); + auto convert = opp::wrap_type({param->output(0)}); + auto param2 = opp::any_input(); + auto convert2 = opp::optional({param2->output(0)}); + auto multiply = opp::wrap_type({convert, convert2}); + auto tr_input = opp::any_input(); + auto transpose_in = opp::wrap_type({tr_input, opp::any_input()}); + auto conv = opp::wrap_type({transpose_in, multiply}); + auto transpose_out = opp::wrap_type({conv, opp::any_input()}); + + auto node_to_gptr = snapshot->getNodeToGroupMap(); + + // Note: Use [=] to make sure the above objects stay alive in the callback + auto callback = [=](ov::pass::pattern::Matcher& m) { + auto& node_to_output = m.get_pattern_value_map(); + + auto matched_node_param = node_to_output.at(param).get_node_shared_ptr(); + auto matched_node_param2 = node_to_output.at(param2).get_node_shared_ptr(); + + auto matched_node_transpose_in = node_to_output.at(transpose_in).get_node_shared_ptr(); + auto matched_node_transpose_out = node_to_output.at(transpose_out).get_node_shared_ptr(); + auto matched_node_multiply = node_to_output.at(multiply).get_node_shared_ptr(); + auto matched_node_conv = node_to_output.at(conv).get_node_shared_ptr(); + + if ((matched_node_param->get_element_type() == ov::element::i4 || + matched_node_param->get_element_type() == ov::element::i8) && + (matched_node_param2->get_element_type() == ov::element::f32 || + matched_node_param2->get_element_type() == ov::element::f16)) { + // Partitioning ignores Param/Const -> Convert nodes + node_to_gptr->at(matched_node_transpose_in)->isolate(isol_tag); + node_to_gptr->at(matched_node_transpose_out)->isolate(isol_tag); + node_to_gptr->at(matched_node_multiply)->isolate(isol_tag); + node_to_gptr->at(matched_node_conv)->isolate(isol_tag); + } + + return false; // root hasn't changed + }; + register_matcher(std::make_shared(transpose_out, "TagDQMatMulConv"), std::move(callback)); +} + // This is a case for Raw (f16/f32) MatMul connected directly to the Result. // // The following combinations are covered: diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp index faa2fe3f0f9578..ab6c177d8ef924 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp @@ -41,6 +41,11 @@ class DQMatMulCWi4 : public ov::pass::MatcherPass { DQMatMulCWi4(const std::shared_ptr& snapshot, const std::string& isol_tag); }; +class DQMatMulConv : public ov::pass::MatcherPass { +public: + DQMatMulConv(const std::shared_ptr& snapshot, const std::string& isol_tag); +}; + class VocabMatMul : public ov::pass::MatcherPass { public: VocabMatMul(const std::shared_ptr& snapshot, const std::string& isol_tag); From c821d300a8ca9f7d032cbb5eda975eee73e5b022 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Tue, 19 Nov 2024 16:41:31 +0000 Subject: [PATCH 2/4] Add another pattern --- .../npuw/partitioning/online/compiler.cpp | 2 +- .../npuw/partitioning/online/snapshot.cpp | 3 ++ .../npuw/partitioning/patterns/compute.cpp | 35 +++++++++++++++++++ .../npuw/partitioning/patterns/compute.hpp | 5 +++ 4 files changed, 44 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index 2951ea353968da..a81beea0b5ab0c 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -27,7 +27,7 @@ static const std::map ISOL_PRESETS = {{"COMPUTE", "P:DQMatMulGQi4/compute,P:DQMatMulCWi4/compute," "P:DQMatMulConv/compute," "P:VocabMatMul/compute," - "P:RMSNorm/compute"}}; + "P:RMSNorm/compute,P:RMSNorm2/compute"}}; } // For missing declaration warning diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index d8fbad01b1f775..6767e7314f62dc 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -469,6 +469,9 @@ void Snapshot::earlyRegroup() { if (isolate.pattern == "RMSNorm") { rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; + } else if (isolate.pattern == "RMSNorm2") { + rewr.add_matcher(shared_from_this(), isolate.tag); + handle_patterns = true; } else if (isolate.pattern == "DQMatMulCWu4") { rewr.add_matcher(shared_from_this(), isolate.tag); handle_patterns = true; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp index 14c91b70e279c2..b6da99e8b4d5ea 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.cpp @@ -348,6 +348,7 @@ RMSNorm::RMSNorm(const std::shared_ptr& snapshot, co // Note: Use [=] to make sure the above objects stay alive in the callback auto callback = [=](ov::pass::pattern::Matcher& m) { + std::cout << "RMSNorm MATCHED!" << std::endl; auto& node_to_output = m.get_pattern_value_map(); auto matched_hadd = node_to_output.at(hadd).get_node_shared_ptr(); @@ -373,6 +374,40 @@ RMSNorm::RMSNorm(const std::shared_ptr& snapshot, co register_matcher(std::make_shared(multiply2, "TagRMSNorm"), std::move(callback)); } +// TODO: visualize +RMSNorm2::RMSNorm2(const std::shared_ptr& snapshot, const std::string& isol_tag) { + auto hadd = opp::wrap_type({opp::any_input(), opp::any_input()}); + auto power = opp::wrap_type({hadd, opp::any_input()}); + auto reduce = opp::wrap_type({power, opp::any_input()}); + auto sqrt = opp::wrap_type({reduce}); + auto div = opp::wrap_type({hadd, sqrt}); + auto multiply = opp::wrap_type({opp::any_input(), div}); + + auto node_to_gptr = snapshot->getNodeToGroupMap(); + + // Note: Use [=] to make sure the above objects stay alive in the callback + auto callback = [=](ov::pass::pattern::Matcher& m) { + auto& node_to_output = m.get_pattern_value_map(); + + auto matched_hadd = node_to_output.at(hadd).get_node_shared_ptr(); + auto matched_power = node_to_output.at(power).get_node_shared_ptr(); + auto matched_reduce = node_to_output.at(reduce).get_node_shared_ptr(); + auto matched_sqrt = node_to_output.at(sqrt).get_node_shared_ptr(); + auto matched_div = node_to_output.at(div).get_node_shared_ptr(); + auto matched_multiply = node_to_output.at(multiply).get_node_shared_ptr(); + + node_to_gptr->at(matched_hadd)->isolate(isol_tag); + node_to_gptr->at(matched_power)->isolate(isol_tag); + node_to_gptr->at(matched_reduce)->isolate(isol_tag); + node_to_gptr->at(matched_sqrt)->isolate(isol_tag); + node_to_gptr->at(matched_div)->isolate(isol_tag); + node_to_gptr->at(matched_multiply)->isolate(isol_tag); + + return false; // root hasn't changed + }; + register_matcher(std::make_shared(multiply, "TagRMSNorm2"), std::move(callback)); +} + } // namespace compute } // namespace patterns } // namespace npuw diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp index ab6c177d8ef924..77bc9fb3f90418 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/patterns/compute.hpp @@ -56,6 +56,11 @@ class RMSNorm : public ov::pass::MatcherPass { RMSNorm(const std::shared_ptr& snapshot, const std::string& isol_tag); }; +class RMSNorm2 : public ov::pass::MatcherPass { +public: + RMSNorm2(const std::shared_ptr& snapshot, const std::string& isol_tag); +}; + } // namespace compute } // namespace patterns } // namespace npuw From 295a5b0d1c883ce735b7f6df81fece8c0c9a0048 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Wed, 20 Nov 2024 13:22:54 +0000 Subject: [PATCH 3/4] Forbid merging small number of repeated groups --- .../npuw/partitioning/online/snapshot.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index 6767e7314f62dc..b88a6d4546c113 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -729,6 +729,15 @@ std::shared_ptr Snapshot::tryMergeTriangles(const std::vector Snapshot::tryMergeRepeating(const std::vector new_rep = std::make_shared(); for (size_t i = 0; i < conss.size(); ++i) { From 65686c2f410b91d9748567ba486776ae5628f338 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Wed, 20 Nov 2024 17:37:02 +0000 Subject: [PATCH 4/4] Address review comments --- .../npuw/partitioning/online/snapshot.cpp | 72 +++++++++---------- .../npuw/partitioning/patterns/compute.cpp | 16 ++--- 2 files changed, 41 insertions(+), 47 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index b88a6d4546c113..f1ef604033481d 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -465,34 +465,20 @@ void Snapshot::earlyRegroup() { break; } case PatternType::PATTERN: { - // FIXME: refactor as more patterns are supported - if (isolate.pattern == "RMSNorm") { - rewr.add_matcher(shared_from_this(), isolate.tag); - handle_patterns = true; - } else if (isolate.pattern == "RMSNorm2") { - rewr.add_matcher(shared_from_this(), isolate.tag); - handle_patterns = true; - } else if (isolate.pattern == "DQMatMulCWu4") { - rewr.add_matcher(shared_from_this(), isolate.tag); - handle_patterns = true; - } else if (isolate.pattern == "DQMatMulGQu4") { - rewr.add_matcher(shared_from_this(), isolate.tag); - handle_patterns = true; - } else if (isolate.pattern == "DQMatMulCWi4") { - rewr.add_matcher(shared_from_this(), isolate.tag); - handle_patterns = true; - } else if (isolate.pattern == "DQMatMulGQi4") { - rewr.add_matcher(shared_from_this(), isolate.tag); - handle_patterns = true; - } else if (isolate.pattern == "DQMatMulConv") { - rewr.add_matcher(shared_from_this(), isolate.tag); - handle_patterns = true; - } else if (isolate.pattern == "VocabMatMul") { - rewr.add_matcher(shared_from_this(), isolate.tag); - handle_patterns = true; - } else { - LOG_WARN("OPENVINO_NPUW_ISOLATE: unsupported pattern " << isolate.pattern << " is skipped!"); - } +#define HNDL(p) \ + if (isolate.pattern == #p) { \ + rewr.add_matcher(shared_from_this(), isolate.tag); \ + handle_patterns = true; \ + } + HNDL(RMSNorm); + HNDL(RMSNorm2); + HNDL(DQMatMulCWu4); + HNDL(DQMatMulGQu4); + HNDL(DQMatMulCWi4); + HNDL(DQMatMulGQi4); + HNDL(DQMatMulConv); + HNDL(VocabMatMul); +#undef HNDL } } } @@ -730,11 +716,16 @@ std::shared_ptr Snapshot::tryMergeTriangles(const std::vector Snapshot::tryMergeRepeating(const std::vector& sn auto param2 = opp::any_input(); auto convert2 = opp::optional({param2->output(0)}); auto multiply = opp::wrap_type({convert, convert2}); - auto tr_input = opp::any_input(); - auto transpose_in = opp::wrap_type({tr_input, opp::any_input()}); + auto transpose_in = opp::wrap_type({opp::any_input(), opp::any_input()}); auto conv = opp::wrap_type({transpose_in, multiply}); auto transpose_out = opp::wrap_type({conv, opp::any_input()}); @@ -248,18 +247,18 @@ DQMatMulConv::DQMatMulConv(const std::shared_ptr& sn auto callback = [=](ov::pass::pattern::Matcher& m) { auto& node_to_output = m.get_pattern_value_map(); - auto matched_node_param = node_to_output.at(param).get_node_shared_ptr(); - auto matched_node_param2 = node_to_output.at(param2).get_node_shared_ptr(); + auto matched_node_param = node_to_output.at(param); + auto matched_node_param2 = node_to_output.at(param2); auto matched_node_transpose_in = node_to_output.at(transpose_in).get_node_shared_ptr(); auto matched_node_transpose_out = node_to_output.at(transpose_out).get_node_shared_ptr(); auto matched_node_multiply = node_to_output.at(multiply).get_node_shared_ptr(); auto matched_node_conv = node_to_output.at(conv).get_node_shared_ptr(); - if ((matched_node_param->get_element_type() == ov::element::i4 || - matched_node_param->get_element_type() == ov::element::i8) && - (matched_node_param2->get_element_type() == ov::element::f32 || - matched_node_param2->get_element_type() == ov::element::f16)) { + if ((matched_node_param.get_element_type() == ov::element::i4 || + matched_node_param.get_element_type() == ov::element::i8) && + (matched_node_param2.get_element_type() == ov::element::f32 || + matched_node_param2.get_element_type() == ov::element::f16)) { // Partitioning ignores Param/Const -> Convert nodes node_to_gptr->at(matched_node_transpose_in)->isolate(isol_tag); node_to_gptr->at(matched_node_transpose_out)->isolate(isol_tag); @@ -348,7 +347,6 @@ RMSNorm::RMSNorm(const std::shared_ptr& snapshot, co // Note: Use [=] to make sure the above objects stay alive in the callback auto callback = [=](ov::pass::pattern::Matcher& m) { - std::cout << "RMSNorm MATCHED!" << std::endl; auto& node_to_output = m.get_pattern_value_map(); auto matched_hadd = node_to_output.at(hadd).get_node_shared_ptr();