Skip to content

Commit

Permalink
[TMP] Debug WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Nov 8, 2024
1 parent ad07a06 commit 18376d1
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 62 deletions.
2 changes: 1 addition & 1 deletion src/common/snippets/src/runtime_configurator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ bool RuntimeConfigurator::MHAParallelWAOptimizer::optimize() {
size_t new_batch_dim, new_kernel_dim;
if (!SplitDimensionM::split(configurator->m_config->master_shape, concurrency, new_batch_dim, new_kernel_dim))
return false;

std::cout << "[ INFO ] MHAParallelWAOptimizer works\n";
auto& master_shape = configurator->m_config->master_shape;
*++master_shape.rbegin() = new_kernel_dim;
master_shape.insert(master_shape.cbegin() + master_shape.size() - 2, new_batch_dim);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,13 @@ void jit_kernel_static_emitter::init_data_pointers(const std::vector<Xbyak::Reg6
// Note that we don't need offset for the last dim, since it's handled directly by Tile emitter
const size_t offset_rank = master_shape.size() - 1;

std::cout << "[ INFO ] offset_rank:" << offset_rank << std::endl;
std::cout << "[ INFO ] Static data offsets:" << std::endl;
for (const auto& offset : data_offsets) {
ov::PartialShape pshape(offset);
std::cout << " " << pshape << std::endl;
}

// master_shape size must be valid in both static and dynamic cases
std::function<void(Reg64, const std::vector<size_t>&, Reg64)> init_ptr_with_offset;
init_ptr_with_offset = [&](Reg64 pointer, const std::vector<size_t>& offsets, Reg64 reg_tmp) {
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ class SubgraphDynamicSpecializedExecutor : public Subgraph::SubgraphExecutor {
callable(&call_args);
};

std::cout << "[ INFO ] Dynamic data offsets:" << std::endl;
for (const auto& offset : data_offsets) {
ov::PartialShape pshape(offset);
std::cout << " " << pshape << std::endl;
}

if (m_parallel_exec_domain.size() == rank6D) {
parallel_for6d(initializer, caller);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ std::tuple<size_t, size_t, size_t> BrgemmCPUBlocking::get_blocking_params(const
std::tie(m_blk, n_blk, k_blk) = BrgemmBlockingBase::get_blocking_params(brgemm_expr);
// Note: K,N blocking is functionally enabled, need to turn it on after blocking heuristic is updated to cover
// the low precision cases (ticket: 156014)
if (with_repacking(brgemm->get_type()) && brgemm->get_output_element_type(0) != element::f32) {
if (with_repacking(brgemm->get_type()) && brgemm->get_input_element_type(0) != element::f32) {
n_blk = get_full_dim_value();
k_blk = get_full_dim_value();
}
Expand All @@ -78,6 +78,8 @@ bool BrgemmCPUBlocking::mark_blocking_loops(LinearIR& linear_ir,
const auto brgemm = ov::as_type_ptr<ov::intel_cpu::BrgemmCPU>(brgemm_expr->get_node());
const auto type = brgemm->get_type();

std::cout << "n_blk = " << n_block << std::endl;

auto res = ov::snippets::lowered::pass::BrgemmBlockingBase::mark_blocking_loops(linear_ir, brgemm_it, m_block, n_block, k_block);

if (stand_alone(type))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,18 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/itt.hpp"

#include "move_brgemm_repacking_out.hpp"

#include "snippets/utils/utils.hpp"
#include "snippets/op/brgemm.hpp"
#include "snippets/op/buffer.hpp"
#include "transformations/snippets/x64/op/brgemm_copy_b.hpp"
#include "transformations/snippets/x64/op/brgemm_cpu.hpp"
#include "transformations/tpp/x64/op/modifiers.hpp"

#include "openvino/core/rt_info.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "openvino/pass/pattern/matcher.hpp"

#include "cpu/x64/cpu_isa_traits.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "openvino/pass/pattern/op/optional.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "snippets/itt.hpp"
#include "snippets/op/rank_normalization.hpp"
#include "transformations/snippets/x64/op/brgemm_copy_b.hpp"
#include "transformations/snippets/x64/op/brgemm_utils.hpp"

#include "cpu_shape.h"
#include "utils/general_utils.h"


namespace ov {
namespace intel_cpu {

Expand All @@ -33,12 +22,12 @@ using namespace snippets::lowered;
pass::MoveBrgemmRepackingOut::MoveBrgemmRepackingOut() {
MATCHER_SCOPE(MoveBrgemmRepackingOut);
auto m_param = ov::pass::pattern::wrap_type<ov::op::v0::Parameter>();
auto m_copy_b = ov::pass::pattern::wrap_type<BrgemmCopyB>({m_param});
auto m_rank_normalization = ov::pass::pattern::optional<ov::snippets::op::RankNormalization>(m_param);
auto m_copy_b = ov::pass::pattern::wrap_type<BrgemmCopyB>({m_rank_normalization});

auto callback = [=](ov::pass::pattern::Matcher& m) {
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "ov::intel_cpu::pass::MoveBrgemmRepackingOut")
const auto& pattern_map = m.get_pattern_value_map();
const auto& copy_b_in = pattern_map.at(m_param);
const auto& copy_b_out = pattern_map.at(m_copy_b);
const auto copy_b_node = ov::as_type_ptr<BrgemmCopyB>(copy_b_out.get_node_shared_ptr());
OPENVINO_ASSERT(copy_b_node, "BrgemmCopyB node is null in MoveBrgemmRepackingOut transformation");
Expand All @@ -48,11 +37,13 @@ pass::MoveBrgemmRepackingOut::MoveBrgemmRepackingOut() {
// TODO:
// 1. handle copyB with compensations
// 2. handle non-planar layout
if (!ov::snippets::utils::is_planar_layout(layout) || copy_b_node->get_src_element_type() == ov::element::i8 ||
transformation_callback(copy_b_node))
std::cout << "copy_b_node = " << copy_b_node << std::endl;
std::cout << "copy_b_node->get_src_element_type() = " << copy_b_node->get_src_element_type() << std::endl;
if (!ov::snippets::utils::is_planar_layout(layout) ||
copy_b_node->get_src_element_type() == ov::element::i8 || transformation_callback(copy_b_node))
return false;
std::cout << "[ INFO ] MoveBrgemmRepackingOut is finished\n";
return ov::replace_output_update_name(copy_b_out, copy_b_in);
return ov::replace_output_update_name(copy_b_out, copy_b_node->input_value(0));
};

auto m = std::make_shared<ov::pass::pattern::Matcher>(m_copy_b, matcher_name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,47 +25,55 @@ static inline std::vector<std::vector<element::Type>> precisions() {
}

// Test matrix for Snippets MatMul: static shapes plus dynamic cases covering
// each blocking dimension (M, N, K) individually and all dimensions at once.
// Restores the coverage that was temporarily commented out in the "[TMP] Debug
// WIP" state and drops the single-shape debug placeholders.
std::vector<std::vector<ov::test::InputShape>> input_shapes{
    // Static shapes
    { {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} },
    { {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} },
    { {{}, {{1, 2, 37, 23}}}, {{}, {{2, 1, 23, 37}}} },
    { {{}, {{1, 1, 32, 23}}}, {{}, {{1, 1, 23, 68}}} },
    { {{}, {{1, 16, 384, 64}}}, {{}, {{1, 16, 64, 384}}} },
    { {{}, {{1, 1, 100, 700}}}, {{}, {{1, 1, 700, 100}}} },
    { {{}, {{1, 1, 100, 1024}}}, {{}, {{1, 1, 1024, 100}}} },
    { {{}, {{1, 1, 100, 2500}}}, {{}, {{1, 1, 2500, 100}}} },
    { {{}, {{1, 1, 100, 4500}}}, {{}, {{1, 1, 4500, 100}}} },
    // Only M dimension is dynamic + one loop by M
    {
        {PartialShape{-1, 2, -1, 64}, {{2, 2, 64, 64}, {2, 2, 64, 64}, {2, 2, 35, 64},
                                       {2, 2, 120, 64}, {2, 2, 15, 64}, {2, 2, 35, 64}}},
        {PartialShape{-1, 2, 64, 32}, {{2, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32},
                                       {1, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32}}}
    },
    // Only M dimension is dynamic + all Loops (by M, N, K)
    {
        {PartialShape{2, 2, -1, 550}, {{2, 2, 64, 550}, {2, 2, 16, 550}, {2, 2, 35, 550},
                                      {2, 2, 16, 550}, {2, 2, 70, 550}, {2, 2, 64, 550}}},
        {PartialShape{2, 1, 550, 70}, {{2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70},
                                      {2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70}}}
    },
    // All dimensions are dynamic
    {
        {PartialShape{-1, -1, -1, -1}, {{2, 1, 32, 64}, {2, 2, 10, 20}, {2, 2, 100, 80},
                                        {2, 2, 10, 20}, {2, 1, 32, 64}, {2, 3, 64, 55}}},
        {PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {2, 2, 20, 30}, {2, 2, 80, 120},
                                        {2, 2, 20, 30}, {1, 3, 64, 128}, {2, 3, 55, 128}}}
    },
    // Only K dimension is dynamic
    {
        {PartialShape{2, 2, 70, -1}, {{2, 2, 70, 512}, {2, 2, 70, 10}, {2, 2, 70, 33}, {2, 2, 70, 2000}, {2, 2, 70, 35}, {2, 2, 70, 600}}},
        {PartialShape{2, 2, -1, 70}, {{2, 2, 512, 70}, {2, 2, 10, 70}, {2, 2, 33, 70}, {2, 2, 2000, 70}, {2, 2, 35, 70}, {2, 2, 600, 70}}}
    },
    // Only N dimension is dynamic
    {
        {PartialShape{}, {{2, 2, 65, 550}}},
        {PartialShape{2, 2, 550, -1}, {{2, 2, 550, 70}, {2, 2, 550, 12}, {2, 2, 550, 70},
                                       {2, 2, 550, 12}, {2, 2, 550, 10}, {2, 2, 550, 64} }}
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMul, MatMul,
Expand Down

0 comments on commit 18376d1

Please sign in to comment.