Skip to content

Commit

Permalink
[TMP] Debug WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Nov 8, 2024
1 parent ad07a06 commit 18376d1
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 62 deletions.
2 changes: 1 addition & 1 deletion src/common/snippets/src/runtime_configurator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ bool RuntimeConfigurator::MHAParallelWAOptimizer::optimize() {
size_t new_batch_dim, new_kernel_dim;
if (!SplitDimensionM::split(configurator->m_config->master_shape, concurrency, new_batch_dim, new_kernel_dim))
return false;

std::cout << "[ INFO ] MHAParallelWAOptimizer works\n";
auto& master_shape = configurator->m_config->master_shape;
*++master_shape.rbegin() = new_kernel_dim;
master_shape.insert(master_shape.cbegin() + master_shape.size() - 2, new_batch_dim);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,13 @@ void jit_kernel_static_emitter::init_data_pointers(const std::vector<Xbyak::Reg6
// Note that we don't need offset for the last dim, since it's handled directly by Tile emitter
const size_t offset_rank = master_shape.size() - 1;

std::cout << "[ INFO ] offset_rank:" << offset_rank << std::endl;
std::cout << "[ INFO ] Static data offsets:" << std::endl;
for (const auto& offset : data_offsets) {
ov::PartialShape pshape(offset);
std::cout << " " << pshape << std::endl;
}

// master_shape size must be valid in both static and dynamic cases
std::function<void(Reg64, const std::vector<size_t>&, Reg64)> init_ptr_with_offset;
init_ptr_with_offset = [&](Reg64 pointer, const std::vector<size_t>& offsets, Reg64 reg_tmp) {
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,12 @@ class SubgraphDynamicSpecializedExecutor : public Subgraph::SubgraphExecutor {
callable(&call_args);
};

std::cout << "[ INFO ] Dynamic data offsets:" << std::endl;
for (const auto& offset : data_offsets) {
ov::PartialShape pshape(offset);
std::cout << " " << pshape << std::endl;
}

if (m_parallel_exec_domain.size() == rank6D) {
parallel_for6d(initializer, caller);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ std::tuple<size_t, size_t, size_t> BrgemmCPUBlocking::get_blocking_params(const
std::tie(m_blk, n_blk, k_blk) = BrgemmBlockingBase::get_blocking_params(brgemm_expr);
// Note: K,N blocking is functionally enabled, need to turn it on after blocking heuristic is updated to cover
// the low precision cases (ticket: 156014)
if (with_repacking(brgemm->get_type()) && brgemm->get_output_element_type(0) != element::f32) {
if (with_repacking(brgemm->get_type()) && brgemm->get_input_element_type(0) != element::f32) {
n_blk = get_full_dim_value();
k_blk = get_full_dim_value();
}
Expand All @@ -78,6 +78,8 @@ bool BrgemmCPUBlocking::mark_blocking_loops(LinearIR& linear_ir,
const auto brgemm = ov::as_type_ptr<ov::intel_cpu::BrgemmCPU>(brgemm_expr->get_node());
const auto type = brgemm->get_type();

std::cout << "n_blk = " << n_block << std::endl;

auto res = ov::snippets::lowered::pass::BrgemmBlockingBase::mark_blocking_loops(linear_ir, brgemm_it, m_block, n_block, k_block);

if (stand_alone(type))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,29 +1,18 @@
// Copyright (C) 2018-2022 Intel Corporation
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/itt.hpp"

#include "move_brgemm_repacking_out.hpp"

#include "snippets/utils/utils.hpp"
#include "snippets/op/brgemm.hpp"
#include "snippets/op/buffer.hpp"
#include "transformations/snippets/x64/op/brgemm_copy_b.hpp"
#include "transformations/snippets/x64/op/brgemm_cpu.hpp"
#include "transformations/tpp/x64/op/modifiers.hpp"

#include "openvino/core/rt_info.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "openvino/pass/pattern/matcher.hpp"

#include "cpu/x64/cpu_isa_traits.hpp"
#include "openvino/pass/pattern/matcher.hpp"
#include "openvino/pass/pattern/op/optional.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "snippets/itt.hpp"
#include "snippets/op/rank_normalization.hpp"
#include "transformations/snippets/x64/op/brgemm_copy_b.hpp"
#include "transformations/snippets/x64/op/brgemm_utils.hpp"

#include "cpu_shape.h"
#include "utils/general_utils.h"


namespace ov {
namespace intel_cpu {

Expand All @@ -33,12 +22,12 @@ using namespace snippets::lowered;
pass::MoveBrgemmRepackingOut::MoveBrgemmRepackingOut() {
MATCHER_SCOPE(MoveBrgemmRepackingOut);
auto m_param = ov::pass::pattern::wrap_type<ov::op::v0::Parameter>();
auto m_copy_b = ov::pass::pattern::wrap_type<BrgemmCopyB>({m_param});
auto m_rank_normalization = ov::pass::pattern::optional<ov::snippets::op::RankNormalization>(m_param);
auto m_copy_b = ov::pass::pattern::wrap_type<BrgemmCopyB>({m_rank_normalization});

auto callback = [=](ov::pass::pattern::Matcher& m) {
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "ov::intel_cpu::pass::MoveBrgemmRepackingOut")
const auto& pattern_map = m.get_pattern_value_map();
const auto& copy_b_in = pattern_map.at(m_param);
const auto& copy_b_out = pattern_map.at(m_copy_b);
const auto copy_b_node = ov::as_type_ptr<BrgemmCopyB>(copy_b_out.get_node_shared_ptr());
OPENVINO_ASSERT(copy_b_node, "BrgemmCopyB node is null in MoveBrgemmRepackingOut transformation");
Expand All @@ -48,11 +37,13 @@ pass::MoveBrgemmRepackingOut::MoveBrgemmRepackingOut() {
// TODO:
// 1. handle copyB with compensations
// 2. handle non-planar layout
if (!ov::snippets::utils::is_planar_layout(layout) || copy_b_node->get_src_element_type() == ov::element::i8 ||
transformation_callback(copy_b_node))
std::cout << "copy_b_node = " << copy_b_node << std::endl;
std::cout << "copy_b_node->get_src_element_type() = " << copy_b_node->get_src_element_type() << std::endl;
if (!ov::snippets::utils::is_planar_layout(layout) ||
copy_b_node->get_src_element_type() == ov::element::i8 || transformation_callback(copy_b_node))
return false;
std::cout << "[ INFO ] MoveBrgemmRepackingOut is finished\n";
return ov::replace_output_update_name(copy_b_out, copy_b_in);
return ov::replace_output_update_name(copy_b_out, copy_b_node->input_value(0));
};

auto m = std::make_shared<ov::pass::pattern::Matcher>(m_copy_b, matcher_name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,47 +25,55 @@ static inline std::vector<std::vector<element::Type>> precisions() {
}

// Test matrix for Snippets MatMul: static shapes plus dynamic cases covering
// each blocking dimension (M, N, K) individually and all dimensions at once.
// Restores the coverage that was temporarily commented out in the "[TMP] Debug
// WIP" state and drops the single-shape debug placeholders.
std::vector<std::vector<ov::test::InputShape>> input_shapes{
    // Static shapes
    { {{}, {{2, 1, 3, 5}}}, {{}, {{1, 3, 5, 3}}} },
    { {{}, {{3, 1, 32, 14}}}, {{}, {{1, 3, 14, 37}}} },
    { {{}, {{1, 2, 37, 23}}}, {{}, {{2, 1, 23, 37}}} },
    { {{}, {{1, 1, 32, 23}}}, {{}, {{1, 1, 23, 68}}} },
    { {{}, {{1, 16, 384, 64}}}, {{}, {{1, 16, 64, 384}}} },
    { {{}, {{1, 1, 100, 700}}}, {{}, {{1, 1, 700, 100}}} },
    { {{}, {{1, 1, 100, 1024}}}, {{}, {{1, 1, 1024, 100}}} },
    { {{}, {{1, 1, 100, 2500}}}, {{}, {{1, 1, 2500, 100}}} },
    { {{}, {{1, 1, 100, 4500}}}, {{}, {{1, 1, 4500, 100}}} },
    // Only M dimension is dynamic + one loop by M
    {
        {PartialShape{-1, 2, -1, 64}, {{2, 2, 64, 64}, {2, 2, 64, 64}, {2, 2, 35, 64},
                                       {2, 2, 120, 64}, {2, 2, 15, 64}, {2, 2, 35, 64}}},
        {PartialShape{-1, 2, 64, 32}, {{2, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32},
                                       {1, 2, 64, 32}, {2, 2, 64, 32}, {1, 2, 64, 32}}}
    },
    // Only M dimension is dynamic + all Loops (by M, N, K)
    {
        {PartialShape{2, 2, -1, 550}, {{2, 2, 64, 550}, {2, 2, 16, 550}, {2, 2, 35, 550},
                                      {2, 2, 16, 550}, {2, 2, 70, 550}, {2, 2, 64, 550}}},
        {PartialShape{2, 1, 550, 70}, {{2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70},
                                      {2, 1, 550, 70}, {2, 1, 550, 70}, {2, 1, 550, 70}}}
    },
    // All dimensions are dynamic
    {
        {PartialShape{-1, -1, -1, -1}, {{2, 1, 32, 64}, {2, 2, 10, 20}, {2, 2, 100, 80},
                                        {2, 2, 10, 20}, {2, 1, 32, 64}, {2, 3, 64, 55}}},
        {PartialShape{-1, -1, -1, -1}, {{1, 3, 64, 128}, {2, 2, 20, 30}, {2, 2, 80, 120},
                                        {2, 2, 20, 30}, {1, 3, 64, 128}, {2, 3, 55, 128}}}
    },
    // Only K dimension is dynamic
    {
        {PartialShape{2, 2, 70, -1}, {{2, 2, 70, 512}, {2, 2, 70, 10}, {2, 2, 70, 33}, {2, 2, 70, 2000}, {2, 2, 70, 35}, {2, 2, 70, 600}}},
        {PartialShape{2, 2, -1, 70}, {{2, 2, 512, 70}, {2, 2, 10, 70}, {2, 2, 33, 70}, {2, 2, 2000, 70}, {2, 2, 35, 70}, {2, 2, 600, 70}}}
    },
    // Only N dimension is dynamic
    {
        {PartialShape{}, {{2, 2, 65, 550}}},
        {PartialShape{2, 2, 550, -1}, {{2, 2, 550, 70}, {2, 2, 550, 12}, {2, 2, 550, 70},
                                       {2, 2, 550, 12}, {2, 2, 550, 10}, {2, 2, 550, 64} }}
    }
};

INSTANTIATE_TEST_SUITE_P(smoke_Snippets_MatMul, MatMul,
Expand Down

0 comments on commit 18376d1

Please sign in to comment.