-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Snippets][Port to 2025.0] Implemented SetDynamicWAToOuterMostLoop pa…
…ss (#28506) ### Details: - *Dynamic MHA Subgraphs may have only a dynamic batch. In that case the pass `MHAParallelWAOptimizer` cannot be applied to the subgraph to increase the parallel work amount, because the outermost Loop by M in MHA has a static work amount, and the Subgraph may be executed inefficiently. This PR implements the pass `SetDynamicWAToOuterMostLoop`, which sets a dynamic work amount on the outermost Loop in dynamic MHA so that `MHAParallelWAOptimizer` becomes applicable at runtime.* - *Original PR: #28505 ### Tickets: - *160647*
- Loading branch information
1 parent
bd1764d
commit 105004b
Showing
6 changed files
with
123 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
30 changes: 30 additions & 0 deletions
30
src/common/snippets/include/snippets/lowered/pass/set_dynamic_wa_to_outermost_loop.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
// Copyright (C) 2023-2025 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#pragma once | ||
|
||
#include "pass.hpp" | ||
|
||
namespace ov { | ||
namespace snippets { | ||
namespace lowered { | ||
namespace pass { | ||
|
||
/** | ||
* @interface SetDynamicWAToOuterMostLoop | ||
* @brief The pass sets a dynamic work amount on the outermost Loop by M in dynamic MHA Subgraphs | ||
* so that MHAParallelWAOptimizer can optimize the parallel work amount at runtime. | ||
* @ingroup snippets | ||
*/ | ||
class SetDynamicWAToOuterMostLoop : public Pass { | ||
public: | ||
OPENVINO_RTTI("SetDynamicWAToOuterMostLoop", "", Pass); | ||
SetDynamicWAToOuterMostLoop() = default; | ||
// Applies the pass to `linear_ir`; returns true only if the work amount of at least one loop was changed. | ||
bool run(LinearIR& linear_ir) override; | ||
}; | ||
|
||
} // namespace pass | ||
} // namespace lowered | ||
} // namespace snippets | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
73 changes: 73 additions & 0 deletions
73
src/common/snippets/src/lowered/pass/set_dynamic_wa_to_outermost_loop.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
// Copyright (C) 2023-2025 Intel Corporation | ||
// SPDX-License-Identifier: Apache-2.0 | ||
// | ||
|
||
#include "snippets/lowered/pass/set_dynamic_wa_to_outermost_loop.hpp" | ||
|
||
#include "snippets/lowered/pass/mha_parallel_wa_optimizer.hpp" | ||
#include "snippets/itt.hpp" | ||
#include "snippets/lowered/linear_ir.hpp" | ||
#include "snippets/lowered/loop_manager.hpp" | ||
#include "snippets/op/brgemm.hpp" | ||
#include "snippets/utils/loop_utils.hpp" | ||
|
||
namespace ov { | ||
namespace snippets { | ||
namespace lowered { | ||
namespace pass { | ||
|
||
// Marks the work amount of selected loops as dynamic so it can be re-evaluated later. | ||
// Loops are collected by walking from every parameter that is not reported as "unsqueezed" | ||
// and keeping the first loop id of each visited expression when that loop iterates over dim index 1. | ||
// Returns true if at least one collected loop had a static work amount and was switched to dynamic. | ||
bool SetDynamicWAToOuterMostLoop::run(LinearIR& linear_ir) { | ||
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::SetDynamicWAToOuterMostLoop") | ||
// Nothing to do for an empty or fully static IR, or when the domain optimization flag is enabled in the config. | ||
if (linear_ir.empty() || !linear_ir.is_dynamic() || linear_ir.get_config().m_enable_domain_optimization) | ||
return false; | ||
|
||
// The MHAParallelWAOptimizer helpers below take a shared pointer to a const LinearIR. | ||
// NOTE(review): make_shared constructs a new LinearIR object from `linear_ir` instead of aliasing it; | ||
// the loop updates made below are presumably still visible to the caller through shared loop info - confirm. | ||
const auto linear_ir_ptr = std::make_shared<const LinearIR>(linear_ir); | ||
const auto brgemms = MHAParallelWAOptimizer::find_applicable_brgemms(linear_ir_ptr, false); | ||
// No applicable Brgemm was found, so the IR is left untouched. | ||
if (brgemms.empty()) | ||
return false; | ||
|
||
const auto unsqueezed_params = MHAParallelWAOptimizer::find_unsqueezed_params(linear_ir_ptr, brgemms); | ||
OPENVINO_ASSERT(!unsqueezed_params.empty(), "unsqueezed_params mustn't be empty after initialization"); | ||
|
||
|
||
const auto& loop_manager = linear_ir_ptr->get_loop_manager(); | ||
// Loops whose work amount should become dynamic; a set, so each loop info is stored once. | ||
std::unordered_set<lowered::UnifiedLoopInfoPtr> affected_loops; | ||
// Id of the last loop inspected by the callback; max() serves as the "nothing inspected yet" sentinel. | ||
size_t prev_loop_id = std::numeric_limits<size_t>::max(); | ||
// Dim index that a loop must iterate over to be collected (the M dimension, judging by the name). | ||
static const size_t dim_M_idx = 1; | ||
|
||
// Callback for visit_path: inspects the first loop id of `expr` (presumably the outermost loop - confirm) | ||
// and records the loop when it iterates over dim_M_idx. | ||
auto add_affected_loop = [&](const lowered::ExpressionPtr& expr) { | ||
const auto& loop_idces = expr->get_loop_ids(); | ||
// Skip expressions outside of any loop and consecutive expressions that share the same first loop. | ||
if (loop_idces.empty() || loop_idces.front() == prev_loop_id) | ||
return; | ||
|
||
prev_loop_id = loop_idces.front(); | ||
const auto loop_info = loop_manager->get_loop_info<lowered::UnifiedLoopInfo>(prev_loop_id); | ||
if (loop_info->get_dim_idx() == dim_M_idx) { | ||
affected_loops.insert(loop_info); | ||
} | ||
}; | ||
|
||
// `i` is the positional index of the current parameter; it is advanced for every parameter, skipped or not. | ||
size_t i = 0; | ||
// Shared across all traversals so that an expression reached from several parameters is handled once. | ||
std::unordered_set<lowered::ExpressionPtr> visited; | ||
for (const auto& param : linear_ir_ptr->get_parameters()) { | ||
// Parameters listed in unsqueezed_params are not used as traversal starting points. | ||
if (unsqueezed_params.count(i++)) | ||
continue; | ||
utils::visit_path(param, visited, add_affected_loop, false); | ||
} | ||
|
||
bool modified = false; | ||
for (const auto& loop : affected_loops) { | ||
// Only loops with a static work amount are changed; already dynamic ones are left as they are. | ||
if (!utils::is_dynamic_value(loop->get_work_amount())) { | ||
loop->set_work_amount(utils::get_dynamic_value<size_t>()); | ||
// Refresh the loop's data pointer shifts after the work amount change. | ||
ov::snippets::utils::update_data_pointer_shifts(loop); | ||
modified = true; | ||
} | ||
} | ||
|
||
return modified; | ||
} | ||
|
||
} // namespace pass | ||
} // namespace lowered | ||
} // namespace snippets | ||
} // namespace ov |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters