Skip to content

Commit

Permalink
Fix original memptrs corruption in dynamic scenario
Browse files Browse the repository at this point in the history
  • Loading branch information
v-Golubev committed Nov 7, 2024
1 parent 557bbd5 commit ad07a06
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ void CPURuntimeConfigurator::BrgemmCopyBLoopPortsAdjuster::optimize() {
void CPURuntimeConfigurator::update_requested_descs(const ov::snippets::lowered::LinearIRCPtr& linear_ir) const {
const auto& cpu_config = ov::as_type_ptr<CPURuntimeConfig>(m_config);
auto& optimal_descs = cpu_config->m_in_requested_descs;
optimal_descs.resize(m_in_num);
const auto& params = linear_ir->get_parameters();
OPENVINO_ASSERT(params.size() == m_in_num);
for (size_t i = 0; i < m_in_num; ++i) {
Expand Down Expand Up @@ -186,8 +185,8 @@ void CPURuntimeConfigurator::adjust_offsets_from_descs(const ov::snippets::lower
const auto& cpu_config = ov::as_type_ptr<CPURuntimeConfig>(m_config);
auto& optimal_descs = cpu_config->m_in_requested_descs;
for (size_t i = 0; i < m_in_num; ++i) {
const auto& optimal_desc = optimal_descs[i];
if (optimal_desc) {
if (optimal_descs.count(i)) {
const auto& optimal_desc = optimal_descs[i];
// It is assumed that shape is planar
const auto& parameter = linear_ir->get_parameters()[i];
const auto& original_shape = parameter->get_output_port_descriptor(0)->get_shape();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class CPURuntimeConfig : public ov::snippets::RuntimeConfig {
#endif

std::vector<jit_snippets_call_args::loop_args_t> loop_args = {};
std::vector<MemoryDescPtr> m_in_requested_descs = {};
std::unordered_map<size_t, MemoryDescPtr> m_in_requested_descs = {};
};

class CPURuntimeConfigurator : public ov::snippets::RuntimeConfigurator {
Expand Down
31 changes: 17 additions & 14 deletions src/plugins/intel_cpu/src/nodes/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -918,10 +918,10 @@ Subgraph::SubgraphExecutor::SubgraphExecutor(const std::shared_ptr<Subgraph::Sub
OPENVINO_ASSERT(!ov::snippets::utils::is_dynamic_value(m_buffer_scratchpad_size), "Undefined buffer scratchpad size!");
m_buffer_scratchpad = allocator(static_cast<size_t>(m_nthreads) * m_buffer_scratchpad_size);

const auto& requested_descs = snippet_config->m_in_requested_descs;
m_requested_repackings.resize(requested_descs.size());
for (size_t i = 0; i < requested_descs.size(); ++i) {
m_requested_repackings[i].requested_desc = requested_descs[i];
// TODO: the memory should already be created here; preliminarily provide the adjusted scratch size to the allocator
for (const auto& desc : snippet_config->m_in_requested_descs) {
const auto& requested_desc = desc.second;
m_in_requested_repackings.emplace(desc.first, RequestedRepacking(requested_desc, nullptr));
}

#if defined(__linux__) && defined(OPENVINO_ARCH_X86_64) && defined(SNIPPETS_DEBUG_CAPS)
Expand Down Expand Up @@ -1000,25 +1000,28 @@ void Subgraph::SubgraphExecutor::parallel_forNd(const std::function<void(jit_sni
}

void Subgraph::SubgraphExecutor::execute(dnnl::stream strm, std::vector<MemoryPtr>& inMemPtrs, std::vector<MemoryPtr>& outMemPtrs) {
    // Fast path: no input repacking was requested, so the kernel can run
    // directly on the caller's memory pointers.
    if (m_in_requested_repackings.empty()) {
        exec_impl(inMemPtrs, outMemPtrs);
    } else {
        // Some inputs must first be reordered into the requested layout.
        // reorder_execute receives a copy of inMemPtrs, so the caller's
        // original pointers are never overwritten with scratch memory
        // (this was the source of the memptrs corruption in the dynamic
        // scenario this change fixes).
        reorder_execute(strm, inMemPtrs, outMemPtrs);
    }
}

void Subgraph::SubgraphExecutor::repack_inputs(dnnl::stream strm, std::vector<MemoryPtr>& inMemPtrs) {
OPENVINO_ASSERT(inMemPtrs.size() == m_requested_repackings.size());
for (size_t i = 0; i < m_requested_repackings.size(); ++i) {
const auto& requested_desc = m_requested_repackings[i].requested_desc;
auto& scratch_mem = m_requested_repackings[i].scratch_mem;
void Subgraph::SubgraphExecutor::reorder_execute(dnnl::stream strm, std::vector<MemoryPtr> inMemPtrs, const std::vector<MemoryPtr>& outMemPtrs) {
for (auto& requested_repacking : m_in_requested_repackings) {
const auto& requested_desc = requested_repacking.second.requested_desc;
auto& scratch_mem = requested_repacking.second.scratch_mem;
if (requested_desc) {
if (!scratch_mem || !scratch_mem->getDesc().isCompatible(*requested_desc)) {
// TODO: move to prepareParams and investigate why the repacking is called on each iteration
// scratch_mem = m_scratchpad->createScratchPadMem(requested_desc);
scratch_mem = std::make_shared<Memory>(strm.get_engine(), requested_desc);
std::cout << "scratch_mem is created for requested desc " << i << std::endl;
std::cout << "scratch_mem is created for requested desc " << requested_repacking.first << std::endl;
}
scratch_mem->load(*inMemPtrs[i]);
inMemPtrs[i] = scratch_mem;
scratch_mem->load(*inMemPtrs[requested_repacking.first]);
inMemPtrs[requested_repacking.first] = scratch_mem;
}
}
exec_impl(inMemPtrs, outMemPtrs);
}

} // namespace node
Expand Down
5 changes: 3 additions & 2 deletions src/plugins/intel_cpu/src/nodes/subgraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,13 +169,14 @@ class Subgraph::SubgraphExecutor {
#endif

private:
void repack_inputs(dnnl::stream strm, std::vector<MemoryPtr>& inMemPtrs);
void reorder_execute(dnnl::stream strm, std::vector<MemoryPtr> inMemPtrs, const std::vector<MemoryPtr>& outMemPtrs);

// Bundles the layout an input must be repacked into together with the cached
// scratch buffer that holds the repacked data between inferences.
struct RequestedRepacking {
    // Smart pointers are taken by value and moved in to avoid the extra
    // atomic refcount increment/decrement of a copy.
    RequestedRepacking(MemoryDescPtr desc, MemoryPtr memory)
        : requested_desc(std::move(desc)), scratch_mem(std::move(memory)) {}
    MemoryDescPtr requested_desc = {};  // target layout for this input; empty => no repacking needed
    MemoryPtr scratch_mem = {};         // lazily (re)allocated scratch memory matching requested_desc
};
std::vector<RequestedRepacking> m_requested_repackings = {};
std::unordered_map<size_t, RequestedRepacking> m_in_requested_repackings = {};
DnnlScratchPadPtr m_scratchpad = {};
};

Expand Down

0 comments on commit ad07a06

Please sign in to comment.