From 3b7ebd42c95a2915765ce8ba09ace35574bc46b8 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Wed, 20 Nov 2024 12:28:10 +0000 Subject: [PATCH] WIP clear partitioning --- .../src/plugin/npuw/compiled_model.cpp | 20 ++++++++++++++++++- .../npuw/partitioning/online/compiler.cpp | 4 ++++ .../plugin/npuw/partitioning/online/group.hpp | 9 +++++++++ .../npuw/partitioning/online/snapshot.cpp | 8 ++++++++ .../npuw/partitioning/online/snapshot.hpp | 2 ++ 5 files changed, 42 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index df0c0328c785c6..c0ffe45610cc5d 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include "accuracy/comparator.hpp" #include "intel_npu/npu_private_properties.hpp" @@ -454,6 +456,12 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, void ov::npuw::CompiledModel::finalize_weights_bank() { LOG_INFO("Finalizing weights bank..."); LOG_BLOCK(); + + using namespace std::chrono_literals; + //std::this_thread::sleep_for(10000ms); + int jj = 0; + std::cin >> jj; + // Register lazy tensors for (std::size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) { auto& comp_model_desc = m_compiled_submodels[idx]; @@ -510,7 +518,12 @@ void ov::npuw::CompiledModel::finalize_weights_bank() { void ov::npuw::CompiledModel::drop_remote_weights() { LOG_INFO("Dropping remotely allocated weights..."); - m_weights_bank.reset(); + using namespace std::chrono_literals; + //std::this_thread::sleep_for(10000ms); + int jj = 0; + std::cin >> jj; + + //m_weights_bank.reset(); std::cout << "m_compiled_submodels.size() " << m_compiled_submodels.size() << std::endl; @@ -563,6 +576,11 @@ void ov::npuw::CompiledModel::drop_remote_weights() { } } + using namespace std::chrono_literals; + //std::this_thread::sleep_for(10000ms); + int jjj = 0; + std::cin >> jjj; + LOG_INFO("Done."); } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp index 89a0e0d2da9b23..1d29e7d775f4db 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/compiler.cpp @@ -482,6 +482,10 @@ class Compiler { LOG_INFO("Dumped online partitioning to " << dump_plan_path << "."); } + std::cout << "WIPING SNAPSHOT" << std::endl; + m_snapshot->wipe(); + m_model.reset(); + LOG_INFO("DONE."); return ens; diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp index 17527033173a82..da62867b2683fa 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/group.hpp @@ -83,6 +83,15 @@ class Group : public std::enable_shared_from_this { std::string specialTags() const; void addWeightsPrecision(const std::vector& prec); const std::vector& getConstsPrecision() const; + void wipe() { + m_input_layers.clear(); + m_content.clear(); + m_output_layers.clear(); + m_graph.reset(); + m_snapshot.reset(); + m_reptrack.clear(); + std::cout << "Group wiped!" << std::endl; + } private: void includeExtraLayers(detail::OVNodeSet& input_layers, diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp index c8a27c47665021..dc66b85e4e9cf0 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.cpp @@ -1139,6 +1139,14 @@ const OVPortsMap& Snapshot::getPortsMap() const { return m_ports_map; } +void Snapshot::wipe() { + m_model.reset(); + for (const auto& nh : m_graph->sorted()) { + m_graph->meta(nh).get()->wipe(); + } + std::cout << "GRAPH WIPED!" << std::endl; + } + const std::map>>& Snapshot::getMatches() const { return m_layer_matches; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp index 0ce6766d45850f..a1ffb2c8d26119 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/partitioning/online/snapshot.hpp @@ -64,6 +64,7 @@ class Snapshot : public std::enable_shared_from_this { void repeat(detail::Pass&& pass); void setCtx(const PassContext& ctx); size_t graphSize() const; + void wipe(); private: detail::GPtrSet getRepGroups(const std::shared_ptr& group) const; @@ -86,6 +87,7 @@ class Snapshot : public std::enable_shared_from_this { const std::vector>& conss); std::unordered_map, detail::GPtrSet> repeating() const; void completeRepeating(const std::shared_ptr& reptag, const detail::GPtrSet& gset); + // check destructor std::shared_ptr m_model; std::shared_ptr m_graph;