Address review comments part 3
smirnov-alexey committed Oct 4, 2024
1 parent 576c699 commit 98806f3
Showing 11 changed files with 49 additions and 53 deletions.
@@ -49,7 +49,7 @@ DEFINE_OPT(NPUW_DCOFF_SCALE, bool, false, npuw::partitioning::dcoff_with_scale,
 DEFINE_OPT(NPUW_FUNCALL_FOR_ALL, bool, false, npuw::partitioning::funcall_for_all, CompileTime);
 DEFINE_OPT(NPUW_PARALLEL_COMPILE, bool, false, npuw::parallel_compilation, CompileTime);
 DEFINE_OPT(NPUW_WEIGHTS_BANK, std::string, "", npuw::weights_bank, CompileTime);
-DEFINE_OPT(NPUW_WEIGHTS_BANK_ALLOC, std::string, "", npuw::weights_bank_alloc, CompileTime);
+DEFINE_OPT(NPUW_WEIGHTS_BANK_ALLOC, std::string, "CPU", npuw::weights_bank_alloc, CompileTime);
 DEFINE_OPT(NPUW_FUNCALL_ASYNC, bool, false, npuw::funcall_async, RunTime);
 DEFINE_OPT(NPUW_ACC_CHECK, bool, false, npuw::accuracy::check, RunTime);
 DEFINE_OPT(NPUW_ACC_THRESH, double, 0.01, npuw::accuracy::threshold, RunTime);
@@ -49,7 +49,7 @@ static constexpr ov::Property<std::string> weights_bank{"NPUW_WEIGHTS_BANK"};
  * @brief
  * Type: std::string.
  * Specify device name for weights bank which is used to allocate memory.
- * Default value: false.
+ * Default value: "CPU".
  */
 static constexpr ov::Property<std::string> weights_bank_alloc{"NPUW_WEIGHTS_BANK_ALLOC"};
 
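With this change, leaving NPUW_WEIGHTS_BANK_ALLOC unset means the bank allocates on "CPU" (the documented default was previously a stale "false"). A minimal usage sketch, not part of this commit; the "NPU" target, the NPU_USE_NPUW toggle, the "shared" bank name, and the model path are assumptions about the surrounding setup:

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path
    // Omitting NPUW_WEIGHTS_BANK_ALLOC is now equivalent to setting it to "CPU".
    ov::AnyMap config = {{"NPU_USE_NPUW", "YES"},
                         {"NPUW_WEIGHTS_BANK", "shared"},
                         {"NPUW_WEIGHTS_BANK_ALLOC", "CPU"}};
    auto compiled = core.compile_model(model, "NPU", config);
    return 0;
}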
10 changes: 8 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -147,7 +147,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
         rewr.run_on_model(model);
     }
 
-    auto partitioning = getPartitioning(model, m_cfg, m_weights_bank);
+    auto partitioning = getPartitioning(model, m_cfg);
     m_total_stat.gflops = partitioning.total_gflops;
     m_total_stat.ops = partitioning.total_ops;
     const std::vector<ov::npuw::Subgraph>& orderedSubgraphs = partitioning.subgraphs;
@@ -236,7 +236,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
     }  // for(ordered_subgraphs)
     // NOTE(dm): there's a better way to do it, like we do in G-API backends.
 
-    m_update_required = m_cfg.get<::intel_npu::NPUW_FOLD>() ? true : false;
+    m_update_required = m_cfg.get<::intel_npu::NPUW_FOLD>();
 
     // Store mapping between manually splitted inputs/outputs
     // to connect tensors between compiled submodels
@@ -438,6 +438,7 @@ void ov::npuw::CompiledModel::finalize_weights_bank() {
     for (size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) {
         auto& comp_model_desc = m_compiled_submodels[idx];
 
+        // FIXME: Head and tail don't have their closures set !!!
         if (!comp_model_desc.replaced_by) {
            continue;
         }
@@ -453,6 +454,11 @@
             const auto& lt = m_compiled_submodels[idx].lazy_closure[tidx];
             const auto& evaled = evaluated_tensors[idx][tidx];
             m_compiled_submodels[idx].closure.push_back(m_weights_bank->get(lt, *func_desc.device_it, evaled));
+
+            // Sanity check
+            const auto& tensor = m_compiled_submodels[idx].closure.back();
+            NPUW_ASSERT(tensor && tensor.data() && (tensor.get_size() > 0));
+
             // FIXME: should is_remote be set unconditionally?
             m_compiled_submodels[idx].is_remote.push_back(true);
         }
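The added sanity check relies on ov::Tensor's explicit bool conversion: a default-constructed tensor has no backing storage and converts to false, so a weight that was never materialized fails fast here. A self-contained sketch of the same check pattern, standalone and not part of the commit:

#include <cassert>

#include "openvino/openvino.hpp"

int main() {
    ov::Tensor empty;                               // no backing storage
    ov::Tensor ok(ov::element::f32, ov::Shape{4});  // allocated
    assert(!empty);                                 // converts to false
    assert(ok && ok.data() && ok.get_size() > 0);   // the same predicate as the NPUW_ASSERT above
    return 0;
}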
36 changes: 19 additions & 17 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -7,7 +7,7 @@
 using ov::npuw::weights::ConcatMeta;
 using ov::npuw::weights::ConstPtr;
 using ov::npuw::weights::LazyTensor;
-using ov::npuw::weights::LTData;
+using ov::npuw::weights::OrigData;
 using ov::npuw::weights::Transform;
 using ov::npuw::weights::TransformType;
 
@@ -17,8 +17,8 @@ namespace weights {
 
 struct LazyTensorImpl {
 public:
-    explicit LazyTensorImpl() = default;
-    explicit LazyTensorImpl(const TransformType& type, const Transform& transform);
+    LazyTensorImpl() = default;
+    LazyTensorImpl(const TransformType& type, const Transform& transform);
 
     bool operator==(const LazyTensorImpl& other) const;
 
@@ -51,8 +51,10 @@ std::size_t LazyTensorImpl::get_hash() const {
         seed = m_parent->get_hash();
     } else {
         seed = std::hash<void*>()(m_orig_data) + 0x9e3779b9;
-        seed ^= std::hash<std::string>()(m_orig_shape.to_string()) + 0x9e3779b9;
-        seed ^= std::hash<std::string>()(m_orig_type.to_string()) + 0x9e3779b9;
+        for (const auto& dim : m_orig_shape) {
+            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
+        }
+        seed ^= m_orig_type.hash() + 0x9e3779b9;
     }
 
     // Combine with this hash
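The rewritten branch hashes each shape dimension and the element type directly instead of going through to_string(), avoiding per-call string allocations. It uses a simplified variant of Boost's hash_combine built around the 32-bit golden-ratio constant 0x9e3779b9; a self-contained sketch, with std::vector<std::size_t> standing in for ov::Shape:

#include <cstddef>
#include <functional>
#include <vector>

// Simplified combine: XOR-ing in hash(x) + 0x9e3779b9 spreads the bits of
// each new value across the accumulated seed.
std::size_t hash_shape(void* data, const std::vector<std::size_t>& shape) {
    std::size_t seed = std::hash<void*>()(data) + 0x9e3779b9;
    for (const auto& dim : shape) {
        seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
    }
    return seed;
}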
@@ -79,13 +81,13 @@ std::size_t LazyTensorImpl::get_hash() const {
 using ov::npuw::weights::LazyTensorImpl;
 
 LazyTensorImpl::LazyTensorImpl(const TransformType& type, const Transform& transform) {
-    if (type == TransformType::TENSOR && std::holds_alternative<LTData>(transform)) {
+    if (type == TransformType::THIS && std::holds_alternative<OrigData>(transform)) {
         m_transform = std::make_pair(type, transform);
         ov::Tensor tensor;
-        if (std::holds_alternative<ConstPtr>(std::get<LTData>(transform))) {
-            tensor = ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(transform)));
+        if (std::holds_alternative<ConstPtr>(std::get<OrigData>(transform))) {
+            tensor = ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<OrigData>(transform)));
         } else {
-            tensor = std::get<ov::Tensor>(std::get<LTData>(transform));
+            tensor = std::get<ov::Tensor>(std::get<OrigData>(transform));
         }
         m_orig_data = tensor.data();
         m_orig_shape = tensor.get_shape();
@@ -108,7 +110,7 @@ bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const {
     ConcatMeta m1, m2;
 
     switch (m_transform.first) {
-    case TransformType::TENSOR:
+    case TransformType::THIS:
         // everything is already compared above - skip
         break;
     case TransformType::CONVERT:
@@ -164,7 +166,7 @@ ov::Tensor LazyTensorImpl::eval() const {
 
     // Process the initial tensor - either from Const or from Concat
     if (!m_parent) {
-        if (m_transform.first == TransformType::TENSOR) {
+        if (m_transform.first == TransformType::THIS) {
             return get_orig_tensor();
         } else if (m_transform.first == TransformType::CONCAT) {
             std::vector<ov::Tensor> to_concat;
@@ -196,17 +198,17 @@ ov::Tensor LazyTensorImpl::eval() const {
 ov::Tensor LazyTensorImpl::get_orig_tensor() const {
     // Sanity check
     NPUW_ASSERT(!has_transformations());
-    if (std::holds_alternative<ConstPtr>(std::get<LTData>(m_transform.second))) {
-        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(m_transform.second)));
+    if (std::holds_alternative<ConstPtr>(std::get<OrigData>(m_transform.second))) {
+        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<OrigData>(m_transform.second)));
     }
-    return std::get<ov::Tensor>(std::get<LTData>(m_transform.second));
+    return std::get<ov::Tensor>(std::get<OrigData>(m_transform.second));
 }
 
 bool LazyTensorImpl::has_transformations() const {
-    if (m_parent == nullptr) {
-        return false;
+    if (m_parent) {
+        return true;
     }
-    return true;
+    return false;
 }
 
 LazyTensor::LazyTensor(const TransformType& type, const Transform& transform)
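The LTData to OrigData rename clarifies that this variant alternative holds a tensor's original backing data (either a Constant node or an already-materialized ov::Tensor), not an arbitrary payload. A hedged sketch of the two-level variant dispatch used in get_orig_tensor() above, written as a free-standing helper; unwrap_orig and the include paths are hypothetical, not part of the commit:

#include <variant>

#include "lazy_tensor.hpp"  // assumed path for OrigData / Transform / ConstPtr
#include "util.hpp"         // assumed path for ov::npuw::util::tensor_from_const

// The outer get<> selects the OrigData alternative of Transform (throws
// std::bad_variant_access if a transformation is stored instead); the inner
// holds_alternative<> picks between a Constant pointer and a ready tensor.
static ov::Tensor unwrap_orig(const ov::npuw::weights::Transform& t) {
    using ov::npuw::weights::ConstPtr;
    using ov::npuw::weights::OrigData;
    const auto& data = std::get<OrigData>(t);
    if (std::holds_alternative<ConstPtr>(data)) {
        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(data));
    }
    return std::get<ov::Tensor>(data);
}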
7 changes: 3 additions & 4 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -19,18 +19,17 @@ namespace ov {
 namespace npuw {
 namespace weights {
 
-enum class TransformType : int { TENSOR, PERMUTE, CONVERT, CONCAT };
+enum class TransformType : int { THIS, PERMUTE, CONVERT, CONCAT };
 
 // Forward declaration
 class LazyTensor;
 struct LazyTensorImpl;
 
 using ConcatMeta = std::pair<std::vector<LazyTensor>, std::size_t>;
 using ConstPtr = std::shared_ptr<ov::op::v0::Constant>;
-using LTData = std::variant<ConstPtr, ov::Tensor>;
+using OrigData = std::variant<ConstPtr, ov::Tensor>;
 
-// LazyTensor owns Constant's memory
-using Transform = std::variant<LTData, std::vector<std::size_t>, std::monostate, ConcatMeta>;
+using Transform = std::variant<OrigData, std::vector<std::size_t>, std::monostate, ConcatMeta>;
 
 class LazyTensor {
 public:
@@ -276,14 +276,12 @@ class Partitioner {
     Partitioner(const std::shared_ptr<ov::Model>& _model,
                 ov::npuw::Ensemble& _ens,
                 ov::npuw::Partitioning& _P,
-                ::intel_npu::Config& _cfg,
-                const std::shared_ptr<ov::npuw::weights::Bank>& _bank)
+                ::intel_npu::Config& _cfg)
         : model(_model),
           ens(_ens),
          P(_P),
           func_pipeline_type(FunctionPipelineType::FOLD),
-          cfg(_cfg),
-          bank(_bank) {}
+          cfg(_cfg) {}
 
     ////////////////////////////////////////////////////////
     // Partitioning execution pipeline
@@ -319,7 +317,6 @@ class Partitioner {
 private:
     FunctionPipelineType func_pipeline_type;
     ::intel_npu::Config& cfg;
-    const std::shared_ptr<ov::npuw::weights::Bank>& bank;
 };
 
 void Partitioner::identifySubgraphs() {
@@ -1493,7 +1490,7 @@ void Partitioner::createFunction(FunctionPipeline& func_ggg) {
 
                 LOG_DEBUG("Register " << prod_output << " in the function closure");
                 funcall._lazy_closure.push_back(
-                    LazyTensor(TransformType::TENSOR,
+                    LazyTensor(TransformType::THIS,
                                std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node)));  // (n)/1/i/c
             } else if (ov::op::util::is_parameter(input_node)) {
                 LOG_DEBUG("Handling a Parameter input " << prod_output);
@@ -1591,7 +1588,7 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) {
                 LOG_DEBUG("Register " << prod_output << " in the function closure[" << param_idx
                           << "] (via prototype " << proto_layer_name << ")");
                 funcall._lazy_closure[param_idx - function._param_offset] =
-                    LazyTensor(TransformType::TENSOR,
+                    LazyTensor(TransformType::THIS,
                                std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node));  // (t)/1/c
             }
         }  // for (inputs)
@@ -1716,11 +1713,6 @@ void Partitioner::optimize(const std::string& func_name) {
         auto& funcall = func_group.refs[f_idx].get();
         // FIXME: assuming no transformations were applied to the tensor - since we are utilizing the original
         // ov::Tensor below
-        NPUW_ASSERT(!funcall._lazy_closure[w_idx - f._param_offset].has_transformations());
-        if (z_idx != -1) {
-            NPUW_ASSERT(!funcall._lazy_closure[z_idx - f._param_offset].has_transformations());
-        }
-        NPUW_ASSERT(!funcall._lazy_closure[s_idx - f._param_offset].has_transformations());
         ov::Tensor cw = funcall._lazy_closure[w_idx - f._param_offset].get_orig_tensor();
         ov::Tensor cz =
             z_idx != -1 ? funcall._lazy_closure[z_idx - f._param_offset].get_orig_tensor() : ov::Tensor{};
} else {
NPUW_ASSERT(false && "Unsupported combination");
}
funcall._lazy_closure.push_back(LazyTensor(TransformType::TENSOR, std::move(dst)));
funcall._lazy_closure.push_back(LazyTensor(TransformType::THIS, std::move(dst)));
});
}

@@ -1750,7 +1742,7 @@ void Partitioner::optimize(const std::string& func_name) {
             auto new_elem_type = params_to_gather.pnew->get_element_type();
             auto new_shape = params_to_gather.pnew->get_shape();
             funcall.get()._lazy_closure.push_back(
-                LazyTensor(TransformType::TENSOR, ov::Tensor(new_elem_type, new_shape)));
+                LazyTensor(TransformType::THIS, ov::Tensor(new_elem_type, new_shape)));
         }
     }
 
@@ -2003,9 +1995,7 @@ void Partitioner::finalizeLinks() {
 
 }  // namespace
 
-ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr<ov::Model>& model,
-                                                 ::intel_npu::Config& cfg,
-                                                 const std::shared_ptr<weights::Bank>& bank) {
+ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr<ov::Model>& model, ::intel_npu::Config& cfg) {
     LOG_INFO("Building partitioning for model " << model->get_friendly_name() << "...");
     LOG_BLOCK();
 
@@ -2064,7 +2054,7 @@ ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr<ov::Model
     Partitioning P;
     P.total_gflops = ens.gflops;
 
-    Partitioner p(model, ens, P, cfg, bank);
+    Partitioner p(model, ens, P, cfg);
     p.identifySubgraphs();
 
     if (!ens.repeated.empty()) {
@@ -10,7 +10,6 @@
 #include <vector>
 
 #include "../lazy_tensor.hpp"
-#include "../weights_bank.hpp"
 #include "intel_npu/al/config/config.hpp"
 #include "openvino/openvino.hpp"
 
@@ -118,9 +117,7 @@ struct Partitioning {
     float total_gflops = 0.f;
 };
 
-Partitioning getPartitioning(const std::shared_ptr<ov::Model>& model,
-                             ::intel_npu::Config& config,
-                             const std::shared_ptr<weights::Bank>& bank);
+Partitioning getPartitioning(const std::shared_ptr<ov::Model>& model, ::intel_npu::Config& config);
 
 }  // namespace npuw
 }  // namespace ov
@@ -101,8 +101,7 @@ ClosureRemap build_remap(const Function& fbody, const DCOFFParams& params_to) {
         auto zerop_iter = params_to.zerops.find(param);
         if (zerop_iter != params_to.zerops.end()) {
             LOG_DEBUG("This parameter requires zero point: " << zerop_iter->second);
-            m.zero_points.push_back(
-                ov::npuw::util::tensor_from_const(std::dynamic_pointer_cast<ov::op::v0::Constant>(zerop_iter->second)));
+            m.zero_points.push_back(ov::npuw::util::tensor_from_const(zerop_iter->second));
         } else {
             m.zero_points.push_back(ov::Tensor());
         }
@@ -7,7 +7,6 @@
 #include <map>
 #include <vector>
 
-#include "../../weights_bank.hpp"
 #include "openvino/openvino.hpp"
 #include "openvino/pass/graph_rewrite.hpp"
 
8 changes: 6 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/util.cpp
@@ -38,8 +38,12 @@ bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt) {
     return false;
 }
 
-ov::Tensor ov::npuw::util::tensor_from_const(const std::shared_ptr<ov::op::v0::Constant>& node) {
-    return ov::Tensor(node->get_element_type(), node->get_shape(), const_cast<void*>(node->get_data_ptr()));
+ov::Tensor ov::npuw::util::tensor_from_const(const std::shared_ptr<ov::Node>& node) {
+    NPUW_ASSERT(ov::op::util::is_constant(node));
+    NPUW_ASSERT(node->outputs().size() == 1);
+    const auto port = node->output(0);
+    auto cnst_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
+    return ov::Tensor(port.get_element_type(), port.get_shape(), const_cast<void*>(cnst_node->get_data_ptr()));
 }
 
 bool ov::npuw::util::starts_with(const std::string& str, const std::string& prefix) {
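Widening the parameter from ov::op::v0::Constant to ov::Node moves the cast and the safety checks inside the helper, so call sites like the build_remap() change above can pass a generic node pointer directly. A standalone usage sketch under stated assumptions (the util.hpp include path is a guess; the Constant construction uses the standard OpenVINO API):

#include <memory>
#include <vector>

#include "openvino/op/constant.hpp"
#include "util.hpp"  // assumed path for ov::npuw::util::tensor_from_const

int main() {
    // Build a small Constant and view its data as an ov::Tensor without a copy.
    std::vector<float> data{1.f, 2.f, 3.f, 4.f};
    std::shared_ptr<ov::Node> node =
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{2, 2}, data);
    // No dynamic_pointer_cast needed at the call site any more; the NPUW_ASSERTs
    // inside tensor_from_const reject non-Constant nodes.
    ov::Tensor view = ov::npuw::util::tensor_from_const(node);
    return view.get_size() == 4 ? 0 : 1;
}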
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/util.hpp
@@ -19,7 +19,7 @@ bool is_set(const std::size_t sub_idx, const std::string& opt);
 
 // Every great project has its own string class...
 // NB: Newer C++ standards would allow to use string views or smt
-ov::Tensor tensor_from_const(const std::shared_ptr<ov::op::v0::Constant>& node);
+ov::Tensor tensor_from_const(const std::shared_ptr<ov::Node>& node);
 
 bool starts_with(const std::string& str, const std::string& prefix);
 
