Address review comments part 3
smirnov-alexey committed Oct 4, 2024
1 parent 576c699 commit 98806f3
Showing 11 changed files with 49 additions and 53 deletions.
@@ -49,7 +49,7 @@ DEFINE_OPT(NPUW_DCOFF_SCALE, bool, false, npuw::partitioning::dcoff_with_scale,
 DEFINE_OPT(NPUW_FUNCALL_FOR_ALL, bool, false, npuw::partitioning::funcall_for_all, CompileTime);
 DEFINE_OPT(NPUW_PARALLEL_COMPILE, bool, false, npuw::parallel_compilation, CompileTime);
 DEFINE_OPT(NPUW_WEIGHTS_BANK, std::string, "", npuw::weights_bank, CompileTime);
-DEFINE_OPT(NPUW_WEIGHTS_BANK_ALLOC, std::string, "", npuw::weights_bank_alloc, CompileTime);
+DEFINE_OPT(NPUW_WEIGHTS_BANK_ALLOC, std::string, "CPU", npuw::weights_bank_alloc, CompileTime);
 DEFINE_OPT(NPUW_FUNCALL_ASYNC, bool, false, npuw::funcall_async, RunTime);
 DEFINE_OPT(NPUW_ACC_CHECK, bool, false, npuw::accuracy::check, RunTime);
 DEFINE_OPT(NPUW_ACC_THRESH, double, 0.01, npuw::accuracy::threshold, RunTime);
@@ -49,7 +49,7 @@ static constexpr ov::Property<std::string> weights_bank{"NPUW_WEIGHTS_BANK"};
  * @brief
  * Type: std::string.
  * Specify device name for weights bank which is used to allocate memory.
- * Default value: false.
+ * Default value: "CPU".
  */
 static constexpr ov::Property<std::string> weights_bank_alloc{"NPUW_WEIGHTS_BANK_ALLOC"};
 
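With this change, leaving NPUW_WEIGHTS_BANK_ALLOC unset means the bank allocates on "CPU" (the documented default was previously a stale "false"). A minimal usage sketch, not part of this commit; the "NPU" target, the NPU_USE_NPUW toggle, the "shared" bank name, and the model path are assumptions about the surrounding setup:

#include "openvino/openvino.hpp"

int main() {
    ov::Core core;
    auto model = core.read_model("model.xml");  // placeholder model path
    // Omitting NPUW_WEIGHTS_BANK_ALLOC is now equivalent to setting it to "CPU".
    ov::AnyMap config = {{"NPU_USE_NPUW", "YES"},
                         {"NPUW_WEIGHTS_BANK", "shared"},
                         {"NPUW_WEIGHTS_BANK_ALLOC", "CPU"}};
    auto compiled = core.compile_model(model, "NPU", config);
    return 0;
}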
10 changes: 8 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -147,7 +147,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
         rewr.run_on_model(model);
     }
 
-    auto partitioning = getPartitioning(model, m_cfg, m_weights_bank);
+    auto partitioning = getPartitioning(model, m_cfg);
     m_total_stat.gflops = partitioning.total_gflops;
     m_total_stat.ops = partitioning.total_ops;
     const std::vector<ov::npuw::Subgraph>& orderedSubgraphs = partitioning.subgraphs;
@@ -236,7 +236,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
     }  // for(ordered_subgraphs)
     // NOTE(dm): there's a better way to do it, like we do in G-API backends.
 
-    m_update_required = m_cfg.get<::intel_npu::NPUW_FOLD>() ? true : false;
+    m_update_required = m_cfg.get<::intel_npu::NPUW_FOLD>();
 
     // Store mapping between manually splitted inputs/outputs
     // to connect tensors between compiled submodels
@@ -438,6 +438,7 @@ void ov::npuw::CompiledModel::finalize_weights_bank() {
     for (size_t idx = 0; idx < m_compiled_submodels.size(); ++idx) {
         auto& comp_model_desc = m_compiled_submodels[idx];
 
+        // FIXME: Head and tail don't have their closures set !!!
         if (!comp_model_desc.replaced_by) {
            continue;
         }
@@ -453,6 +454,11 @@
             const auto& lt = m_compiled_submodels[idx].lazy_closure[tidx];
             const auto& evaled = evaluated_tensors[idx][tidx];
             m_compiled_submodels[idx].closure.push_back(m_weights_bank->get(lt, *func_desc.device_it, evaled));
+
+            // Sanity check
+            const auto& tensor = m_compiled_submodels[idx].closure.back();
+            NPUW_ASSERT(tensor && tensor.data() && (tensor.get_size() > 0));
+
             // FIXME: should is_remote be set unconditionally?
             m_compiled_submodels[idx].is_remote.push_back(true);
         }
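The added sanity check relies on ov::Tensor's explicit bool conversion: a default-constructed tensor has no backing storage and converts to false, so a weight that was never materialized fails fast here. A self-contained sketch of the same check pattern, standalone and not part of the commit:

#include <cassert>

#include "openvino/openvino.hpp"

int main() {
    ov::Tensor empty;                               // no backing storage
    ov::Tensor ok(ov::element::f32, ov::Shape{4});  // allocated
    assert(!empty);                                 // converts to false
    assert(ok && ok.data() && ok.get_size() > 0);   // the same predicate as the NPUW_ASSERT above
    return 0;
}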
36 changes: 19 additions & 17 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.cpp
@@ -7,7 +7,7 @@
 using ov::npuw::weights::ConcatMeta;
 using ov::npuw::weights::ConstPtr;
 using ov::npuw::weights::LazyTensor;
-using ov::npuw::weights::LTData;
+using ov::npuw::weights::OrigData;
 using ov::npuw::weights::Transform;
 using ov::npuw::weights::TransformType;
 
@@ -17,8 +17,8 @@ namespace weights {
 
 struct LazyTensorImpl {
 public:
-    explicit LazyTensorImpl() = default;
-    explicit LazyTensorImpl(const TransformType& type, const Transform& transform);
+    LazyTensorImpl() = default;
+    LazyTensorImpl(const TransformType& type, const Transform& transform);
 
     bool operator==(const LazyTensorImpl& other) const;
 
@@ -51,8 +51,10 @@ std::size_t LazyTensorImpl::get_hash() const {
         seed = m_parent->get_hash();
     } else {
         seed = std::hash<void*>()(m_orig_data) + 0x9e3779b9;
-        seed ^= std::hash<std::string>()(m_orig_shape.to_string()) + 0x9e3779b9;
-        seed ^= std::hash<std::string>()(m_orig_type.to_string()) + 0x9e3779b9;
+        for (const auto& dim : m_orig_shape) {
+            seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
+        }
+        seed ^= m_orig_type.hash() + 0x9e3779b9;
     }
 
     // Combine with this hash
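The rewritten branch hashes each shape dimension and the element type directly instead of going through to_string(), avoiding per-call string allocations. It uses a simplified variant of Boost's hash_combine built around the 32-bit golden-ratio constant 0x9e3779b9; a self-contained sketch, with std::vector<std::size_t> standing in for ov::Shape:

#include <cstddef>
#include <functional>
#include <vector>

// Simplified combine: XOR-ing in hash(x) + 0x9e3779b9 spreads the bits of
// each new value across the accumulated seed.
std::size_t hash_shape(void* data, const std::vector<std::size_t>& shape) {
    std::size_t seed = std::hash<void*>()(data) + 0x9e3779b9;
    for (const auto& dim : shape) {
        seed ^= std::hash<std::size_t>()(dim) + 0x9e3779b9;
    }
    return seed;
}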
@@ -79,13 +81,13 @@ std::size_t LazyTensorImpl::get_hash() const {
 using ov::npuw::weights::LazyTensorImpl;
 
 LazyTensorImpl::LazyTensorImpl(const TransformType& type, const Transform& transform) {
-    if (type == TransformType::TENSOR && std::holds_alternative<LTData>(transform)) {
+    if (type == TransformType::THIS && std::holds_alternative<OrigData>(transform)) {
         m_transform = std::make_pair(type, transform);
         ov::Tensor tensor;
-        if (std::holds_alternative<ConstPtr>(std::get<LTData>(transform))) {
-            tensor = ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(transform)));
+        if (std::holds_alternative<ConstPtr>(std::get<OrigData>(transform))) {
+            tensor = ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<OrigData>(transform)));
         } else {
-            tensor = std::get<ov::Tensor>(std::get<LTData>(transform));
+            tensor = std::get<ov::Tensor>(std::get<OrigData>(transform));
         }
         m_orig_data = tensor.data();
         m_orig_shape = tensor.get_shape();
@@ -108,7 +110,7 @@ bool LazyTensorImpl::operator==(const LazyTensorImpl& other) const {
     ConcatMeta m1, m2;
 
     switch (m_transform.first) {
-    case TransformType::TENSOR:
+    case TransformType::THIS:
         // everything is already compared above - skip
         break;
     case TransformType::CONVERT:
@@ -164,7 +166,7 @@ ov::Tensor LazyTensorImpl::eval() const {
 
     // Process the initial tensor - either from Const or from Concat
     if (!m_parent) {
-        if (m_transform.first == TransformType::TENSOR) {
+        if (m_transform.first == TransformType::THIS) {
             return get_orig_tensor();
         } else if (m_transform.first == TransformType::CONCAT) {
             std::vector<ov::Tensor> to_concat;
@@ -196,17 +198,17 @@ ov::Tensor LazyTensorImpl::eval() const {
 ov::Tensor LazyTensorImpl::get_orig_tensor() const {
     // Sanity check
     NPUW_ASSERT(!has_transformations());
-    if (std::holds_alternative<ConstPtr>(std::get<LTData>(m_transform.second))) {
-        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<LTData>(m_transform.second)));
+    if (std::holds_alternative<ConstPtr>(std::get<OrigData>(m_transform.second))) {
+        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(std::get<OrigData>(m_transform.second)));
     }
-    return std::get<ov::Tensor>(std::get<LTData>(m_transform.second));
+    return std::get<ov::Tensor>(std::get<OrigData>(m_transform.second));
 }
 
 bool LazyTensorImpl::has_transformations() const {
-    if (m_parent == nullptr) {
-        return false;
+    if (m_parent) {
+        return true;
     }
-    return true;
+    return false;
 }
 
 LazyTensor::LazyTensor(const TransformType& type, const Transform& transform)
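The LTData to OrigData rename clarifies that this variant alternative holds a tensor's original backing data (either a Constant node or an already-materialized ov::Tensor), not an arbitrary payload. A hedged sketch of the two-level variant dispatch used in get_orig_tensor() above, written as a free-standing helper; unwrap_orig and the include paths are hypothetical, not part of the commit:

#include <variant>

#include "lazy_tensor.hpp"  // assumed path for OrigData / Transform / ConstPtr
#include "util.hpp"         // assumed path for ov::npuw::util::tensor_from_const

// The outer get<> selects the OrigData alternative of Transform (throws
// std::bad_variant_access if a transformation is stored instead); the inner
// holds_alternative<> picks between a Constant pointer and a ready tensor.
static ov::Tensor unwrap_orig(const ov::npuw::weights::Transform& t) {
    using ov::npuw::weights::ConstPtr;
    using ov::npuw::weights::OrigData;
    const auto& data = std::get<OrigData>(t);
    if (std::holds_alternative<ConstPtr>(data)) {
        return ov::npuw::util::tensor_from_const(std::get<ConstPtr>(data));
    }
    return std::get<ov::Tensor>(data);
}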
7 changes: 3 additions & 4 deletions src/plugins/intel_npu/src/plugin/npuw/lazy_tensor.hpp
@@ -19,18 +19,17 @@ namespace ov {
 namespace npuw {
 namespace weights {
 
-enum class TransformType : int { TENSOR, PERMUTE, CONVERT, CONCAT };
+enum class TransformType : int { THIS, PERMUTE, CONVERT, CONCAT };
 
 // Forward declaration
 class LazyTensor;
 struct LazyTensorImpl;
 
 using ConcatMeta = std::pair<std::vector<LazyTensor>, std::size_t>;
 using ConstPtr = std::shared_ptr<ov::op::v0::Constant>;
-using LTData = std::variant<ConstPtr, ov::Tensor>;
+using OrigData = std::variant<ConstPtr, ov::Tensor>;
 
-// LazyTensor owns Constant's memory
-using Transform = std::variant<LTData, std::vector<std::size_t>, std::monostate, ConcatMeta>;
+using Transform = std::variant<OrigData, std::vector<std::size_t>, std::monostate, ConcatMeta>;
 
 class LazyTensor {
 public:
@@ -276,14 +276,12 @@ class Partitioner {
     Partitioner(const std::shared_ptr<ov::Model>& _model,
                 ov::npuw::Ensemble& _ens,
                 ov::npuw::Partitioning& _P,
-                ::intel_npu::Config& _cfg,
-                const std::shared_ptr<ov::npuw::weights::Bank>& _bank)
+                ::intel_npu::Config& _cfg)
         : model(_model),
           ens(_ens),
          P(_P),
           func_pipeline_type(FunctionPipelineType::FOLD),
-          cfg(_cfg),
-          bank(_bank) {}
+          cfg(_cfg) {}
 
     ////////////////////////////////////////////////////////
     // Partitioning execution pipeline
@@ -319,7 +317,6 @@ class Partitioner {
 private:
     FunctionPipelineType func_pipeline_type;
     ::intel_npu::Config& cfg;
-    const std::shared_ptr<ov::npuw::weights::Bank>& bank;
 };
 
 void Partitioner::identifySubgraphs() {
@@ -1493,7 +1490,7 @@ void Partitioner::createFunction(FunctionPipeline& func_ggg) {
 
                 LOG_DEBUG("Register " << prod_output << " in the function closure");
                 funcall._lazy_closure.push_back(
-                    LazyTensor(TransformType::TENSOR,
+                    LazyTensor(TransformType::THIS,
                                std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node)));  // (n)/1/i/c
             } else if (ov::op::util::is_parameter(input_node)) {
                 LOG_DEBUG("Handling a Parameter input " << prod_output);
@@ -1591,7 +1588,7 @@ void Partitioner::matchRepeatedSubgraphs(const std::string& func_name) {
                 LOG_DEBUG("Register " << prod_output << " in the function closure[" << param_idx
                           << "] (via prototype " << proto_layer_name << ")");
                 funcall._lazy_closure[param_idx - function._param_offset] =
-                    LazyTensor(TransformType::TENSOR,
+                    LazyTensor(TransformType::THIS,
                                std::dynamic_pointer_cast<ov::op::v0::Constant>(input_node));  // (t)/1/c
             }
         }  // for (inputs)
@@ -1716,11 +1713,6 @@ void Partitioner::optimize(const std::string& func_name) {
         auto& funcall = func_group.refs[f_idx].get();
         // FIXME: assuming no transformations were applied to the tensor - since we are utilizing the original
         // ov::Tensor below
-        NPUW_ASSERT(!funcall._lazy_closure[w_idx - f._param_offset].has_transformations());
-        if (z_idx != -1) {
-            NPUW_ASSERT(!funcall._lazy_closure[z_idx - f._param_offset].has_transformations());
-        }
-        NPUW_ASSERT(!funcall._lazy_closure[s_idx - f._param_offset].has_transformations());
         ov::Tensor cw = funcall._lazy_closure[w_idx - f._param_offset].get_orig_tensor();
         ov::Tensor cz =
             z_idx != -1 ? funcall._lazy_closure[z_idx - f._param_offset].get_orig_tensor() : ov::Tensor{};
} else {
NPUW_ASSERT(false && "Unsupported combination");
}
funcall._lazy_closure.push_back(LazyTensor(TransformType::TENSOR, std::move(dst)));
funcall._lazy_closure.push_back(LazyTensor(TransformType::THIS, std::move(dst)));
});
}

@@ -1750,7 +1742,7 @@ void Partitioner::optimize(const std::string& func_name) {
             auto new_elem_type = params_to_gather.pnew->get_element_type();
             auto new_shape = params_to_gather.pnew->get_shape();
             funcall.get()._lazy_closure.push_back(
-                LazyTensor(TransformType::TENSOR, ov::Tensor(new_elem_type, new_shape)));
+                LazyTensor(TransformType::THIS, ov::Tensor(new_elem_type, new_shape)));
         }
     }
 
@@ -2003,9 +1995,7 @@ void Partitioner::finalizeLinks() {
 
 }  // namespace
 
-ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr<ov::Model>& model,
-                                                 ::intel_npu::Config& cfg,
-                                                 const std::shared_ptr<weights::Bank>& bank) {
+ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr<ov::Model>& model, ::intel_npu::Config& cfg) {
     LOG_INFO("Building partitioning for model " << model->get_friendly_name() << "...");
     LOG_BLOCK();
 
@@ -2064,7 +2054,7 @@ ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr<ov::Model
     Partitioning P;
     P.total_gflops = ens.gflops;
 
-    Partitioner p(model, ens, P, cfg, bank);
+    Partitioner p(model, ens, P, cfg);
     p.identifySubgraphs();
 
     if (!ens.repeated.empty()) {
@@ -10,7 +10,6 @@
 #include <vector>
 
 #include "../lazy_tensor.hpp"
-#include "../weights_bank.hpp"
 #include "intel_npu/al/config/config.hpp"
 #include "openvino/openvino.hpp"
 
@@ -118,9 +117,7 @@ struct Partitioning {
     float total_gflops = 0.f;
 };
 
-Partitioning getPartitioning(const std::shared_ptr<ov::Model>& model,
-                             ::intel_npu::Config& config,
-                             const std::shared_ptr<weights::Bank>& bank);
+Partitioning getPartitioning(const std::shared_ptr<ov::Model>& model, ::intel_npu::Config& config);
 
 }  // namespace npuw
 }  // namespace ov
@@ -101,8 +101,7 @@ ClosureRemap build_remap(const Function& fbody, const DCOFFParams& params_to) {
         auto zerop_iter = params_to.zerops.find(param);
         if (zerop_iter != params_to.zerops.end()) {
             LOG_DEBUG("This parameter requires zero point: " << zerop_iter->second);
-            m.zero_points.push_back(
-                ov::npuw::util::tensor_from_const(std::dynamic_pointer_cast<ov::op::v0::Constant>(zerop_iter->second)));
+            m.zero_points.push_back(ov::npuw::util::tensor_from_const(zerop_iter->second));
         } else {
             m.zero_points.push_back(ov::Tensor());
         }
@@ -7,7 +7,6 @@
 #include <map>
 #include <vector>
 
-#include "../../weights_bank.hpp"
 #include "openvino/openvino.hpp"
 #include "openvino/pass/graph_rewrite.hpp"
 
8 changes: 6 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/util.cpp
@@ -38,8 +38,12 @@ bool ov::npuw::util::is_set(const std::size_t sub_idx, const std::string& opt) {
     return false;
 }
 
-ov::Tensor ov::npuw::util::tensor_from_const(const std::shared_ptr<ov::op::v0::Constant>& node) {
-    return ov::Tensor(node->get_element_type(), node->get_shape(), const_cast<void*>(node->get_data_ptr()));
+ov::Tensor ov::npuw::util::tensor_from_const(const std::shared_ptr<ov::Node>& node) {
+    NPUW_ASSERT(ov::op::util::is_constant(node));
+    NPUW_ASSERT(node->outputs().size() == 1);
+    const auto port = node->output(0);
+    auto cnst_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
+    return ov::Tensor(port.get_element_type(), port.get_shape(), const_cast<void*>(cnst_node->get_data_ptr()));
 }
 
 bool ov::npuw::util::starts_with(const std::string& str, const std::string& prefix) {
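Widening the parameter from ov::op::v0::Constant to ov::Node moves the cast and the safety checks inside the helper, so call sites like the build_remap() change above can pass a generic node pointer directly. A standalone usage sketch under stated assumptions (the util.hpp include path is a guess; the Constant construction uses the standard OpenVINO API):

#include <memory>
#include <vector>

#include "openvino/op/constant.hpp"
#include "util.hpp"  // assumed path for ov::npuw::util::tensor_from_const

int main() {
    // Build a small Constant and view its data as an ov::Tensor without a copy.
    std::vector<float> data{1.f, 2.f, 3.f, 4.f};
    std::shared_ptr<ov::Node> node =
        std::make_shared<ov::op::v0::Constant>(ov::element::f32, ov::Shape{2, 2}, data);
    // No dynamic_pointer_cast needed at the call site any more; the NPUW_ASSERTs
    // inside tensor_from_const reject non-Constant nodes.
    ov::Tensor view = ov::npuw::util::tensor_from_const(node);
    return view.get_size() == 4 ? 0 : 1;
}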
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/util.hpp
@@ -19,7 +19,7 @@ bool is_set(const std::size_t sub_idx, const std::string& opt);
 
 // Every great project has its own string class...
 // NB: Newer C++ standards would allow to use string views or smt
-ov::Tensor tensor_from_const(const std::shared_ptr<ov::op::v0::Constant>& node);
+ov::Tensor tensor_from_const(const std::shared_ptr<ov::Node>& node);
 
 bool starts_with(const std::string& str, const std::string& prefix);
 
