Commit: NPUW Spatial: Fix clang

dmatveev committed Oct 8, 2024
1 parent 3e35389 commit 16e4588
Showing 14 changed files with 117 additions and 113 deletions.
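The spatial hunks below all reformat one recurring pattern: walking a spatial range in NWAY-sized steps and shortening the last tile when the range is not a multiple of NWAY. As a reading aid, here is a minimal standalone sketch of that arithmetic; the Tile struct and split_range name are illustrative only, not plugin code:

#include <cstddef>
#include <vector>

// Illustrative sketch (not part of the commit): split a spatial range into
// full NWAY-sized tiles plus an optional shorter tail tile.
struct Tile {
    std::size_t offset;
    std::size_t len;
};

std::vector<Tile> split_range(std::size_t range, std::size_t nway) {
    std::vector<Tile> tiles;
    for (std::size_t offset = 0u; offset < range; offset += nway) {
        const std::size_t this_len = (offset + nway <= range) ? nway                // a full tile
                                                              : (range - offset);  // the last tile
        tiles.push_back(Tile{offset, this_len});
    }
    return tiles;
}

This mirrors the this_len computation that clang-format reflows in several of the hunks below.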
42 changes: 19 additions & 23 deletions src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp
@@ -241,13 +241,13 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) {
         }
         ov::npuw::dump_input_list(comp_submodel_path, in_base_names);
     } else {
-        const auto &s = comp_submodel_desc.spatial.value();
+        const auto& s = comp_submodel_desc.spatial.value();
 
         std::set<std::size_t> spatial_param_idx;
         std::vector<std::string> in_base_names_nonspat;
 
         // First, dump the non-spatial input tensors just once - and remember its names
-        for (auto &&p : s.params) {
+        for (auto&& p : s.params) {
             spatial_param_idx.insert(p.idx);
         }
         for (std::size_t i = 0u; i < num_inputs; i++) {
@@ -265,26 +265,24 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) {
         // For the spatial case, these tiles should've been taken from the special
         // spatial_io tensors
         for (std::size_t offset = 0u; offset < s.range; offset += s.nway) {
-            const std::size_t this_len = (offset + s.nway <= s.range)
-                ? s.nway               // the full tile
-                : (s.range - offset);  // the last tile
+            const std::size_t this_len = (offset + s.nway <= s.range) ? s.nway  // the full tile
+                                                                      : (s.range - offset);  // the last tile
             // Copy the base file list to start with it
             std::vector<std::string> tile_ilist(in_base_names_nonspat);
-            for (auto &&p : s.params) {
-                std::string in_base_name = comp_submodel_path +
-                    "_input_" + ov::npuw::util::fmt(p.idx, num_inputs) +
-                    "_d" + ov::npuw::util::fmt(p.dim, 10) +
-                    "_" + ov::npuw::util::fmt(offset, s.range);
+            for (auto&& p : s.params) {
+                std::string in_base_name = comp_submodel_path + "_input_" + ov::npuw::util::fmt(p.idx, num_inputs) +
+                                           "_d" + ov::npuw::util::fmt(p.dim, 10) + "_" +
+                                           ov::npuw::util::fmt(offset, s.range);
 
-                const auto &tnsr = m_spatial_io[real_idx].inputs.at(p.idx);
-                const auto &view = ov::npuw::util::view(tnsr, p.dim, offset, this_len);
+                const auto& tnsr = m_spatial_io[real_idx].inputs.at(p.idx);
+                const auto& view = ov::npuw::util::view(tnsr, p.dim, offset, this_len);
 
                 ov::npuw::dump_tensor(view, in_base_name);
                 tile_ilist.push_back(std::move(in_base_name));
             }
             // Dump ilist per tile
             ov::npuw::dump_input_list(comp_submodel_path, tile_ilist);
-        } // for(offset)
+        }  // for(offset)
     }
 }

@@ -320,19 +318,17 @@ void ov::npuw::IBaseInferRequest::dump_output_tensors(std::size_t idx) {
         ov::npuw::dump_output_list(comp_submodel_path, out_base_names);
     } else {
         // All outputs are considered spatial now so it should be easier
-        const auto &s = comp_submodel_desc.spatial.value();
+        const auto& s = comp_submodel_desc.spatial.value();
         for (std::size_t offset = 0u; offset < s.range; offset += s.nway) {
-            const std::size_t this_len = (offset + s.nway <= s.range)
-                ? s.nway               // the full tile
-                : (s.range - offset);  // the last tile
+            const std::size_t this_len = (offset + s.nway <= s.range) ? s.nway  // the full tile
+                                                                      : (s.range - offset);  // the last tile
             std::vector<std::string> tile_olist;
             for (std::size_t i = 0u; i < num_outputs; i++) {
-                std::string out_base_name = comp_submodel_path +
-                    "_output_" + ov::npuw::util::fmt(i, num_outputs) +
-                    "_d" + ov::npuw::util::fmt(s.out_dim, 10) +
-                    "_" + ov::npuw::util::fmt(offset, s.range);
-                const auto &tnsr = m_spatial_io[real_idx].outputs.at(i);
-                const auto &view = ov::npuw::util::view(tnsr, s.out_dim, offset, this_len);
+                std::string out_base_name = comp_submodel_path + "_output_" + ov::npuw::util::fmt(i, num_outputs) +
+                                            "_d" + ov::npuw::util::fmt(s.out_dim, 10) + "_" +
+                                            ov::npuw::util::fmt(offset, s.range);
+                const auto& tnsr = m_spatial_io[real_idx].outputs.at(i);
+                const auto& view = ov::npuw::util::view(tnsr, s.out_dim, offset, this_len);
 
                 ov::npuw::dump_tensor(view, out_base_name);
                 tile_olist.push_back(std::move(out_base_name));
src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp
@@ -99,15 +99,14 @@ class IBaseInferRequest : public ov::ISyncInferRequest {
     // See function_prologue for details.
     // Also it contains pre-allocated tensors for tails handling
     struct SpatialIO {
-        std::vector<ov::SoPtr<ov::ITensor>> inputs;  // # of elements - # of graph-side inputs
-        std::vector<ov::SoPtr<ov::ITensor>> outputs; // # of elements - # of subgraph outputs
+        std::vector<ov::SoPtr<ov::ITensor>> inputs;   // # of elements - # of graph-side inputs
+        std::vector<ov::SoPtr<ov::ITensor>> outputs;  // # of elements - # of subgraph outputs
 
-        std::vector<ov::SoPtr<ov::ITensor>> input_tails;  // temporary buffers for input tails
-        std::vector<ov::SoPtr<ov::ITensor>> output_tails; // temporary buffers for output tails
+        std::vector<ov::SoPtr<ov::ITensor>> input_tails;   // temporary buffers for input tails
+        std::vector<ov::SoPtr<ov::ITensor>> output_tails;  // temporary buffers for output tails
     };
     std::vector<SpatialIO> m_spatial_io;
 
-
     const std::size_t m_num_submodels;
 
     void dump_input_tensors(std::size_t idx);
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -287,7 +287,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
             s.out_dim = fcn_template._spatial->_out_dim;
             s.nway_iters = s.range / s.nway;
             s.tail_size = s.range % s.nway;
-            for (auto &&input : fcn_template._spatial->_inputs) {
+            for (auto&& input : fcn_template._spatial->_inputs) {
                 std::size_t p_idx = fcn_template._model->get_parameter_index(input.param);
                 s.params.push_back(S::Param{p_idx, input.dim});
            }
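The two expressions above fully determine the spatial schedule: nway_iters full passes plus one tail pass whenever the range does not divide evenly. A worked example with assumed values (not taken from the commit):

// Assumed values, for illustration only:
//   range = 100, nway = 32
//   nway_iters = 100 / 32 = 3  -> three full 32-wide inferences
//   tail_size  = 100 % 32 = 4  -> one extra 4-wide tail inference
//   tail_size == 0 would mean no tail pass is needed at all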
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
@@ -124,7 +124,7 @@ class CompiledModel : public ov::ICompiledModel {
         };
         std::vector<Param> params;
         std::size_t range = 0u;
-        std::size_t nway = 0u;
+        std::size_t nway = 0u;
         std::size_t out_dim = 0u;
 
         std::size_t nway_iters = 0u;
80 changes: 45 additions & 35 deletions src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp
@@ -5,11 +5,11 @@
 #include "just_sync_infer_request.hpp"
 
 #include <algorithm>
+#include <future>
 #include <map>
 #include <memory>
 #include <string>
 #include <utility>
-#include <future>
 
 #include "compiled_model.hpp"
 #include "logging.hpp"
@@ -66,19 +66,19 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptr<ov::npuw::Com
             if (proto_comp_model_desc.spatial->tail_size) {
                 // Preallocate extra buffers for tail processing
                 // Note: these buffers are allocated to the entire NWAY (> tail_size)
-                for (auto &&p : proto_comp_model_desc.spatial->params) {
-                    const auto &iport = proto_comp_model_desc.compiled_model->inputs()[p.idx];
+                for (auto&& p : proto_comp_model_desc.spatial->params) {
+                    const auto& iport = proto_comp_model_desc.compiled_model->inputs()[p.idx];
                     m_spatial_io[real_idx].input_tails[p.idx] =
                         ov::get_tensor_impl(ov::Tensor(iport.get_element_type(), iport.get_shape()));
                 }
                 const auto num_outs = proto_comp_model_desc.compiled_model->outputs().size();
                 for (std::size_t out_idx = 0u; out_idx < num_outs; out_idx++) {
-                    const auto &oport = proto_comp_model_desc.compiled_model->outputs()[out_idx];
+                    const auto& oport = proto_comp_model_desc.compiled_model->outputs()[out_idx];
                     m_spatial_io[real_idx].output_tails[out_idx] =
                         ov::get_tensor_impl(ov::Tensor(oport.get_element_type(), oport.get_shape()));
                 }
             }
-        } // if(spatial)
+        }  // if(spatial)
 
         for (size_t out_idx = 0; out_idx < num_outputs; out_idx++) {
             const auto& port = proto_comp_model->outputs()[out_idx];
@@ -399,14 +399,12 @@ void ov::npuw::JustInferRequest::bind_global_parameters(std::size_t idx) {
     // Check if the given subgraph's input is spatial
     auto is_spatial_param = [&](std::size_t sub_in_idx) -> bool {
         if (!is_spatial) {
-            return false; // Early return
+            return false;  // Early return
         }
-        auto &spatial = proto_comp_model_desc.spatial.value();
-        return std::any_of(spatial.params.begin(),
-                           spatial.params.end(),
-                           [&](const auto &p) -> bool {
-                               return p.idx == sub_in_idx;
-                           });
+        auto& spatial = proto_comp_model_desc.spatial.value();
+        return std::any_of(spatial.params.begin(), spatial.params.end(), [&](const auto& p) -> bool {
+            return p.idx == sub_in_idx;
+        });
     };
 
     for (auto&& it : iodesc.global_params) {
@@ -727,7 +725,7 @@ void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, boo
     }
 }
 
-void ov::npuw::JustInferRequest::unsafe_during(std::size_t real_idx, const std::function<void()> &f) {
+void ov::npuw::JustInferRequest::unsafe_during(std::size_t real_idx, const std::function<void()>& f) {
     auto& comp_model_desc = m_npuw_model->m_compiled_submodels[real_idx];
     if (!comp_model_desc.spatial) {
         // Non-spatial execution: trigger request asynchronously, run `f` in this context
@@ -762,7 +760,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) {
         // not used here.
         // FIXME: All these preparations could be done statically (just once)
         std::vector<ov::Shape> full_in_shapes(comp_model_desc.param_base);
-        for (auto &&param : spatial.params) {
+        for (auto&& param : spatial.params) {
             full_in_shapes[param.idx] = m_spatial_io[real_idx].inputs.at(param.idx)->get_shape();
         }
@@ -781,60 +779,72 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) {
         std::size_t offset = 0u;
         for (std::size_t i = 0u; i < spatial.nway_iters; i++, offset += spatial.nway) {
             // Collect spatial inputs for this offset
-            for (auto &&param : spatial.params) {
-                const auto &iport = comp_model_desc.compiled_model->inputs()[param.idx];
-                r->set_tensor(iport, ov::npuw::util::view(m_spatial_io[real_idx].inputs.at(param.idx),
-                                                          param.dim, offset, spatial.nway));
-            } // for(params)
+            for (auto&& param : spatial.params) {
+                const auto& iport = comp_model_desc.compiled_model->inputs()[param.idx];
+                r->set_tensor(
+                    iport,
+                    ov::npuw::util::view(m_spatial_io[real_idx].inputs.at(param.idx), param.dim, offset, spatial.nway));
+            }  // for(params)
 
             // Now set the spatial outputs
             for (std::size_t out_idx = 0u; out_idx < num_outputs; out_idx++) {
-                const auto &oport = comp_model_desc.compiled_model->outputs()[out_idx];
-                r->set_tensor(oport, ov::npuw::util::view(m_spatial_io[real_idx].outputs.at(out_idx),
-                                                          spatial.out_dim, offset, spatial.nway));
-            } // for(outputs)
+                const auto& oport = comp_model_desc.compiled_model->outputs()[out_idx];
+                r->set_tensor(oport,
+                              ov::npuw::util::view(m_spatial_io[real_idx].outputs.at(out_idx),
+                                                   spatial.out_dim,
+                                                   offset,
+                                                   spatial.nway));
+            }  // for(outputs)
 
             // Now run the part
             r->infer();
-        } // for(full_nway_times)
+        }  // for(full_nway_times)
 
         // Now process the tail, if required
        if (spatial.tail_size) {
             // Copy the sub-ranges to spatial inputs
             // NOTE: tails buffers are read from/written to at 0th offset!
-            for (auto &&param : spatial.params) {
+            for (auto&& param : spatial.params) {
                 auto in_view = ov::npuw::util::view(m_spatial_io[real_idx].inputs.at(param.idx),
-                                                    param.dim, offset, spatial.tail_size);
+                                                    param.dim,
+                                                    offset,
+                                                    spatial.tail_size);
 
-                const auto &iport = comp_model_desc.compiled_model->inputs()[param.idx];
+                const auto& iport = comp_model_desc.compiled_model->inputs()[param.idx];
                 auto out_view = ov::npuw::util::view(m_spatial_io[real_idx].input_tails.at(param.idx),
-                                                     param.dim, 0, spatial.tail_size);
+                                                     param.dim,
+                                                     0,
+                                                     spatial.tail_size);
 
                 in_view->copy_to(out_view._ptr);
                 r->set_tensor(iport, m_spatial_io[real_idx].input_tails.at(param.idx));
-            } // for(params)
+            }  // for(params)
 
             // Now set the tail tensors
             for (std::size_t out_idx = 0u; out_idx < num_outputs; out_idx++) {
-                const auto &oport = comp_model_desc.compiled_model->outputs()[out_idx];
+                const auto& oport = comp_model_desc.compiled_model->outputs()[out_idx];
                 r->set_tensor(oport, m_spatial_io[real_idx].output_tails.at(out_idx));
-            } // for(outputs)
+            }  // for(outputs)
 
             // Now run the tail infer
             r->infer();
 
             // Now copy the views from the output full-nway tensor to the output tensors
             for (std::size_t out_idx = 0u; out_idx < num_outputs; out_idx++) {
-                const auto &oport = comp_model_desc.compiled_model->outputs()[out_idx];
+                const auto& oport = comp_model_desc.compiled_model->outputs()[out_idx];
                 auto spatial_tensor_shape = oport.get_shape();
 
                 auto in_view = ov::npuw::util::view(m_spatial_io[real_idx].output_tails.at(out_idx),
-                                                    spatial.out_dim, 0, spatial.tail_size);
+                                                    spatial.out_dim,
+                                                    0,
+                                                    spatial.tail_size);
 
                 auto out_view = ov::npuw::util::view(m_spatial_io[real_idx].outputs.at(out_idx),
-                                                     spatial.out_dim, offset, spatial.tail_size);
+                                                     spatial.out_dim,
+                                                     offset,
+                                                     spatial.tail_size);
                 in_view->copy_to(out_view._ptr);
-            } // for(outputs)
+            }  // for(outputs)
         }
     }
 }
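The tail branch above follows a staging pattern: the compiled request only accepts full-NWAY shapes, so the short tail slice is copied into a preallocated full-size buffer at offset 0, inference runs on that buffer, and only the valid prefix of the result is copied back out. Below is a minimal sketch with plain vectors standing in for ov::ITensor; every name here is illustrative, not plugin code:

#include <algorithm>
#include <cstddef>
#include <functional>
#include <vector>

// Sketch only: stage a short tail through full-NWAY scratch buffers.
void process_tail(const std::vector<float>& input,   // full spatial input, length >= offset + tail_size
                  std::vector<float>& output,        // full spatial output, same length as input
                  std::vector<float>& in_scratch,    // preallocated to nway elements
                  std::vector<float>& out_scratch,   // preallocated to nway elements
                  std::size_t offset,                // where the tail starts
                  std::size_t tail_size,             // strictly less than nway
                  const std::function<void(const std::vector<float>&, std::vector<float>&)>& infer) {
    // Tail buffers are read from/written to at the 0th offset
    std::copy_n(input.begin() + offset, tail_size, in_scratch.begin());
    infer(in_scratch, out_scratch);  // the request always sees full-NWAY shapes
    // Only the first tail_size elements of the result are valid
    std::copy_n(out_scratch.begin(), tail_size, output.begin() + offset);
}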
src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp
@@ -52,7 +52,7 @@ class JustInferRequest final : public IBaseInferRequest {
     void function_prologue(std::size_t idx);
     void unpack_closure(std::size_t idx, RqPtr request);
 
-    void unsafe_during(std::size_t real_idx, const std::function<void()> &f);
+    void unsafe_during(std::size_t real_idx, const std::function<void()>& f);
     void unsafe_infer(std::size_t real_idx);
     void unsafe_run_this_prep_next(std::size_t idx, bool& next_prepared_p);
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/src/plugin/npuw/logging.cpp
@@ -9,7 +9,7 @@
 #include <iostream>
 #include <mutex>
 
-#include "openvino/runtime/make_tensor.hpp" // get_tensor_impl
+#include "openvino/runtime/make_tensor.hpp"  // get_tensor_impl
 
 namespace {
 #ifdef NPU_PLUGIN_DEVELOPER_BUILD
@@ -22,14 +22,11 @@ namespace online {
 namespace detail {
 
 namespace {
-static const std::map<std::string, std::string> ISOL_PRESETS = {
-    {"COMPUTE",
-     "P:DQMatMulGQu4/compute,P:DQMatMulCWu4/compute,"
-     "P:DQMatMulGQi4/compute,P:DQMatMulCWi4/compute,"
-     "P:VocabMatMul/compute,"
-     "P:RMSNorm/compute"
-    }
-};
+static const std::map<std::string, std::string> ISOL_PRESETS = {{"COMPUTE",
+                                                                 "P:DQMatMulGQu4/compute,P:DQMatMulCWu4/compute,"
+                                                                 "P:DQMatMulGQi4/compute,P:DQMatMulCWi4/compute,"
+                                                                 "P:VocabMatMul/compute,"
+                                                                 "P:RMSNorm/compute"}};
 }
 
 // For missing declaration warning
@@ -121,7 +118,7 @@ std::vector<Isolate> getIsolates(::intel_npu::Config& cfg) {
     return getIsolates(cfg.getString<::intel_npu::NPUW_ONLINE_ISOLATE>());
 }
 
-std::vector<Isolate> getIsolates(const std::string &isolates_unparsed) {
+std::vector<Isolate> getIsolates(const std::string& isolates_unparsed) {
     if (isolates_unparsed.empty()) {
         return {};
     }
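The COMPUTE preset above packs several "P:<pattern>/<tag>" entries into one comma-separated string consumed by getIsolates(). The actual parser is not shown in this diff; as a hedged illustration of the comma-splitting step only (split_spec is a made-up name, not the plugin's API):

#include <sstream>
#include <string>
#include <vector>

// Illustrative helper (not plugin code): split a comma-separated isolate
// spec such as the COMPUTE preset into individual entries.
std::vector<std::string> split_spec(const std::string& spec) {
    std::vector<std::string> entries;
    std::stringstream ss(spec);
    std::string item;
    while (std::getline(ss, item, ',')) {
        if (!item.empty()) {
            entries.push_back(item);
        }
    }
    return entries;
}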