Skip to content

Commit

Permalink
Fixed and refactored accuracy failover and dumps for spatial execution:
Browse files Browse the repository at this point in the history
- Switched from a real failover to the CPU subrequest to copying the CPU
  subrequest results back into the NPU ones, to avoid having to handle all
  containers specifically allocated on NPU when working with CPU subrequests.
- Refactored accuracy failover to present only failures in log_level=Error
  mode.
- Fixed order of inputs in ilist.txt to be equal to order of model
  inputs in case of spatial subgraph.
- Fixed the dump of ilist.txt for different tiles and also added a check to
  dump only valid ranges.
- Added dumps for inaccurate subgraphs and their inputs.
  • Loading branch information
AsyaPronina committed Nov 12, 2024
1 parent bb85745 commit 759a73a
Show file tree
Hide file tree
Showing 11 changed files with 367 additions and 202 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ DEFINE_OPT(NPUW_FUNCALL_ASYNC, bool, false, npuw::funcall_async, RunTime);
DEFINE_OPT(NPUW_ACC_CHECK, bool, false, npuw::accuracy::check, RunTime);
DEFINE_OPT(NPUW_ACC_THRESH, double, 0.01, npuw::accuracy::threshold, RunTime);
DEFINE_OPT(NPUW_ACC_DEVICE, std::string, "", npuw::accuracy::reference_device, RunTime);
DEFINE_OPT(NPUW_ACC_DUMP_FAILS, bool, false, npuw::accuracy::dump_failures, RunTime);
DEFINE_OPT(NPUW_DUMP_FULL, bool, false, npuw::dump::full, CompileTime);
DEFINE_OPT(NPUW_DUMP_SUBS, std::string, "", npuw::dump::subgraphs, CompileTime);
DEFINE_OPT(NPUW_DUMP_SUBS_ON_FAIL, std::string, "", npuw::dump::subgraphs_on_fail, CompileTime);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,14 @@ static constexpr ov::Property<double> threshold{"NPUW_ACC_THRESH"};
* Default value: empty.
*/
static constexpr ov::Property<std::string> reference_device{"NPUW_ACC_DEVICE"};

/**
 * @brief
 * Type: bool.
 * Enable dumps of materials for model(s) that fail the accuracy check.
 * Default value: false.
 */
// The value type is bool so that it agrees with the documented type above and
// with the NPUW_ACC_DUMP_FAILS option, which is declared as a bool option
// defaulting to false.
static constexpr ov::Property<bool> dump_failures{"NPUW_ACC_DUMP_FAILS"};
} // namespace accuracy

namespace dump {
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_npu/src/al/src/config/npuw.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ void intel_npu::registerNPUWOptions(OptionsDesc& desc) {
desc.add<NPUW_ACC_CHECK>();
desc.add<NPUW_ACC_THRESH>();
desc.add<NPUW_ACC_DEVICE>();
desc.add<NPUW_ACC_DUMP_FAILS>();
#ifdef NPU_PLUGIN_DEVELOPER_BUILD
desc.add<NPUW_DUMP_FULL>();
desc.add<NPUW_DUMP_SUBS>();
Expand Down
32 changes: 21 additions & 11 deletions src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
ov::npuw::metrics::NRMSE::NRMSE(double threshold) : m_threshold(threshold) {}

bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr<ov::ITensor>& actual,
const ov::SoPtr<ov::ITensor>& reference) const {
const ov::SoPtr<ov::ITensor>& reference,
double* result) const {
NPUW_ASSERT(actual->get_shape() == reference->get_shape());
// Check for alignment:
NPUW_ASSERT(actual->get_byte_size() == reference->get_byte_size());
Expand All @@ -32,21 +33,23 @@ bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr<ov::ITensor>& actual,
in_reference = ov::make_tensor(reference);
}

// TODO: it might be more correct to make to_f32 function
// to work with strided tensors
NPUW_ASSERT(in_actual.is_continuous());
NPUW_ASSERT(in_reference.is_continuous());

ov::Tensor actual_f32;
ov::Tensor reference_f32;

if (ov::element::Type_t::f32 == in_actual.get_element_type()) {
if (ov::element::f32 == in_actual.get_element_type()) {
actual_f32 = in_actual;
} else {
ov::Tensor dst(ov::element::Type_t::f32, in_actual.get_shape());
ov::npuw::util::to_f32(in_actual, dst);
actual_f32 = std::move(dst);
}

if (ov::element::Type_t::f32 == in_reference.get_element_type()) {
if (ov::element::f32 == in_reference.get_element_type()) {
reference_f32 = in_reference;
} else {
ov::Tensor dst(ov::element::Type_t::f32, in_reference.get_shape());
Expand All @@ -65,13 +68,21 @@ bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr<ov::ITensor>& actual,
}

if (squared_error <= std::numeric_limits<double>::epsilon()) {
LOG_INFO("NRMSE loss: 0.0, threshold: " << m_threshold << ".");
LOG_INFO("PASS");
if (result != nullptr) {
*result = 0.0;
}
return true;
}

double rmse = sqrt(squared_error / size);
NPUW_ASSERT(rmse >= 0.0);

if (rmse < 0.0) {
// Calculated RMSE metric is < 0.0, which is unexpected. So, report the tensors as unequal.
if (result != nullptr) {
*result = rmse;
}
return false;
}

auto actual_min_max = std::minmax_element(actual_data, actual_data + size);
auto reference_min_max = std::minmax_element(reference_data, reference_data + size);
Expand All @@ -80,9 +91,8 @@ bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr<ov::ITensor>& actual,
std::max(0.f, *actual_min_max.second) - std::min(0.f, *actual_min_max.first)});

double nrmse = rmse / den;
LOG_INFO("NRMSE loss: " << nrmse << ", threshold: " << m_threshold << ".");

bool success = nrmse <= m_threshold;
LOG_INFO((success ? "PASS" : "FAIL"));
return success;
if (result != nullptr) {
*result = nrmse;
}
return nrmse <= m_threshold;
}
5 changes: 3 additions & 2 deletions src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ namespace metrics {
// Functor that compares two tensors using the NRMSE metric against a fixed threshold.
class NRMSE {
public:
// Stores `threshold`; the comparison passes when the computed NRMSE is <= this value.
explicit NRMSE(double threshold);
// NOTE(review): in this diff view this two-argument declaration appears to be the
// pre-change line superseded by the declaration below — confirm it is not kept
// alongside the defaulted overload.
bool operator()(const ov::SoPtr<ov::ITensor>& backup_tensor, const ov::SoPtr<ov::ITensor>& original_tensor) const;

// Compares the two tensors and returns true when the computed NRMSE does not
// exceed the stored threshold. The implementation asserts that both tensors have
// the same shape and byte size.
// `result` is optional: when non-null, the computed metric value is written to it.
bool operator()(const ov::SoPtr<ov::ITensor>& backup_tensor,
const ov::SoPtr<ov::ITensor>& original_tensor,
double* result = nullptr) const;
private:
// Pass/fail threshold supplied to the constructor.
double m_threshold{};
};
Expand Down
Loading

0 comments on commit 759a73a

Please sign in to comment.