diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp index 7b0dab3d16da3c..dc1b4a4e4244ca 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp @@ -59,6 +59,7 @@ DEFINE_OPT(NPUW_FUNCALL_ASYNC, bool, false, npuw::funcall_async, RunTime); DEFINE_OPT(NPUW_ACC_CHECK, bool, false, npuw::accuracy::check, RunTime); DEFINE_OPT(NPUW_ACC_THRESH, double, 0.01, npuw::accuracy::threshold, RunTime); DEFINE_OPT(NPUW_ACC_DEVICE, std::string, "", npuw::accuracy::reference_device, RunTime); +DEFINE_OPT(NPUW_ACC_DUMP_FAILS, bool, false, npuw::accuracy::dump_failures, RunTime); DEFINE_OPT(NPUW_DUMP_FULL, bool, false, npuw::dump::full, CompileTime); DEFINE_OPT(NPUW_DUMP_SUBS, std::string, "", npuw::dump::subgraphs, CompileTime); DEFINE_OPT(NPUW_DUMP_SUBS_ON_FAIL, std::string, "", npuw::dump::subgraphs_on_fail, CompileTime); diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp index 67dce9621bfb4e..52ac711e342dc5 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp @@ -307,6 +307,14 @@ static constexpr ov::Property threshold{"NPUW_ACC_THRESH"}; * Default value: empty. */ static constexpr ov::Property reference_device{"NPUW_ACC_DEVICE"}; + +/** + * @brief + * Type: bool. + * Enable dumps of materials for model(s), failing accuracy check. + * Default value: false. + */ +static constexpr ov::Property dump_failures{"NPUW_ACC_DUMP_FAILS"}; } // namespace accuracy namespace dump { diff --git a/src/plugins/intel_npu/src/al/src/config/npuw.cpp b/src/plugins/intel_npu/src/al/src/config/npuw.cpp index 6a519a0f754a32..8a37449213d274 100644 --- a/src/plugins/intel_npu/src/al/src/config/npuw.cpp +++ b/src/plugins/intel_npu/src/al/src/config/npuw.cpp @@ -44,6 +44,7 @@ void intel_npu::registerNPUWOptions(OptionsDesc& desc) { desc.add(); desc.add(); desc.add(); + desc.add(); #ifdef NPU_PLUGIN_DEVELOPER_BUILD desc.add(); desc.add(); diff --git a/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp b/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp index 4440027c818969..13294ac521f122 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.cpp @@ -13,30 +13,47 @@ ov::npuw::metrics::NRMSE::NRMSE(double threshold) : m_threshold(threshold) {} bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr& actual, - const ov::SoPtr& reference) const { - NPUW_ASSERT(actual->is_continuous()); - NPUW_ASSERT(reference->is_continuous()); + const ov::SoPtr& reference, + double* result) const { NPUW_ASSERT(actual->get_shape() == reference->get_shape()); // Check for alignment: NPUW_ASSERT(actual->get_byte_size() == reference->get_byte_size()); - // FIXME: Check for strides + + ov::Tensor in_actual(actual->get_element_type(), actual->get_shape()); + ov::Tensor in_reference(reference->get_element_type(), reference->get_shape()); + + if (!actual->is_continuous()) { + ov::make_tensor(actual).copy_to(in_actual); + } else { + in_actual = ov::make_tensor(actual); + } + if (!reference->is_continuous()) { + ov::make_tensor(reference).copy_to(in_reference); + } else { + in_reference = ov::make_tensor(reference); + } + + // TODO: it might be more 
correct to make the to_f32 function + // work with strided tensors + NPUW_ASSERT(in_actual.is_continuous()); + NPUW_ASSERT(in_reference.is_continuous()); ov::Tensor actual_f32; ov::Tensor reference_f32; - if (ov::element::Type_t::f32 == actual->get_element_type()) { - actual_f32 = ov::make_tensor(actual); + if (ov::element::f32 == in_actual.get_element_type()) { + actual_f32 = in_actual; } else { - ov::Tensor dst(ov::element::Type_t::f32, actual->get_shape()); - ov::npuw::util::to_f32(ov::make_tensor(actual), dst); + ov::Tensor dst(ov::element::Type_t::f32, in_actual.get_shape()); + ov::npuw::util::to_f32(in_actual, dst); actual_f32 = std::move(dst); } - if (ov::element::Type_t::f32 == reference->get_element_type()) { - reference_f32 = ov::make_tensor(reference); + if (ov::element::f32 == in_reference.get_element_type()) { + reference_f32 = in_reference; } else { - ov::Tensor dst(ov::element::Type_t::f32, reference->get_shape()); - ov::npuw::util::to_f32(ov::make_tensor(reference), dst); + ov::Tensor dst(ov::element::Type_t::f32, in_reference.get_shape()); + ov::npuw::util::to_f32(in_reference, dst); reference_f32 = dst; } @@ -51,13 +68,21 @@ bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr& actual, } if (squared_error <= std::numeric_limits::epsilon()) { - LOG_INFO("NRMSE loss: 0.0, threshold: " << m_threshold << "."); - LOG_INFO("PASS"); + if (result != nullptr) { + *result = 0.0; + } return true; } double rmse = sqrt(squared_error / size); - NPUW_ASSERT(rmse >= 0.0); + + if (rmse < 0.0) { + // The calculated RMSE metric is < 0.0, which is unexpected, so report the tensors as unequal. + if (result != nullptr) { + *result = rmse; + } + return false; + } auto actual_min_max = std::minmax_element(actual_data, actual_data + size); auto reference_min_max = std::minmax_element(reference_data, reference_data + size); @@ -66,9 +91,8 @@ bool ov::npuw::metrics::NRMSE::operator()(const ov::SoPtr& actual, std::max(0.f, *actual_min_max.second) - std::min(0.f, *actual_min_max.first)}); double nrmse = rmse / den; - LOG_INFO("NRMSE loss: " << nrmse << ", threshold: " << m_threshold << "."); - - bool success = nrmse <= m_threshold; - LOG_INFO((success ? 
"PASS" : "FAIL")); - return success; + if (result != nullptr) { + *result = nrmse; + } + return nrmse <= m_threshold; } diff --git a/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.hpp b/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.hpp index e77a38ced0edc2..1d0182582946c3 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/accuracy/comparator.hpp @@ -15,8 +15,9 @@ namespace metrics { class NRMSE { public: explicit NRMSE(double threshold); - bool operator()(const ov::SoPtr& backup_tensor, const ov::SoPtr& original_tensor) const; - + bool operator()(const ov::SoPtr& backup_tensor, + const ov::SoPtr& original_tensor, + double* result = nullptr) const; private: double m_threshold{}; }; diff --git a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp index 216b1a35b4315c..75796186095c7d 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.cpp @@ -78,7 +78,7 @@ ov::npuw::IBaseInferRequest::RqPtrs ov::npuw::IBaseInferRequest::create_infer_re OPENVINO_THROW("NPUW: TEMPORARY LIMITATION: Couldn't create reference infer " "requests if 'nireq' is set to > 1!"); } - LOG_INFO("Create reference subrequest for submodel [" << id << "] on " << m_npuw_model->m_ref_device << "..."); + LOG_INFO("Create reference subrequest for Subgraph[" << id << "] on " << m_npuw_model->m_ref_device << "..."); LOG_BLOCK(); if (m_npuw_model->submodel_device(id) != m_npuw_model->m_ref_device) { auto& ref_submodel = m_npuw_model->m_compiled_submodels.at(id).ref_compiled_model; @@ -88,66 +88,225 @@ ov::npuw::IBaseInferRequest::RqPtrs ov::npuw::IBaseInferRequest::create_infer_re m_ref_subrequests.at(id) = std::move(ref_infer_request); LOG_INFO("Done"); } else { - LOG_INFO("Skip creation of reference subrequest for submodule[" - << id << "] on reference device: " << m_npuw_model->m_ref_device << ", as actual subrequest [" - << id << "] has been already created on " - << "it ."); + LOG_INFO("Skip creation of reference subrequest for Subgraph[" + << id << "] on reference device: " << m_npuw_model->m_ref_device << ", as actual subrequest [" + << id << "] has been already created on " + << "it ."); } } return rqs; } -void ov::npuw::IBaseInferRequest::ensure_subrequest_is_accurate(std::size_t idx, bool& failover) { +namespace { + void set_inputs(const ov::SoPtr& from, ov::SoPtr& to) { + const auto& from_comp_model = from->get_compiled_model(); + const auto& to_comp_model = to->get_compiled_model(); + for (size_t i = 0; i < from_comp_model->inputs().size(); i++) { + const auto& itnsr = from->get_tensor(from_comp_model->inputs()[i]); + to->set_tensor(to_comp_model->inputs()[i], itnsr); + } + } + + void copy_results(const ov::SoPtr& from, ov::SoPtr& to) { + const auto& from_comp_model = from->get_compiled_model(); + const auto& to_comp_model = to->get_compiled_model(); + for (size_t i = 0; i < to_comp_model->outputs().size(); i++) { + const auto& from_tnsr = from->get_tensor(from_comp_model->outputs()[i]); + const auto& to_tnsr = to->get_tensor(to_comp_model->outputs()[i]); + from_tnsr->copy_to(to_tnsr._ptr); + } + } + + std::stringstream create_launch_msg(std::size_t idx, std::size_t real_idx) { + std::stringstream log_msg_stream; + log_msg_stream << "Launching subrequest[" << idx << "]" << + ((real_idx == idx) ? 
std::string("...").c_str() : + std::string(std::string(", which is actually subrequest[") + + std::to_string(real_idx) + "]").c_str()); + return log_msg_stream; + } +} // anonymous namespace + +void ov::npuw::IBaseInferRequest::try_accurate_subinfer(std::size_t subidx, std::size_t offset, + std::size_t len, bool& accuracy_failover) { + auto real_subidx = real(subidx); + auto& act_subr = m_subrequests.at(real_subidx); + if (!m_npuw_model->m_acc_check) { + act_subr->infer(); + return; + } + + std::stringstream log_msg_stream = create_launch_msg(subidx, real_subidx); + if (m_npuw_model->m_compiled_submodels[real_subidx].spatial && len != 0) { + log_msg_stream << ", on range : [" << offset << ", " << offset + len << ")"; + } + log_msg_stream << "..."; + LOG_INFO(log_msg_stream.str()); + LOG_BLOCK(); + + if (m_npuw_model->m_compiled_submodels[real_subidx].switched_to_ref) { + LOG_INFO("Subrequest was inaccurate somewhere before, launching it on reference device."); + + auto& act_subr = m_subrequests.at(real_subidx); + auto& ref_subr = m_ref_subrequests.at(real_subidx); + + set_inputs(act_subr, ref_subr); + ref_subr->infer(); + copy_results(ref_subr, act_subr); + } else { + act_subr->infer(); + ensure_subrequest_is_accurate(subidx, accuracy_failover); + } +} + +void ov::npuw::IBaseInferRequest::try_accurate_subinfer(std::size_t subidx, bool& accuracy_failover) { + try_accurate_subinfer(subidx, 0, 0, accuracy_failover); +} + +void ov::npuw::IBaseInferRequest::try_accurate_substart_async(std::size_t subidx) { + auto real_subidx = real(subidx); + auto& act_subr = m_subrequests.at(real_subidx); + if (!m_npuw_model->m_acc_check) { + act_subr->start_async(); + return; + } + + std::stringstream log_msg_stream = create_launch_msg(subidx, real_subidx); + log_msg_stream << "..."; + LOG_INFO(log_msg_stream.str()); + LOG_BLOCK(); + + if (m_npuw_model->m_compiled_submodels[real_subidx].switched_to_ref) { + LOG_INFO("Subrequest was inaccurate somewhere before, launching it on reference device."); + + auto& act_subr = m_subrequests.at(real_subidx); + auto& ref_subr = m_ref_subrequests.at(real_subidx); + + set_inputs(act_subr, ref_subr); + ref_subr->start_async(); + } else { + act_subr->start_async(); + } +} + +void ov::npuw::IBaseInferRequest::try_accurate_subwait(std::size_t subidx, bool& accuracy_failover) { + auto real_subidx = real(subidx); + auto& act_subr = m_subrequests.at(real_subidx); + if (!m_npuw_model->m_acc_check) { + act_subr->wait(); + return; + } + + LOG_BLOCK(); + + if (m_npuw_model->m_compiled_submodels[real_subidx].switched_to_ref) { + auto& act_subr = m_subrequests.at(real_subidx); + auto& ref_subr = m_ref_subrequests.at(real_subidx); + + ref_subr->wait(); + copy_results(ref_subr, act_subr); + } else { + act_subr->wait(); + ensure_subrequest_is_accurate(subidx, accuracy_failover); + } +} + +void ov::npuw::IBaseInferRequest::ensure_subrequest_is_accurate(std::size_t idx, bool& accuracy_failover) { + if (!m_npuw_model->m_acc_check) { + return; + } + LOG_INFO("Check if subrequest[" << idx << "] is accurate..."); LOG_BLOCK(); - failover = false; - if (m_ref_subrequests.at(idx) != nullptr && m_subrequests.at(idx)._ptr != m_ref_subrequests.at(idx)._ptr) { - NPUW_ASSERT(m_npuw_model->m_compiled_submodels.at(idx).switched_to_ref == false); - NPUW_ASSERT(m_npuw_model->m_compiled_submodels.at(idx).replaced_by.value_or(idx) == idx); - - const auto& ref_comp_model = m_ref_subrequests.at(idx)->get_compiled_model(); - const auto& actual_comp_model = m_subrequests.at(idx)->get_compiled_model(); - 
NPUW_ASSERT(actual_comp_model->inputs().size() == ref_comp_model->inputs().size()); - // Setting inputs: - for (size_t i = 0; i < actual_comp_model->inputs().size(); i++) { - const auto& itensor = m_subrequests.at(idx)->get_tensor(actual_comp_model->inputs()[i]); - m_ref_subrequests.at(idx)->set_tensor(ref_comp_model->inputs()[i], itensor); - } - m_ref_subrequests.at(idx)->infer(); - LOG_INFO("Compare actual outputs against references:"); - bool tensors_converge = true; - for (size_t i = 0; i < actual_comp_model->outputs().size(); i++) { + std::size_t real_idx = real(idx); + OPENVINO_ASSERT(m_npuw_model->m_compiled_submodels[real_idx].switched_to_ref == false); + + if (m_npuw_model->submodel_device(idx) == m_npuw_model->m_ref_device) { + LOG_INFO("Skipped, subrequest[" << idx << "] is launched on reference device."); + return; + } + + accuracy_failover = false; + auto& actual_subr = m_subrequests.at(real_idx); + auto& ref_subr = m_ref_subrequests.at(real_idx); + + // Setting inputs: + set_inputs(actual_subr, ref_subr); + + // Running inference: + ref_subr->infer(); + + // Comparing results of the actual and reference inferences: + LOG_INFO("Compare actual outputs against references:"); + bool tensors_converge = true; + const auto& actual_comp_model = actual_subr->get_compiled_model(); + const auto& ref_comp_model = ref_subr->get_compiled_model(); + std::vector<bool> converges(actual_comp_model->outputs().size()); + std::vector<double> metrics(actual_comp_model->outputs().size()); + for (size_t i = 0; i < actual_comp_model->outputs().size(); i++) { + const auto& actual_tensor = actual_subr->get_tensor(actual_comp_model->outputs()[i]); + const auto& ref_tensor = ref_subr->get_tensor(ref_comp_model->outputs()[i]); + converges[i] = m_npuw_model->m_acc_check(actual_tensor, ref_tensor, &metrics[i]); + tensors_converge &= converges[i]; + } + if (tensors_converge == false) { + if (ov::npuw::get_log_level() == ov::npuw::LogLevel::Error) { + // At the Error log level the LOG_INFO header above is not printed, so repeat it here: + LOG_ERROR("Check if subrequest[" << idx << "] is accurate..."); + } + } + // Log comparison details: + for (size_t i = 0; i < actual_comp_model->outputs().size(); i++) { + if (converges[i]) { LOG_INFO(" - " << actual_comp_model->outputs()[i]); - const auto& actual_tensor = m_subrequests.at(idx)->get_tensor(actual_comp_model->outputs()[i]); - const auto& ref_tensor = m_ref_subrequests.at(idx)->get_tensor(ref_comp_model->outputs()[i]); LOG_BLOCK(); - tensors_converge &= m_npuw_model->m_acc_check(actual_tensor, ref_tensor); - } - LOG_INFO((tensors_converge ? "PASS" : "FAIL")); - - if (!tensors_converge) { - LOG_INFO("Subrequest is inaccurate, failover to reference."); - // FIXME: We need to copy reference tensors to actual only in single-model-inference mode - // or if our subgraph is last in the chain. 
- for (size_t i = 0; i < actual_comp_model->outputs().size(); i++) { - const auto& actual_tensor = m_subrequests.at(idx)->get_tensor(actual_comp_model->outputs()[i]); - const auto& ref_tensor = m_ref_subrequests.at(idx)->get_tensor(ref_comp_model->outputs()[i]); - ref_tensor->copy_to(actual_tensor._ptr); - } - m_npuw_model->m_compiled_submodels.at(idx).compiled_model = - m_npuw_model->m_compiled_submodels.at(idx).ref_compiled_model; - m_npuw_model->m_compiled_submodels.at(idx).switched_to_ref = true; - m_subrequests.at(idx) = m_ref_subrequests.at(idx); - update_subrequest_links(idx); - failover = true; + LOG_INFO(m_npuw_model->m_acc_check_name << " loss: " << metrics[i] << + ", threshold: " << m_npuw_model->m_acc_check_threshold << "."); + LOG_INFO("PASS"); + } else { + LOG_ERROR(" - " << actual_comp_model->outputs()[i]); + LOG_BLOCK(); + LOG_ERROR(m_npuw_model->m_acc_check_name << " loss: " << metrics[i] << + ", threshold: " << m_npuw_model->m_acc_check_threshold << "."); + LOG_ERROR("FAIL"); } + } - LOG_INFO("Done"); + // If comparison fails, copy reference results to original tensors and mark subgraph as + // switched to reference: + if (tensors_converge) { + LOG_INFO("PASS"); } else { - LOG_INFO("Skipped, subrequest is launched on reference device."); + LOG_ERROR("FAIL"); + LOG_ERROR("Subrequest[" << idx << "] is inaccurate, failing over to reference results."); + if (idx != real_idx) { + LOG_ERROR("As subrequest[" << idx << "] is actually subrequest[" << real_idx << + "], all subrequests corresponding to the latter will be further " << + "launched on " << m_npuw_model->m_ref_device << "."); + } else if (m_npuw_model->m_compiled_submodels[real_idx].replaced_by) { + LOG_ERROR("As subrequest[" << real_idx << "] is actually a function, all " << + "subrequests corresponding to it will be further launched on " << + m_npuw_model->m_ref_device << "."); + } + + if (m_npuw_model->m_cfg.get<::intel_npu::NPUW_ACC_DUMP_FAILS>()) { + const auto model = m_npuw_model->m_compiled_submodels[real_idx].model; + const auto model_path = "inaccurate_" + model->get_friendly_name() + ".xml"; + ov::save_model(model, model_path); + dump_input_tensors(idx, true); + dump_output_tensors(idx, true); + } + + // Due to the complex memory management logic, it is safe to just copy + // results back to the already properly allocated and linked tensors: + copy_results(ref_subr, actual_subr); + m_npuw_model->m_compiled_submodels[real_idx].switched_to_ref = true; + accuracy_failover = true; } + + LOG_INFO("Done"); } ov::SoPtr ov::npuw::IBaseInferRequest::get_tensor(const ov::Output& port) const { @@ -192,27 +351,23 @@ void ov::npuw::IBaseInferRequest::infer() { run_subrequest_for_success(idx, failover); failover_happened |= failover; complete_subrequest(idx); - if (m_npuw_model->m_acc_check) { - ensure_subrequest_is_accurate(idx, failover); - failover_happened |= failover; - } } // Increment counter regardless if dumps etc are enabled or not. 
m_run_iter++; if (failover_happened) { - LOG_INFO("Refined device distribution:"); + LOG_ERROR("Refined device distribution:"); LOG_BLOCK(); - m_npuw_model->log_device_dist(); + m_npuw_model->log_device_dist(ov::npuw::LogLevel::Error); } m_now_idx.reset(); } -void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { +void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx, bool forced) { const std::string dump_ios_opt = m_npuw_model->m_cfg.get<::intel_npu::NPUW_DUMP_IO>(); const std::size_t end_idx = m_npuw_model->m_compiled_submodels.size(); - if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx)) { + if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx) && !forced) { return; } @@ -245,7 +400,7 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { const auto& s = comp_submodel_desc.spatial.value(); std::set spatial_param_idx; - std::vector in_base_names_nonspat; + std::vector in_base_names(num_inputs); // First, dump the non-spatial input tensors just once - and remember its names for (auto&& p : s.params) { @@ -259,7 +414,7 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { const auto& tnsr = m_subrequests[real_idx]->get_tensor(port); std::string in_base_name = comp_submodel_path + "_input_" + ov::npuw::util::fmt(i, num_inputs); ov::npuw::dump_tensor(tnsr, in_base_name); - in_base_names_nonspat.push_back(std::move(in_base_name)); + in_base_names[i] = std::move(in_base_name); } // Now iterate over the spatial range and dump the individual tiles @@ -268,8 +423,11 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { for (std::size_t offset = 0u; offset < s.range; offset += s.nway) { const std::size_t this_len = (offset + s.nway <= s.range) ? s.nway // the full tile : (s.range - offset); // the last tile + if (m_spatial_selector != nullptr && !m_spatial_selector->need_submit(offset, this_len)) { + continue; + } + // Copy the base file list to start with it - std::vector tile_ilist(in_base_names_nonspat); for (auto&& p : s.params) { std::string in_base_name = comp_submodel_path + "_input_" + ov::npuw::util::fmt(p.idx, num_inputs) + "_d" + ov::npuw::util::fmt(p.dim, 10) + "_" + @@ -279,18 +437,20 @@ void ov::npuw::IBaseInferRequest::dump_input_tensors(std::size_t idx) { const auto& view = ov::npuw::util::view(tnsr, p.dim, offset, this_len); ov::npuw::dump_tensor(view, in_base_name); - tile_ilist.push_back(std::move(in_base_name)); + in_base_names[p.idx] = std::move(in_base_name); } // Dump ilist per tile - ov::npuw::dump_input_list(comp_submodel_path, tile_ilist); + std::string tile_ilist_name = comp_submodel_path + "_" + + ov::npuw::util::fmt(offset, s.range); + ov::npuw::dump_input_list(tile_ilist_name, in_base_names); } // for(offset) } } -void ov::npuw::IBaseInferRequest::dump_output_tensors(std::size_t idx) { +void ov::npuw::IBaseInferRequest::dump_output_tensors(std::size_t idx, bool forced) { const std::string dump_ios_opt = m_npuw_model->m_cfg.get<::intel_npu::NPUW_DUMP_IO>(); const std::size_t end_idx = m_npuw_model->m_compiled_submodels.size(); - if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx)) { + if (!ov::npuw::util::is_set(idx, dump_ios_opt, end_idx) && !forced) { return; } @@ -336,7 +496,9 @@ void ov::npuw::IBaseInferRequest::dump_output_tensors(std::size_t idx) { tile_olist.push_back(std::move(out_base_name)); } // Dump olist per tile - ov::npuw::dump_output_list(comp_submodel_path, tile_olist); + std::string tile_olist_name = comp_submodel_path + "_" + + 
ov::npuw::util::fmt(offset, s.range); + ov::npuw::dump_output_list(tile_olist_name, tile_olist); } } } diff --git a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp index 6be64d676d6149..b054b98dd29b18 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/base_sync_infer_request.hpp @@ -15,6 +15,7 @@ #include "openvino/runtime/isync_infer_request.hpp" #include "openvino/runtime/so_ptr.hpp" #include "perf.hpp" +#include "spatial.hpp" namespace ov { namespace npuw { @@ -62,8 +63,12 @@ class IBaseInferRequest : public ov::ISyncInferRequest { // their inference requests anymore - they must be stored // only once in the subrequests list RqPtrs create_infer_requests(std::size_t id, size_t nireq = 1, bool* recompiled = nullptr); - void ensure_subrequest_is_accurate(std::size_t idx, bool& failover); - virtual void update_subrequest_links(std::size_t idx) = 0; + void try_accurate_subinfer(std::size_t idx, bool& accuracy_failover); + void try_accurate_subinfer(std::size_t idx, std::size_t offset, std::size_t len, + bool& accuracy_failover); + void try_accurate_substart_async(std::size_t idx); + void try_accurate_subwait(std::size_t idx, bool& accuracy_failover); + void ensure_subrequest_is_accurate(std::size_t idx, bool& accuracy_failover); std::shared_ptr m_npuw_model; std::vector m_completion_cbs; @@ -107,10 +112,15 @@ class IBaseInferRequest : public ov::ISyncInferRequest { }; std::vector m_spatial_io; + // FIXME: Currently this is initialized/managed by the subclass as well. + // Moved here for dumping purposes only. + // Represents spatial run-time info + runtime::spatial::Selector::Ptr m_spatial_selector; + const std::size_t m_num_submodels; - void dump_input_tensors(std::size_t idx); - void dump_output_tensors(std::size_t idx); + void dump_input_tensors(std::size_t idx, bool forced = false); + void dump_output_tensors(std::size_t idx, bool forced = false); // Quick-and-dirty profiling ov::npuw::perf::metric m_ms_unpack; @@ -131,11 +141,11 @@ class IBaseInferRequest : public ov::ISyncInferRequest { std::size_t next(std::size_t idx_base) const; std::size_t real(std::size_t idx) const; - RqPtrs m_ref_subrequests; - using now_t = std::optional; now_t now_idx() const; + RqPtrs m_ref_subrequests; + private: now_t m_now_idx; }; diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index b52dd40ea59364..da7db76145d324 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -109,6 +109,8 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, const double threshold_opt = m_cfg.get<::intel_npu::NPUW_ACC_THRESH>(); m_acc_check = metrics::NRMSE(threshold_opt); + m_acc_check_name = "NRMSE"; + m_acc_check_threshold = threshold_opt; m_ref_device = m_cfg.getString<::intel_npu::NPUW_ACC_DEVICE>(); LOG_INFO("Accuracy check is enabled."); } @@ -377,8 +379,7 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, } } - m_compiled_submodels[id].device_it = - id != real_id ? 
m_compiled_submodels[real_id].device_it : m_dev_list.cbegin(); + m_compiled_submodels[id].device_it = m_dev_list.cbegin(); if (forced_sub_devices.count(id)) { std::string forced_device = forced_sub_devices[id]; @@ -776,7 +777,7 @@ std::string ov::npuw::CompiledModel::submodel_device(const std::size_t idx) cons return *comp_subm_desc.device_it; } -void ov::npuw::CompiledModel::log_device_dist() const { +void ov::npuw::CompiledModel::log_device_dist(ov::npuw::LogLevel log_lvl) const { std::unordered_map stats_for_devices; execution_stats stats_for_optimized_out{0.f, 0ul}; @@ -791,14 +792,32 @@ void ov::npuw::CompiledModel::log_device_dist() const { stat.ops += real_cm.stat.ops; } - auto print_stats = [this](const std::string& device, const execution_stats& stat) { + auto print_stats = [this, log_lvl](const std::string& device, const execution_stats& stat) { float flops_prcnt = 100.f; float ops_prcnt = 100.f; if (m_total_stat.gflops > 0 && m_total_stat.ops > 0) { flops_prcnt = stat.gflops / static_cast(m_total_stat.gflops) * 100; ops_prcnt = stat.ops / static_cast(m_total_stat.ops) * 100; } - LOG_INFO(device << ": " << flops_prcnt << "% FLOPS, " << ops_prcnt << "% Layers"); + std::stringstream log_msg; + log_msg << device << ": " << flops_prcnt << "% FLOPS, " << ops_prcnt << "% Layers"; + switch (log_lvl) { + case LogLevel::Error: + LOG_ERROR(log_msg.str()); + break; + case LogLevel::Warning: + LOG_WARN(log_msg.str()); + break; + case LogLevel::Info: + LOG_INFO(log_msg.str()); + break; + case LogLevel::Verbose: + LOG_VERB(log_msg.str()); + break; + case LogLevel::Debug: + LOG_DEBUG(log_msg.str()); + break; + } }; for (auto&& device_st : stats_for_devices) { LOG_BLOCK(); @@ -940,6 +959,7 @@ void ov::npuw::CompiledModel::implement_properties() { BIND(npuw::accuracy::check, NPUW_ACC_CHECK), BIND(npuw::accuracy::threshold, NPUW_ACC_THRESH), BIND(npuw::accuracy::reference_device, NPUW_ACC_DEVICE), + BIND(npuw::accuracy::dump_failures, NPUW_ACC_DUMP_FAILS), #ifdef NPU_PLUGIN_DEVELOPER_BUILD BIND(npuw::dump::full, NPUW_DUMP_FULL), BIND(npuw::dump::subgraphs, NPUW_DUMP_SUBS), diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp index 6199ac66c0c64e..d629da2a1b42de 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp @@ -71,7 +71,7 @@ class CompiledModel : public ov::ICompiledModel { std::string submodel_device(const std::size_t idx) const; - void log_device_dist() const; + void log_device_dist(ov::npuw::LogLevel log_lvl = ov::npuw::LogLevel::Info) const; void implement_properties(); @@ -149,7 +149,9 @@ class CompiledModel : public ov::ICompiledModel { }; std::vector m_compiled_submodels; - std::function&, const ov::SoPtr&)> m_acc_check; + std::function&, const ov::SoPtr&, double*)> m_acc_check; + std::string m_acc_check_name; + double m_acc_check_threshold; std::string m_ref_device; execution_stats m_total_stat; diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp index 0e0b96582a663c..4cde73b3ea541f 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.cpp @@ -129,7 +129,7 @@ void ov::npuw::FuncMemMgr::assign(const LinkFrom& from) { // - Look for an output tensor to reuse // - If there's one, assign it to this allocation // - If there's none, allocate a new 
tensor - // - How a tensor to reuse is piced: + // - How a tensor to reuse is picked: // 1. It should exist // 2. It's "remaining reads" count should be 0 (all planned reads // happened at this point). @@ -265,6 +265,7 @@ ov::npuw::JustInferRequest::JustInferRequest(const std::shared_ptrlog_device_dist(); + m_npuw_model->log_device_dist(ov::npuw::LogLevel::Error); } // Identify connections for the funcall pipeline, if needed @@ -820,11 +821,11 @@ void ov::npuw::JustInferRequest::unpack_closure(std::size_t idx, RqPtr request) } void ov::npuw::JustInferRequest::recreate_subrequests(std::size_t idx) { - auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; - auto real_idx = comp_model_desc.replaced_by.value_or(idx); + std::size_t real_idx = real(idx); + auto& comp_model_desc = m_npuw_model->m_compiled_submodels[real_idx]; - const auto is_piped = is_pipelined(idx); - auto new_rqs = create_infer_requests(idx, is_piped ? 2 : 1); + const auto is_piped = is_pipelined(real_idx); + auto new_rqs = create_infer_requests(real_idx, is_piped ? 2 : 1); // NB: Regardless if this subrequest was a function call // or not, always use the real_idx here - for regular @@ -841,13 +842,13 @@ void ov::npuw::JustInferRequest::recreate_subrequests(std::size_t idx) { // overkill - only affected subrequest(s) could be updated instead, // but it is a more complex thing and can be implemented separately connect_subrequests(); - m_subrequest_devices[idx] = *comp_model_desc.device_it; + m_subrequest_devices[real_idx] = *comp_model_desc.device_it; } void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, bool& failover) { failover = false; - auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; - auto real_idx = comp_model_desc.replaced_by.value_or(idx); + bool accuracy_failover = false; + auto real_idx = real(idx); // Infer is also fail-safe... bool job_done = false; @@ -870,7 +871,7 @@ void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, boo // the subrequest' outputs to global Results, if relevant. bind_global_results(idx); - if (comp_model_desc.replaced_by) { + if (m_npuw_model->m_compiled_submodels[idx].replaced_by) { function_prologue(idx); } if (!dump_in) { @@ -881,7 +882,7 @@ void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, boo try { LOG_DEBUG("Trying to run subrequest[" << idx << "]..."); LOG_BLOCK(); - unsafe_run_this_prep_next(idx, next_prepared); + unsafe_run_this_prep_next(idx, next_prepared, accuracy_failover); job_done = true; LOG_DEBUG("Done: " << idx << "(exec subrequest)"); } catch (const std::exception& ex) { @@ -896,7 +897,8 @@ void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, boo LOG_INFO("- Trying next device..."); // Altering iterators here!! Contracts should be changed! - comp_model_desc.device_it++; + auto& proto_comp_model_desc = m_npuw_model->m_compiled_submodels[real_idx]; + proto_comp_model_desc.device_it++; if (!m_npuw_model->compile_for_success(real_idx)) { OPENVINO_THROW("Failed to compile. 
No more devices are left!"); } @@ -912,36 +914,41 @@ void ov::npuw::JustInferRequest::run_subrequest_for_success(std::size_t idx, boo std::swap(m_subrequests[real_idx], m_funcall_pipeline[real_idx].subrequest); } } + + failover |= accuracy_failover; } -void ov::npuw::JustInferRequest::unsafe_during(std::size_t real_idx, const std::function& f) { - auto& comp_model_desc = m_npuw_model->m_compiled_submodels[real_idx]; - if (!comp_model_desc.spatial) { +void ov::npuw::JustInferRequest::unsafe_during(std::size_t idx, + const std::function& f, + bool& accuracy_failover) { + std::size_t real_idx = real(idx); + auto& proto_comp_model_desc = m_npuw_model->m_compiled_submodels[real_idx]; + if (!proto_comp_model_desc.spatial) { // Non-spatial execution: trigger request asynchronously, run `f` in this context - auto& r = m_subrequests[real_idx]; - r->start_async(); + try_accurate_substart_async(idx); f(); // expect noexcept - r->wait(); + try_accurate_subwait(idx, accuracy_failover); } else { // Spatial execution... Do the opposite - run f asynchronously, and meanwhile run the // spatial inference auto future = std::async(std::launch::async, f); - unsafe_infer(real_idx); + unsafe_infer(idx, accuracy_failover); future.wait(); } } -void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { - auto& comp_model_desc = m_npuw_model->m_compiled_submodels[real_idx]; - auto& r = m_subrequests[real_idx]; - if (!comp_model_desc.spatial) { +void ov::npuw::JustInferRequest::unsafe_infer(std::size_t idx, bool& accuracy_failover) { + std::size_t real_idx = real(idx); + auto& proto_comp_model_desc = m_npuw_model->m_compiled_submodels[real_idx]; + if (!proto_comp_model_desc.spatial) { // Run normally - r->infer(); + try_accurate_subinfer(idx, accuracy_failover); } else { + auto& r = m_subrequests[real_idx]; // Run over the specified range... Note: the full inputs/outputs // must be prepared in the m_spatial_io at this point - const auto& spatial = comp_model_desc.spatial.value(); - const auto num_outputs = comp_model_desc.compiled_model->outputs().size(); + const auto& spatial = proto_comp_model_desc.spatial.value(); + const auto num_outputs = proto_comp_model_desc.compiled_model->outputs().size(); NPUW_ASSERT(m_spatial_selector); // Create a sparse vector with full input sizes. @@ -949,7 +956,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { // number of input parameters (activations) so some slots may be // not used here. 
// FIXME: All these preparations could be done statically (just once) - std::vector full_in_shapes(comp_model_desc.param_base); + std::vector full_in_shapes(proto_comp_model_desc.param_base); for (auto&& param : spatial.params) { full_in_shapes[param.idx] = m_spatial_io[real_idx].inputs.at(param.idx)->get_shape(); } @@ -974,7 +981,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { // Collect spatial inputs for this offset for (auto&& param : spatial.params) { - const auto& iport = comp_model_desc.compiled_model->inputs()[param.idx]; + const auto& iport = proto_comp_model_desc.compiled_model->inputs()[param.idx]; r->set_tensor( iport, ov::npuw::util::view(m_spatial_io[real_idx].inputs.at(param.idx), param.dim, offset, spatial.nway)); @@ -982,7 +989,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { // Now set the spatial outputs for (std::size_t out_idx = 0u; out_idx < num_outputs; out_idx++) { - const auto& oport = comp_model_desc.compiled_model->outputs()[out_idx]; + const auto& oport = proto_comp_model_desc.compiled_model->outputs()[out_idx]; r->set_tensor(oport, ov::npuw::util::view(m_spatial_io[real_idx].outputs.at(out_idx), spatial.out_dim, @@ -991,7 +998,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { } // for(outputs) // Now run the part - r->infer(); + try_accurate_subinfer(idx, offset, spatial.nway, accuracy_failover); } // for(full_nway_times) // Now process the tail, if required @@ -1004,7 +1011,7 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { offset, spatial.tail_size); - const auto& iport = comp_model_desc.compiled_model->inputs()[param.idx]; + const auto& iport = proto_comp_model_desc.compiled_model->inputs()[param.idx]; auto out_view = ov::npuw::util::view(m_spatial_io[real_idx].input_tails.at(param.idx), param.dim, 0, @@ -1016,16 +1023,16 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { // Now set the tail tensors for (std::size_t out_idx = 0u; out_idx < num_outputs; out_idx++) { - const auto& oport = comp_model_desc.compiled_model->outputs()[out_idx]; + const auto& oport = proto_comp_model_desc.compiled_model->outputs()[out_idx]; r->set_tensor(oport, m_spatial_io[real_idx].output_tails.at(out_idx)); } // for(outputs) // Now run the tail infer - r->infer(); + try_accurate_subinfer(idx, offset, spatial.tail_size, accuracy_failover); // Now copy the views from the output full-nway tensor to the output tensors for (std::size_t out_idx = 0u; out_idx < num_outputs; out_idx++) { - const auto& oport = comp_model_desc.compiled_model->outputs()[out_idx]; + const auto& oport = proto_comp_model_desc.compiled_model->outputs()[out_idx]; auto spatial_tensor_shape = oport.get_shape(); auto in_view = ov::npuw::util::view(m_spatial_io[real_idx].output_tails.at(out_idx), @@ -1043,7 +1050,8 @@ void ov::npuw::JustInferRequest::unsafe_infer(std::size_t real_idx) { } } -void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool& next_prepared) { +void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool& next_prepared, + bool& accuracy_failover) { auto& comp_model_desc = m_npuw_model->m_compiled_submodels[idx]; auto real_idx = comp_model_desc.replaced_by.value_or(idx); const std::size_t next_idx = next(idx + 1); @@ -1057,18 +1065,18 @@ void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool if (is_pipelined(real_idx)) { // function pipelining is here! and the next rq is ours. 
NPUW_ASSERT(m_funcall_pipeline[idx].next.value() == next_idx); - unsafe_during(real_idx, [&]() { + unsafe_during(idx, [&]() { LOG_DEBUG("Unpacking closures for the NEXT subrequest[" << next_idx << "]..."); LOG_BLOCK(); // Note: do it here unconditionally - if this request fails, // have to resubmit all the data to the recompiled pair anyway bind_global_parameters(next_idx); unpack_closure(next_idx, m_funcall_pipeline[real_idx].subrequest); - }); + }, accuracy_failover); } else { // Function pipelining is not used. THIS infer request // is also the NEXT one. Nothing much to do here - unsafe_infer(real_idx); + unsafe_infer(idx, accuracy_failover); bind_global_parameters(next_idx); } } else { @@ -1078,9 +1086,9 @@ void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool if (next_idx == 0) { // Note: even if m_function_pipelining is ON, // SWAP won't happen here - see the below check for .next - unsafe_infer(real_idx); + unsafe_infer(idx, accuracy_failover); } else { - unsafe_during(real_idx, [&]() { + unsafe_during(idx, [&]() { if (!next_prepared) { bind_global_parameters(next_idx); next_prepared = true; @@ -1091,21 +1099,21 @@ void ov::npuw::JustInferRequest::unsafe_run_this_prep_next(std::size_t idx, bool LOG_BLOCK(); unpack_closure(my_next_idx, m_funcall_pipeline[real_idx].subrequest); } - }); + }, accuracy_failover); } } } else { // This is a regular subgraph. Start it async to prepare the next // parameters if (next_idx == 0) { - unsafe_infer(real_idx); + unsafe_infer(idx, accuracy_failover); } else { - unsafe_during(real_idx, [&]() { + unsafe_during(idx, [&]() { if (!next_prepared) { bind_global_parameters(next_idx); next_prepared = true; } - }); + }, accuracy_failover); } } // if (replaced_by) } @@ -1148,10 +1156,6 @@ bool ov::npuw::JustInferRequest::supports_async_pipeline() const { return false; } -void ov::npuw::JustInferRequest::update_subrequest_links(std::size_t) { - connect_subrequests(); -} - bool ov::npuw::JustInferRequest::is_pipelined(std::size_t idx) const { const auto& desc = m_npuw_model->m_compiled_submodels[real(idx)]; return m_use_function_pipelining && desc.replaced_by && !desc.forced_to_fcall; diff --git a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp index d219f170a8e6bb..697925f4f1b652 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/just_sync_infer_request.hpp @@ -94,8 +94,6 @@ class JustInferRequest final : public IBaseInferRequest { std::size_t total_subrequests() const override; bool supports_async_pipeline() const override; - void update_subrequest_links(std::size_t idx) override; - //////////////////////////////////// // now own API @@ -108,9 +106,9 @@ class JustInferRequest final : public IBaseInferRequest { void function_prologue(std::size_t idx); void unpack_closure(std::size_t idx, RqPtr request); - void unsafe_during(std::size_t real_idx, const std::function& f); - void unsafe_infer(std::size_t real_idx); - void unsafe_run_this_prep_next(std::size_t idx, bool& next_prepared_p); + void unsafe_during(std::size_t idx, const std::function& f, bool& accuracy_failover); + void unsafe_infer(std::size_t idx, bool& accuracy_failover); + void unsafe_run_this_prep_next(std::size_t idx, bool& next_prepared, bool& accuracy_failover); void connect_subrequests(); void recreate_subrequests(std::size_t idx); @@ -151,9 +149,6 @@ class JustInferRequest final : public IBaseInferRequest { 
std::unordered_set m_input_allocated; - // Represents spatial run-time info - runtime::spatial::Selector::Ptr m_spatial_selector; - // Cached check if we do FOLDing and need to update closures in the repeating blocks bool m_closure_update_required = false; };
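For reference, a minimal usage sketch of the accuracy-check options extended by this patch (not part of the diff itself). It assumes the private properties header is reachable at the include path below, that the properties live in the ov::intel_npu::npuw::accuracy namespace, and that the "NPU_USE_NPUW" key routes compilation through NPUW; adjust to the actual build.

#include <openvino/openvino.hpp>
#include "intel_npu/npuw_private_properties.hpp"  // private NPUW properties (assumed include path)

int main() {
    ov::Core core;
    const auto model = core.read_model("model.xml");  // illustrative model path

    // Enable the NPUW accuracy check against a CPU reference and dump failing subgraphs.
    ov::AnyMap npuw_config = {
        {"NPU_USE_NPUW", "YES"},                                          // assumed NPUW routing key
        {ov::intel_npu::npuw::accuracy::check.name(), true},              // NPUW_ACC_CHECK
        {ov::intel_npu::npuw::accuracy::threshold.name(), 0.05},          // NPUW_ACC_THRESH
        {ov::intel_npu::npuw::accuracy::reference_device.name(), "CPU"},  // NPUW_ACC_DEVICE
        {ov::intel_npu::npuw::accuracy::dump_failures.name(), true}       // NPUW_ACC_DUMP_FAILS (added above)
    };
    auto compiled = core.compile_model(model, "NPU", npuw_config);

    auto request = compiled.create_infer_request();
    request.infer();  // subgraphs failing the NRMSE check fail over to the reference device
    return 0;
}

With NPUW_ACC_DUMP_FAILS enabled, any subgraph failing the accuracy check is additionally saved as inaccurate_<name>.xml together with its input and output tensor dumps, as implemented in ensure_subrequest_is_accurate() above.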