diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
index 613c7ff8c496e2..7ed21aee7525e4 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/config.hpp
@@ -172,24 +172,6 @@ struct OptionPrinter final {
     }
 };
 
-template <typename T>
-struct OptionPrinter<std::vector<T>> final {
-    static std::string toString(const std::vector<T>& val) {
-        std::stringstream ss;
-        std::size_t counter = 0;
-        std::size_t size = val.size();
-        for (auto el : val) {
-            std::string el_str = OptionPrinter<T>::toString(el);
-            ss << el_str;
-            if (counter < size - 1) {
-                ss << ",";
-            }
-            ++counter;
-        }
-        return ss.str();
-    }
-};
-
 template <typename K, typename V>
 struct OptionPrinter<std::map<K, V>> final {
     static std::string toString(const std::map<K, V>& val) {
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
index 3fd0dcea4f83a5..8a92e5c4824400 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/npuw.hpp
@@ -66,39 +66,36 @@ DEFINE_OPT(NPUW_DUMP_SUBS, std::string, "", npuw::dump::subgraphs, CompileTime);
 DEFINE_OPT(NPUW_DUMP_SUBS_ON_FAIL, std::string, "", npuw::dump::subgraphs_on_fail, CompileTime);
 DEFINE_OPT(NPUW_DUMP_IO, std::string, "", npuw::dump::inputs_outputs, RunTime);
 DEFINE_OPT(NPUW_DUMP_IO_ITERS, bool, false, npuw::dump::io_iters, RunTime);
-DEFINE_OPT(NPUW_LLM, bool, false, npuw::dynamic_llm::enabled, CompileTime);
-DEFINE_OPT(NPUW_LLM_MAX_PROMPT_LEN, uint32_t, 1024, npuw::dynamic_llm::max_prompt_len, CompileTime);
-DEFINE_OPT(NPUW_LLM_MIN_RESPONSE_LEN, uint32_t, 128, npuw::dynamic_llm::min_response_len, CompileTime);
+DEFINE_OPT(NPUW_LLM, bool, false, npuw::llm::enabled, CompileTime);
+DEFINE_OPT(NPUW_LLM_MAX_PROMPT_LEN, uint32_t, 1024, npuw::llm::max_prompt_len, CompileTime);
+DEFINE_OPT(NPUW_LLM_MIN_RESPONSE_LEN, uint32_t, 128, npuw::llm::min_response_len, CompileTime);
 
 namespace npuw {
-namespace dynamic_llm {
+namespace llm {
 struct ModelDesc {
     std::string type;
     std::string name_or_path;
     int num_key_value_heads;
 };
-enum class GenerateHint {
-    FAST_COMPILE,
-    BEST_PERF
-};
-}  // namespace dynamic_llm
-}  // namespace npuw
+enum class GenerateHint { FAST_COMPILE, BEST_PERF };
+}  // namespace llm
+}  // namespace npuw
 
-struct NPUW_LLM_MODEL_DESC final : OptionBase<NPUW_LLM_MODEL_DESC, ::intel_npu::npuw::dynamic_llm::ModelDesc> {
+struct NPUW_LLM_MODEL_DESC final : OptionBase<NPUW_LLM_MODEL_DESC, ::intel_npu::npuw::llm::ModelDesc> {
     static std::string_view key() {
-        return ov::intel_npu::npuw::dynamic_llm::model_desc.name();
+        return ov::intel_npu::npuw::llm::model_desc.name();
     }
 
     static constexpr std::string_view getTypeName() {
-        return "::intel_npu::npuw::dynamic_llm::ModelDesc";
+        return "::intel_npu::npuw::llm::ModelDesc";
    }
 
-    static ::intel_npu::npuw::dynamic_llm::ModelDesc defaultValue() {
+    static ::intel_npu::npuw::llm::ModelDesc defaultValue() {
         return {};
     }
 
-    static ::intel_npu::npuw::dynamic_llm::ModelDesc parse(std::string_view val) {
-        ::intel_npu::npuw::dynamic_llm::ModelDesc res;
+    static ::intel_npu::npuw::llm::ModelDesc parse(std::string_view val) {
+        ::intel_npu::npuw::llm::ModelDesc res;
         std::map<std::string, std::string> res_map = OptionParser<std::map<std::string, std::string>>::parse(val);
         res.type = res_map["type"];
         res.name_or_path = res_map["name_or_path"];
@@ -106,7 +103,7 @@ struct NPUW_LLM_MODEL_DESC final : OptionBase<NPUW_LLM_MODEL_DESC, ::intel_npu:
         return res;
     }
 
-    static std::string toString(const ::intel_npu::npuw::dynamic_llm::ModelDesc& val) {
+    static std::string toString(const ::intel_npu::npuw::llm::ModelDesc& val) {
         std::map<std::string, std::string> res_map;
         res_map["type"] = val.type;
@@ -124,46 +121,45 @@ struct NPUW_LLM_MODEL_DESC final : OptionBase<NPUW_LLM_MODEL_DESC, ::intel_npu:
     }
 };
 
-struct NPUW_LLM_GENERATE_HINT final : OptionBase<NPUW_LLM_GENERATE_HINT, ::intel_npu::npuw::dynamic_llm::GenerateHint> {
+struct NPUW_LLM_GENERATE_HINT final : OptionBase<NPUW_LLM_GENERATE_HINT, ::intel_npu::npuw::llm::GenerateHint> {
     static std::string_view key() {
-        return ov::intel_npu::npuw::dynamic_llm::generate_hint.name();
+        return ov::intel_npu::npuw::llm::generate_hint.name();
     }
 
     static constexpr std::string_view getTypeName() {
-        return "::intel_npu::npuw::dynamic_llm::GenerateHint";
+        return "::intel_npu::npuw::llm::GenerateHint";
     }
 
-    static ::intel_npu::npuw::dynamic_llm::GenerateHint defaultValue() {
-        return ::intel_npu::npuw::dynamic_llm::GenerateHint::FAST_COMPILE;
+    static ::intel_npu::npuw::llm::GenerateHint defaultValue() {
+        return ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE;
     }
 
-    static ::intel_npu::npuw::dynamic_llm::GenerateHint parse(std::string_view val) {
-        ::intel_npu::npuw::dynamic_llm::GenerateHint res;
+    static ::intel_npu::npuw::llm::GenerateHint parse(std::string_view val) {
+        ::intel_npu::npuw::llm::GenerateHint res;
         if (val == "FAST_COMPILE") {
-            res = ::intel_npu::npuw::dynamic_llm::GenerateHint::FAST_COMPILE;
-        }
-        else if (val == "BEST_PERF") {
-            res = ::intel_npu::npuw::dynamic_llm::GenerateHint::BEST_PERF;
+            res = ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE;
+        } else if (val == "BEST_PERF") {
+            res = ::intel_npu::npuw::llm::GenerateHint::BEST_PERF;
         } else {
             OPENVINO_THROW("Unsupported \"GENERATE_HINT\" provided: ",
-                           val, ". Please select either \"FAST_COMPILE\" or \"BEST_PERF\".");
+                           val,
+                           ". Please select either \"FAST_COMPILE\" or \"BEST_PERF\".");
         }
         return res;
     }
 
-    static std::string toString(const ::intel_npu::npuw::dynamic_llm::GenerateHint& val) {
+    static std::string toString(const ::intel_npu::npuw::llm::GenerateHint& val) {
         std::string res;
         switch (val) {
-        case ::intel_npu::npuw::dynamic_llm::GenerateHint::FAST_COMPILE:
-            res = "FAST_COMPILE";
-            break;
-        case ::intel_npu::npuw::dynamic_llm::GenerateHint::BEST_PERF:
-            res = "BEST_PERF";
-            break;
-        default:
-            OPENVINO_THROW("Can't convert provided \"GENERATE_HINT\" : ",
-                           int(val), " to string.");
+        case ::intel_npu::npuw::llm::GenerateHint::FAST_COMPILE:
+            res = "FAST_COMPILE";
+            break;
+        case ::intel_npu::npuw::llm::GenerateHint::BEST_PERF:
+            res = "BEST_PERF";
+            break;
+        default:
+            OPENVINO_THROW("Can't convert provided \"GENERATE_HINT\" : ", int(val), " to string.");
         }
         return res;
     }
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
index e9d782a6d8e46c..79fd8409f3be7c 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/npuw_private_properties.hpp
@@ -370,14 +370,14 @@ static constexpr ov::Property<std::string> inputs_outputs{"NPUW_DUMP_IO"};
 static constexpr ov::Property<bool> io_iters{"NPUW_DUMP_IO_ITERS"};
 }  // namespace dump
 
-namespace dynamic_llm {
+namespace llm {
 /**
 * @brief
 * Type: bool.
 * Tell NPUW that you want to pass dynamic stateful LLM model
 * Default value: false.
 */
-static constexpr ov::Property<bool> enabled {"NPUW_LLM"};
+static constexpr ov::Property<bool> enabled{"NPUW_LLM"};
 
 /**
 * @brief
@@ -385,15 +385,15 @@ static constexpr ov::Property<bool> enabled {"NPUW_LLM"};
 * Tell NPUW about your LLM model.
 * Default value: empty map.
 */
-static constexpr ov::Property<std::map<std::string, std::string>> model_desc {"NPUW_LLM_MODEL_DESC"};
+static constexpr ov::Property<std::map<std::string, std::string>> model_desc{"NPUW_LLM_MODEL_DESC"};
 
- /**
+/**
 * @brief
 * Type: uint32_t.
 * Tell NPUW your desirable max prompt length.
 * Default value: 1024.
 */
-static constexpr ov::Property<uint32_t> max_prompt_len {"NPUW_LLM_MAX_PROMPT_LEN"};
+static constexpr ov::Property<uint32_t> max_prompt_len{"NPUW_LLM_MAX_PROMPT_LEN"};
 
 /**
 * @brief
@@ -401,7 +401,7 @@ static constexpr ov::Property<uint32_t> max_prompt_len {"NPUW_LLM_MAX_PROMPT_LEN
 * Tell NPUW your desirable min response length.
 * Default value: 128.
 */
-static constexpr ov::Property<uint32_t> min_response_len {"NPUW_LLM_MIN_RESPONSE_LEN"};
+static constexpr ov::Property<uint32_t> min_response_len{"NPUW_LLM_MIN_RESPONSE_LEN"};
 
 /**
 * @brief
@@ -410,9 +410,9 @@ static constexpr ov::Property<uint32_t> min_response_len {"NPUW_LLM_MIN_RESPONSE
 * Possible values: "FAST_COMPILE", "BEST_PERF".
 * Default value: "FAST_COMPILE".
 */
-static constexpr ov::Property<std::string> generate_hint {"NPUW_LLM_GENERATE_HINT"};
+static constexpr ov::Property<std::string> generate_hint{"NPUW_LLM_GENERATE_HINT"};
 
-} // namespace llm_dynamic
+}  // namespace llm
 
 }  // namespace npuw
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/al/src/config/npuw.cpp b/src/plugins/intel_npu/src/al/src/config/npuw.cpp
index 5b2501bd33a66d..7eba967776827b 100644
--- a/src/plugins/intel_npu/src/al/src/config/npuw.cpp
+++ b/src/plugins/intel_npu/src/al/src/config/npuw.cpp
@@ -52,11 +52,6 @@ void intel_npu::registerNPUWOptions(OptionsDesc& desc) {
     desc.add<NPUW_DUMP_IO>();
     desc.add<NPUW_DUMP_IO_ITERS>();
 #endif
-    desc.add<NPUW_LLM>();
-    desc.add<NPUW_LLM_MODEL_DESC>();
-    desc.add<NPUW_LLM_MAX_PROMPT_LEN>();
-    desc.add<NPUW_LLM_MIN_RESPONSE_LEN>();
-    desc.add<NPUW_LLM_GENERATE_HINT>();
 }
 
 void intel_npu::registerNpuwLlmOptions(OptionsDesc& desc) {
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
index b723d6d095d025..ced6d6f3840d2e 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
@@ -28,11 +28,12 @@
 #include "intel_npu/config/config.hpp"
 #include "intel_npu/config/npuw.hpp"
 #include "intel_npu/npuw_private_properties.hpp"
+#include "llm_compiled_model.hpp"
 #include "openvino/runtime/device_id_parser.hpp"
 #include "openvino/runtime/internal_properties.hpp"
 #include "openvino/runtime/properties.hpp"
 #include "transformations/convert_precision.hpp"
 
 namespace {
 void split_properties(const ov::AnyMap& properties,
                       ov::AnyMap& npu_plugin_properties,
@@ -85,10 +86,33 @@ ov::npuw::DeviceProperties get_properties_per_device(const std::shared_ptr<
 }
 }  // namespace
 
+std::shared_ptr<ov::npuw::ICompiledModel> ov::npuw::ICompiledModel::create(
+    const std::shared_ptr<ov::Model>& model,
+    const std::shared_ptr<const ov::IPlugin>& plugin,
+    const ov::AnyMap& properties) {
+    LOG_VERB(__PRETTY_FUNCTION__);
+    LOG_BLOCK();
+    std::shared_ptr<ov::npuw::ICompiledModel> compiled_model;
+    auto use_llm_key = ov::intel_npu::npuw::llm::enabled.name();
+    if (properties.count(use_llm_key) && properties.at(use_llm_key).as<bool>() == true) {
+        LOG_DEBUG("ov::npuw::LLMCompiledModel will be created.");
+        compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, properties);
+    } else {
+        LOG_DEBUG("ov::npuw::CompiledModel will be created.");
+        compiled_model = std::make_shared<ov::npuw::CompiledModel>(model, plugin, properties);
+    }
+    LOG_DEBUG("Done");
+    return compiled_model;
+}
+
+ov::npuw::ICompiledModel::ICompiledModel(const std::shared_ptr<ov::Model>& model,
+                                         const std::shared_ptr<const ov::IPlugin>& plugin)
+    : ov::ICompiledModel(model, plugin) {}
+
 ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
                                        const std::shared_ptr<const ov::IPlugin>& plugin,
                                        const ov::AnyMap& properties)
-    : ov::ICompiledModel(model, plugin),
+    : ov::npuw::ICompiledModel(model, plugin),
       m_options_desc(std::make_shared<::intel_npu::OptionsDesc>()),
       m_cfg(m_options_desc),
       m_name(model->get_friendly_name()),
@@ -875,8 +899,6 @@ void ov::npuw::CompiledModel::implement_properties() {
     //    request. So the vector will define public properties.
     // 3. Create mappings for all remaining (private) NPUW-specific properties
     //    to getters of their values from config, related to ov::npuw::CompiledModel.
-    // 4. Fill default values for (private) NPUW-specific, dynamic stateful
-    //    model-specific properties.
 
 #define GET_PLUGIN_PROP(property) return get_plugin()->get_property(property.name(), ov::AnyMap());
@@ -963,52 +985,47 @@ void ov::npuw::CompiledModel::implement_properties() {
         }                                                                  \
     }
 
-    m_prop_to_opt.insert({BIND(use_npuw, NPU_USE_NPUW),
-                          BIND(npuw::devices, NPUW_DEVICES),
-                          BIND(npuw::submodel_device, NPUW_SUBMODEL_DEVICE),
-                          BIND(npuw::partitioning::online::pipeline, NPUW_ONLINE_PIPELINE),
-                          BIND(npuw::partitioning::online::min_size, NPUW_ONLINE_MIN_SIZE),
-                          BIND(npuw::partitioning::online::keep_blocks, NPUW_ONLINE_KEEP_BLOCKS),
-                          BIND(npuw::partitioning::online::keep_block_size, NPUW_ONLINE_KEEP_BLOCK_SIZE),
-                          BIND(npuw::partitioning::online::avoid, NPUW_ONLINE_AVOID),
-                          BIND(npuw::partitioning::online::isolate, NPUW_ONLINE_ISOLATE),
-                          BIND(npuw::partitioning::online::nofold, NPUW_ONLINE_NO_FOLD),
-                          BIND(npuw::partitioning::online::dump_plan, NPUW_ONLINE_DUMP_PLAN),
-                          BIND(npuw::partitioning::plan, NPUW_PLAN),
-                          BIND(npuw::partitioning::fold, NPUW_FOLD),
-                          BIND(npuw::partitioning::cwai, NPUW_CWAI),
-                          BIND(npuw::partitioning::dyn_quant, NPUW_DQ),
-                          BIND(npuw::partitioning::par_matmul_merge_dims, NPUW_PMM),
-                          BIND(npuw::partitioning::slice_out, NPUW_SLICE_OUT),
-                          BIND(npuw::partitioning::spatial, NPUW_SPATIAL),
-                          BIND(npuw::partitioning::spatial_nway, NPUW_SPATIAL_NWAY),
-                          BIND(npuw::partitioning::spatial_dyn, NPUW_SPATIAL_DYN),
-                          BIND(npuw::partitioning::host_gather, NPUW_HOST_GATHER),
-                          BIND(npuw::partitioning::funcall_for_all, NPUW_FUNCALL_FOR_ALL),
-                          BIND(npuw::partitioning::dcoff_type, NPUW_DCOFF_TYPE),
-                          BIND(npuw::partitioning::dcoff_with_scale, NPUW_DCOFF_SCALE),
-                          BIND(npuw::parallel_compilation, NPUW_PARALLEL_COMPILE),
-                          BIND(npuw::funcall_async, NPUW_FUNCALL_ASYNC),
-                          BIND(npuw::unfold_ireqs, NPUW_UNFOLD_IREQS),
-                          BIND(npuw::weights_bank, NPUW_WEIGHTS_BANK),
-                          BIND(npuw::weights_bank_alloc, NPUW_WEIGHTS_BANK_ALLOC),
-                          BIND(npuw::cache_dir, NPUW_CACHE_DIR),
-                          BIND(npuw::accuracy::check, NPUW_ACC_CHECK),
-                          BIND(npuw::accuracy::threshold, NPUW_ACC_THRESH),
-                          BIND(npuw::accuracy::reference_device, NPUW_ACC_DEVICE),
+    m_prop_to_opt.insert({
+        BIND(use_npuw, NPU_USE_NPUW),
+        BIND(npuw::devices, NPUW_DEVICES),
+        BIND(npuw::submodel_device, NPUW_SUBMODEL_DEVICE),
+        BIND(npuw::partitioning::online::pipeline, NPUW_ONLINE_PIPELINE),
+        BIND(npuw::partitioning::online::min_size, NPUW_ONLINE_MIN_SIZE),
+        BIND(npuw::partitioning::online::keep_blocks, NPUW_ONLINE_KEEP_BLOCKS),
+        BIND(npuw::partitioning::online::keep_block_size, NPUW_ONLINE_KEEP_BLOCK_SIZE),
+        BIND(npuw::partitioning::online::avoid, NPUW_ONLINE_AVOID),
+        BIND(npuw::partitioning::online::isolate, NPUW_ONLINE_ISOLATE),
+        BIND(npuw::partitioning::online::nofold, NPUW_ONLINE_NO_FOLD),
+        BIND(npuw::partitioning::online::dump_plan, NPUW_ONLINE_DUMP_PLAN),
+        BIND(npuw::partitioning::plan, NPUW_PLAN),
+        BIND(npuw::partitioning::fold, NPUW_FOLD),
+        BIND(npuw::partitioning::cwai, NPUW_CWAI),
+        BIND(npuw::partitioning::dyn_quant, NPUW_DQ),
+        BIND(npuw::partitioning::par_matmul_merge_dims, NPUW_PMM),
+        BIND(npuw::partitioning::slice_out, NPUW_SLICE_OUT),
+        BIND(npuw::partitioning::spatial, NPUW_SPATIAL),
+        BIND(npuw::partitioning::spatial_nway, NPUW_SPATIAL_NWAY),
+        BIND(npuw::partitioning::spatial_dyn, NPUW_SPATIAL_DYN),
+        BIND(npuw::partitioning::host_gather, NPUW_HOST_GATHER),
+        BIND(npuw::partitioning::funcall_for_all, NPUW_FUNCALL_FOR_ALL),
+        BIND(npuw::partitioning::dcoff_type, NPUW_DCOFF_TYPE),
+        BIND(npuw::partitioning::dcoff_with_scale, NPUW_DCOFF_SCALE),
+        BIND(npuw::parallel_compilation, NPUW_PARALLEL_COMPILE),
+        BIND(npuw::funcall_async, NPUW_FUNCALL_ASYNC),
+        BIND(npuw::unfold_ireqs, NPUW_UNFOLD_IREQS),
+        BIND(npuw::weights_bank, NPUW_WEIGHTS_BANK),
+        BIND(npuw::weights_bank_alloc, NPUW_WEIGHTS_BANK_ALLOC),
+        BIND(npuw::cache_dir, NPUW_CACHE_DIR),
+        BIND(npuw::accuracy::check, NPUW_ACC_CHECK),
+        BIND(npuw::accuracy::threshold, NPUW_ACC_THRESH),
+        BIND(npuw::accuracy::reference_device, NPUW_ACC_DEVICE),
 #ifdef NPU_PLUGIN_DEVELOPER_BUILD
-                          BIND(npuw::dump::full, NPUW_DUMP_FULL),
-                          BIND(npuw::dump::subgraphs, NPUW_DUMP_SUBS),
-                          BIND(npuw::dump::subgraphs_on_fail, NPUW_DUMP_SUBS_ON_FAIL),
-                          BIND(npuw::dump::inputs_outputs, NPUW_DUMP_IO),
-                          BIND(npuw::dump::io_iters, NPUW_DUMP_IO_ITERS),
+        BIND(npuw::dump::full, NPUW_DUMP_FULL),
+        BIND(npuw::dump::subgraphs, NPUW_DUMP_SUBS),
+        BIND(npuw::dump::subgraphs_on_fail, NPUW_DUMP_SUBS_ON_FAIL),
+        BIND(npuw::dump::inputs_outputs, NPUW_DUMP_IO),
+        BIND(npuw::dump::io_iters, NPUW_DUMP_IO_ITERS),
 #endif
-                          // 4.
-                          BIND(npuw::dynamic_llm::enabled, NPUW_LLM),
-                          BIND(npuw::dynamic_llm::model_desc, NPUW_LLM_MODEL_DESC),
-                          BIND(npuw::dynamic_llm::max_prompt_len, NPUW_LLM_MAX_PROMPT_LEN),
-                          BIND(npuw::dynamic_llm::min_response_len, NPUW_LLM_MIN_RESPONSE_LEN),
-                          BIND(npuw::dynamic_llm::generate_hint, NPUW_LLM_GENERATE_HINT)});
+    });
 
 #undef BIND
 }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
index e82a175f03042d..0d0a18f1e0df3b 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.hpp
@@ -22,9 +22,16 @@ class Plugin;
 
 namespace ov {
 namespace npuw {
 
+class ICompiledModel : public ov::ICompiledModel {
+public:
+    static std::shared_ptr<ov::npuw::ICompiledModel> create(const std::shared_ptr<ov::Model>& model,
+                                                            const std::shared_ptr<const ov::IPlugin>& plugin,
+                                                            const ov::AnyMap& properties);
+    ICompiledModel(const std::shared_ptr<ov::Model>& model, const std::shared_ptr<const ov::IPlugin>& plugin);
+};
 class InferRequest;
 
-class CompiledModel : public ov::ICompiledModel {
+class CompiledModel : public ov::npuw::ICompiledModel {
     using DevList = std::vector<std::string>;
     using GetPropertiesMap =
         std::map<std::string, std::tuple<ov::PropertyMutability, std::function<ov::Any(const ::intel_npu::Config&)>>>;
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model_factory.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model_factory.cpp
deleted file mode 100644
index 246f27bf4a0aee..00000000000000
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model_factory.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (C) 2024 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "compiled_model_factory.hpp"
-#include "logging.hpp"
-#include "intel_npu/npuw_private_properties.hpp"
-#include "compiled_model.hpp"
-#include "llm_compiled_model.hpp"
-
-std::shared_ptr<ov::ICompiledModel>
-ov::npuw::CompiledModelFactory::create(const std::shared_ptr<ov::Model>& model,
-                                       const std::shared_ptr<const ov::IPlugin>& plugin,
-                                       const ov::AnyMap& properties) {
-    LOG_VERB(__PRETTY_FUNCTION__);
-    LOG_BLOCK();
-    std::shared_ptr<ov::ICompiledModel> compiled_model;
-    auto use_dynamic_llm_key = ov::intel_npu::npuw::dynamic_llm::enabled.name();
-    if (properties.count(use_dynamic_llm_key) &&
-        properties.at(use_dynamic_llm_key).as<bool>() == true) {
-        LOG_DEBUG("ov::npuw::LLMCompiledModel will be created.");
-        compiled_model = std::make_shared<ov::npuw::LLMCompiledModel>(model, plugin, properties);
-    }
-    else {
-        LOG_DEBUG("ov::npuw::CompiledModel will be created.");
-        compiled_model = std::make_shared<ov::npuw::CompiledModel>(model, plugin, properties);
-    }
-    LOG_DEBUG("Done");
-    return compiled_model;
-}
diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model_factory.hpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model_factory.hpp
deleted file mode 100644
index d190f88aeab95d..00000000000000
--- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model_factory.hpp
+++ /dev/null
@@ -1,29 +0,0 @@
-// Copyright (C) 2024 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include <memory>
-
-#include "common.hpp"
-#include "openvino/openvino.hpp"
-#include "openvino/runtime/icompiled_model.hpp"
-#include "openvino/runtime/so_ptr.hpp"
-
-namespace intel_npu {
-class Plugin;
-}
-
-namespace ov {
-namespace npuw {
-
-class CompiledModelFactory {
-public:
-    static std::shared_ptr<ov::ICompiledModel> create(const std::shared_ptr<ov::Model>& model,
-                                                      const std::shared_ptr<const ov::IPlugin>& plugin,
-                                                      const ov::AnyMap& properties);
-};
-
-}  // namespace npuw
-}  // namespace ov
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
index ef1274276cb935..c0e9f8b935396f 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.cpp
@@ -4,9 +4,8 @@
 
 #include "llm_compiled_model.hpp"
 
 #include "llm_infer_request.hpp"
-
-#include "openvino/runtime/iasync_infer_request.hpp"
 #include "openvino/pass/stateful_to_stateless.hpp"
+#include "openvino/runtime/iasync_infer_request.hpp"
 
 namespace {
 uint32_t align_to(uint32_t value, uint32_t alignment) {
@@ -16,10 +15,10 @@ uint32_t align_to(uint32_t value, uint32_t alignment) {
 std::shared_ptr<ov::Model> redirect_new_kv_to_output(const std::shared_ptr<ov::Model>& model) {
     const auto kStartOutputKVCacheLayers = 1u;
     for (int i = kStartOutputKVCacheLayers; i < model->outputs().size(); ++i) {
-        auto kvout  = model->output(i);
+        auto kvout = model->output(i);
         auto kvrslt = kvout.get_node();
-        auto kvcat  = kvrslt->inputs()[0].get_source_output().get_node();
-        auto kvval  = kvcat->inputs()[1].get_source_output();
+        auto kvcat = kvrslt->inputs()[0].get_source_output().get_node();
+        auto kvval = kvcat->inputs()[1].get_source_output();
         kvval.set_names({kvout.get_any_name()});
         kvrslt->inputs()[0].replace_source_output(kvval);
     }
@@ -144,16 +143,15 @@ T pop_or_default(ov::AnyMap& config, const std::string& key, const T& default_va
 
 ov::AnyMap get_baseline_common_config() {
     ov::AnyMap config = {
-        { "NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add_RMSNorm" },
-        { "NPUW_DEVICES", "NPU" },
-        { "NPU_USE_NPUW", "YES" },
-        { "NPUW_FOLD", "YES" },
-        { "NPUW_DCOFF_TYPE", "f16" },
-        { "NPUW_DCOFF_SCALE", "YES"},
-        { "NPUW_WEIGHTS_BANK", "shared" },
-        { "NPUW_SLICE_OUT", "YES" },
-        { "NPUW_FUNCALL_ASYNC", "YES" }
-    };
+        {"NPU_COMPILATION_MODE_PARAMS", "compute-layers-with-higher-precision=Sqrt,Power,ReduceMean,Add_RMSNorm"},
+        {"NPUW_DEVICES", "NPU"},
+        {"NPU_USE_NPUW", "YES"},
+        {"NPUW_FOLD", "YES"},
+        {"NPUW_DCOFF_TYPE", "f16"},
+        {"NPUW_DCOFF_SCALE", "YES"},
+        {"NPUW_WEIGHTS_BANK", "shared"},
+        {"NPUW_SLICE_OUT", "YES"},
+        {"NPUW_FUNCALL_ASYNC", "YES"}};
     return config;
 }
 
@@ -168,17 +166,14 @@ ov::AnyMap get_default_common_config(const std::shared_ptr<ov::Model>& model) {
     return config;
 }
 
-ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model,
-                                      const std::optional<NPUDesc>& npudesc) {
+ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model, const std::optional<NPUDesc>& npudesc) {
     auto config = get_default_common_config(model);
     if (is_cw_compressed(model)) {
         config.emplace("NPUW_DQ", "YES");
     } else {
         config.emplace("NPUW_PMM", "NO");
     }
-    if (npudesc.has_value() &&
-        npudesc->arch == "4000" &&
-        npudesc->max_tiles != -1) {
+    if (npudesc.has_value() && npudesc->arch == "4000" && npudesc->max_tiles != -1) {
         config.emplace("NPU_DPU_GROUPS", npudesc->max_tiles);
     }
     return config;
@@ -186,9 +181,9 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model,
 
 ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
                                        const std::optional<NPUDesc>& npudesc,
-                                       const ::intel_npu::npuw::dynamic_llm::GenerateHint hint) {
+                                       const ::intel_npu::npuw::llm::GenerateHint hint) {
     auto config = get_default_common_config(model);
-    if (hint == ::intel_npu::npuw::dynamic_llm::GenerateHint::BEST_PERF) {
+    if (hint == ::intel_npu::npuw::llm::GenerateHint::BEST_PERF) {
         config.emplace("NPUW_ONLINE_PIPELINE", "NONE");
     }
     // NB: Unconditionally set for generation model
@@ -216,12 +211,10 @@ void drop_cache_dir(ov::AnyMap& config) {
     }
 }
 
-void split_llm_properties(const ov::AnyMap& properties,
-                          ov::AnyMap& dyn_llm_properties,
-                          ov::AnyMap& other_properties) {
+void split_llm_properties(const ov::AnyMap& properties, ov::AnyMap& llm_properties, ov::AnyMap& other_properties) {
     for (auto it = properties.begin(); it != properties.end(); ++it) {
         if (it->first.find("NPUW_LLM") != it->first.npos) {
-            dyn_llm_properties.insert(*it);
+            llm_properties.insert(*it);
         } else {
             other_properties.insert(*it);
         }
@@ -235,27 +228,27 @@ std::map<std::string, std::string> any_copy(const ov::AnyMap& params) {
     }
     return result;
 }
-} // namespace
+}  // namespace
 
 ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& model,
                                              const std::shared_ptr<const ov::IPlugin>& plugin,
                                              const ov::AnyMap& properties)
-    : ov::ICompiledModel(model, plugin),
-    m_options_desc(std::make_shared<::intel_npu::OptionsDesc>()),
-    m_cfg(m_options_desc) {
+    : ov::npuw::ICompiledModel(model, plugin),
+      m_options_desc(std::make_shared<::intel_npu::OptionsDesc>()),
+      m_cfg(m_options_desc) {
     LOG_VERB(__PRETTY_FUNCTION__);
     LOG_BLOCK();
 
     ::intel_npu::registerNpuwLlmOptions(*m_options_desc);
 
-    std::map<std::string, ov::Any> npuw_dyn_llm_props;
-    std::map<std::string, ov::Any> other_props;
-    split_llm_properties(properties, npuw_dyn_llm_props, other_props);
-    m_cfg.update(any_copy(npuw_dyn_llm_props));
+    std::map<std::string, ov::Any> npuw_llm_props;
+    std::map<std::string, ov::Any> other_props;
+    split_llm_properties(properties, npuw_llm_props, other_props);
+    m_cfg.update(any_copy(npuw_llm_props));
 
     // (1) Make template model to be kvcache model, used in generation phase
     auto kvcache_model = model->clone();
-    // (2) Expose KV-cache input and output layers from kvcache model
+    // (2) Expose KV-cache input and output layers from kvcache model
     ov::pass::StatefulToStateless().run_on_model(kvcache_model);
 
     // (3) Create prefill model from passed template model
@@ -271,10 +264,9 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
 
     const uint32_t kMaxPromptLen = align_to(m_cfg.get<::intel_npu::NPUW_LLM_MAX_PROMPT_LEN>(), 64u);
     const uint32_t kMinResponseLen = align_to(m_cfg.get<::intel_npu::NPUW_LLM_MIN_RESPONSE_LEN>(), 64u);
-    const ::intel_npu::npuw::dynamic_llm::ModelDesc model_desc = m_cfg.get<::intel_npu::NPUW_LLM_MODEL_DESC>();
+    const ::intel_npu::npuw::llm::ModelDesc model_desc = m_cfg.get<::intel_npu::NPUW_LLM_MODEL_DESC>();
     KVAxesPosition axes = get_kv_axes(model_desc.type);
-    m_kvcache_desc = KVCacheDesc
-        { kMaxPromptLen, kMaxPromptLen + kMinResponseLen, 0u, axes.seq_len};
+    m_kvcache_desc = KVCacheDesc{kMaxPromptLen, kMaxPromptLen + kMinResponseLen, 0u, axes.seq_len};
 
     // (7) Make prefill model with static shapes
     reshape_to_static(prefill_model, m_kvcache_desc.max_prompt_size, m_kvcache_desc.max_prompt_size, axes);
     // (8) Make kvcache model with static shapes
@@ -285,7 +277,7 @@ ov::npuw::LLMCompiledModel::LLMCompiledModel(const std::shared_ptr<ov::Model>& m
     ov::AnyMap properties_copy = other_props;
     auto prefill_config = get_default_prefill_config(model, npudesc);
     // NB: GENERATE_HINT is only applicable for default generate config!
-    const ::intel_npu::npuw::dynamic_llm::GenerateHint generate_hint = m_cfg.get<::intel_npu::NPUW_LLM_GENERATE_HINT>();
+    const ::intel_npu::npuw::llm::GenerateHint generate_hint = m_cfg.get<::intel_npu::NPUW_LLM_GENERATE_HINT>();
     auto generate_config = get_default_generate_config(model, npudesc, generate_hint);
     merge_config_with(prefill_config, properties_copy);
     merge_config_with(generate_config, properties_copy);
@@ -333,11 +325,6 @@ std::shared_ptr<ov::ISyncInferRequest> ov::npuw::LLMCompiledModel::create_llm_in
     return std::make_shared<ov::npuw::LLMInferRequest>(this_sptr, m_kvcache_desc);
 }
 
-std::shared_ptr<ov::IAsyncInferRequest> ov::npuw::LLMCompiledModel::create_infer_request() const {
-    auto internal_request = create_sync_infer_request();
-    return std::make_shared<ov::IAsyncInferRequest>(internal_request, get_task_executor(), get_callback_executor());
-}
-
 void ov::npuw::LLMCompiledModel::implement_properties() {
 #define BIND(N, T) \
@@ -348,10 +335,10 @@ void ov::npuw::LLMCompiledModel::implement_properties() {
         }                                                                  \
     }
 
-    m_prop_to_opt.insert({BIND(npuw::dynamic_llm::enabled, NPUW_LLM),
-                          BIND(npuw::dynamic_llm::model_desc, NPUW_LLM_MODEL_DESC),
-                          BIND(npuw::dynamic_llm::max_prompt_len, NPUW_LLM_MAX_PROMPT_LEN),
-                          BIND(npuw::dynamic_llm::min_response_len, NPUW_LLM_MIN_RESPONSE_LEN),
-                          BIND(npuw::dynamic_llm::generate_hint, NPUW_LLM_GENERATE_HINT)});
+    m_prop_to_opt.insert({BIND(npuw::llm::enabled, NPUW_LLM),
+                          BIND(npuw::llm::model_desc, NPUW_LLM_MODEL_DESC),
+                          BIND(npuw::llm::max_prompt_len, NPUW_LLM_MAX_PROMPT_LEN),
+                          BIND(npuw::llm::min_response_len, NPUW_LLM_MIN_RESPONSE_LEN),
+                          BIND(npuw::llm::generate_hint, NPUW_LLM_GENERATE_HINT)});
 #undef BIND
 }
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp
index fe025e750169f1..1a748997fd48fa 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_compiled_model.hpp
@@ -12,33 +12,28 @@ namespace ov {
 namespace npuw {
 
 class LLMInferRequest;
 
-class LLMCompiledModel : public ov::ICompiledModel {
+class LLMCompiledModel : public ov::npuw::ICompiledModel {
     using GetPropertiesMap =
-        std::map<std::string, std::tuple<ov::PropertyMutability, std::function<ov::Any(const ::intel_npu::Config&)>>>;
+        std::map<std::string, std::tuple<ov::PropertyMutability, std::function<ov::Any(const ::intel_npu::Config&)>>>;
+
 public:
     struct KVCacheDesc {
-        uint32_t max_prompt_size = 1024u;
-        uint32_t total_size = 1152u;
+        uint32_t max_prompt_size = 0u;
+        uint32_t total_size = 0u;
         uint32_t num_stored_tokens = 0u;
-        uint32_t dim = 2u;
+        uint32_t dim = 0u;
     };
 
     LLMCompiledModel(const std::shared_ptr<ov::Model>& model,
                      const std::shared_ptr<const ov::IPlugin>& plugin,
                      const ov::AnyMap& properties);
+    LLMCompiledModel() = delete;
     void export_model(std::ostream& model) const override;
     std::shared_ptr<ov::Model> get_runtime_model() const override;
 
     void set_property(const ov::AnyMap& properties) override;
     ov::Any get_property(const std::string& name) const override;
 
-    std::shared_ptr<ov::IAsyncInferRequest> create_infer_request() const override;
-
-    std::shared_ptr<::intel_npu::OptionsDesc> m_options_desc;
-    ::intel_npu::Config m_cfg;
-    GetPropertiesMap m_prop_to_opt;
-
 private:
     friend class LLMInferRequest;
 
@@ -46,10 +41,14 @@ class LLMCompiledModel : public ov::ICompiledModel {
     std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
     void implement_properties();
 
+    std::shared_ptr<::intel_npu::OptionsDesc> m_options_desc;
+    ::intel_npu::Config m_cfg;
+    GetPropertiesMap m_prop_to_opt;
+
     KVCacheDesc m_kvcache_desc;
     std::shared_ptr<ov::npuw::CompiledModel> m_kvcache_compiled;
     std::shared_ptr<ov::npuw::CompiledModel> m_prefill_compiled;
 };
 
-} // namespace npuw
-} // namespace ov
+}  // namespace npuw
+}  // namespace ov
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
index 705a8e07bbe0c1..e399aaa011eca7 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.cpp
@@ -4,18 +4,21 @@
 
 #include "llm_infer_request.hpp"
 
+#include <regex>
+
 #include "llm_compiled_model.hpp"
 #include "openvino/runtime/iasync_infer_request.hpp"
 
-#include <regex>
-
 template <typename T>
 void fill_tensor(ov::SoPtr<ov::ITensor> tensor, T fill_val, size_t offset = 0u) {
     T* tensor_data = tensor->data<T>();
     std::fill(tensor_data + offset, tensor_data + tensor->get_size(), fill_val);
 }
 
-ov::SoPtr<ov::ITensor> make_tensor_slice(ov::SoPtr<ov::ITensor> tensor, uint32_t dim, uint32_t start_pos, uint32_t end_pos) {
+ov::SoPtr<ov::ITensor> make_tensor_slice(ov::SoPtr<ov::ITensor> tensor,
+                                         uint32_t dim,
+                                         uint32_t start_pos,
+                                         uint32_t end_pos) {
     ov::Shape start_shape(std::vector<size_t>(tensor->get_shape().size(), 0u));
     start_shape[dim] = start_pos;
     ov::Shape end_shape = tensor->get_shape();
@@ -54,12 +57,12 @@ void ov::npuw::LLMInferRequest::prepare_for_new_conversation() {
     LOG_VERB(__PRETTY_FUNCTION__);
     LOG_BLOCK();
 
-    auto prefill_compiled = m_prefill_request->get_compiled_model();
     fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("input_ids")), 0u);
     fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("attention_mask")), 0u);
     fill_tensor<int64_t>(m_prefill_request->get_tensor(m_prefill_in_ports.at("position_ids")), 0u);
     fill_tensor<int64_t>(m_kvcache_request->get_tensor(m_kvcache_in_ports.at("attention_mask")), 0u);
     m_kvcache_desc.num_stored_tokens = 0u;
+
     LOG_VERB("Done");
 }
 
@@ -71,13 +74,9 @@ void ov::npuw::LLMInferRequest::infer_prefill(ov::SoPtr<ov::ITensor> input_ids,
 
     prepare_for_new_conversation();
 
-    auto prefill_compiled = m_prefill_request->get_compiled_model();
-
     auto padded_input_ids = m_prefill_request->get_tensor(m_prefill_in_ports.at("input_ids"));
     const size_t offset = padded_input_ids->get_size() - input_ids->get_size();
-    std::copy_n(input_ids->data<int64_t>(),
-                input_ids->get_size(),
-                padded_input_ids->data<int64_t>() + offset);
+    std::copy_n(input_ids->data<int64_t>(), input_ids->get_size(), padded_input_ids->data<int64_t>() + offset);
 
     auto padded_attention_mask = m_prefill_request->get_tensor(m_prefill_in_ports.at("attention_mask"));
     std::copy_n(attention_mask->data<int64_t>(),
@@ -85,9 +84,7 @@ void ov::npuw::LLMInferRequest::infer_prefill(ov::SoPtr<ov::ITensor> input_ids,
                 padded_attention_mask->data<int64_t>() + offset);
 
     auto padded_position_ids = m_prefill_request->get_tensor(m_prefill_in_ports.at("position_ids"));
-    std::copy_n(position_ids->data<int64_t>(),
-                position_ids->get_size(),
-                padded_position_ids->data<int64_t>() + offset);
+    std::copy_n(position_ids->data<int64_t>(), position_ids->get_size(), padded_position_ids->data<int64_t>() + offset);
 
     m_prefill_request->infer();
     m_kvcache_desc.num_stored_tokens += static_cast<uint32_t>(input_ids->get_size());
@@ -106,7 +103,7 @@ void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr<ov::ITensor> input_ids,
 
     // NB: KV-cache is full, further generation is impossible
     if (m_kvcache_desc.num_stored_tokens == m_kvcache_desc.total_size) {
-        OPENVINO_THROW("KV-Cache is full");
+        OPENVINO_THROW("KV-Cache is full.");
     }
 
     if (m_need_copy_kvcache) {
@@ -119,21 +116,25 @@ void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr<ov::ITensor> input_ids,
         const auto& input_name = std::regex_replace(output_name, std::regex("present"), "past_key_values");
         auto kvcache_in_tensor = m_kvcache_request->get_tensor(m_kvcache_in_ports.at(input_name));
+
+        // FIXME: We don't need to fill whole tensor with 0s, but only tensor.size() - num_stored_tokens
+        // taking into account kvcache dimension.
         fill_tensor<ov::float16>(kvcache_in_tensor, 0);
 
-        auto prefill_out_slice = make_tensor_slice(
-            prefill_out_tensor, m_kvcache_desc.dim,
-            m_kvcache_desc.max_prompt_size - m_kvcache_desc.num_stored_tokens, m_kvcache_desc.max_prompt_size
-        );
+        auto prefill_out_slice =
+            make_tensor_slice(prefill_out_tensor,
+                              m_kvcache_desc.dim,
+                              m_kvcache_desc.max_prompt_size - m_kvcache_desc.num_stored_tokens,
+                              m_kvcache_desc.max_prompt_size);
+
+        auto kvcache_in_slice =
+            make_tensor_slice(kvcache_in_tensor, m_kvcache_desc.dim, 0u, m_kvcache_desc.num_stored_tokens);
 
-        auto kvcache_in_slice = make_tensor_slice(
-            kvcache_in_tensor, m_kvcache_desc.dim, 0u, m_kvcache_desc.num_stored_tokens
-        );
         prefill_out_slice->copy_to(kvcache_in_slice._ptr);
     }
 
     LOG_VERB("Prepare attention mask pattern.");
-    auto* attention_mask_data = m_kvcache_request->get_tensor(
-        m_kvcache_in_ports.at("attention_mask"))->data<int64_t>();
+    auto* attention_mask_data =
+        m_kvcache_request->get_tensor(m_kvcache_in_ports.at("attention_mask"))->data<int64_t>();
     attention_mask_data[m_kvcache_desc.total_size - 1] = 1;
 
     m_need_copy_kvcache = false;
@@ -160,9 +161,10 @@ void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr<ov::ITensor> input_ids,
         const auto& output_name = kvcache_compiled->outputs()[kStartOutputKVCacheLayers + i].get_any_name();
         const auto& input_name = std::regex_replace(output_name, std::regex("present"), "past_key_values");
         auto kvcache_in_tensor = m_kvcache_request->get_tensor(m_kvcache_in_ports.at(input_name));
-        auto kvcache_in_slice = make_tensor_slice(
-            kvcache_in_tensor, m_kvcache_desc.dim, m_kvcache_desc.num_stored_tokens - 1, m_kvcache_desc.num_stored_tokens
-        );
+        auto kvcache_in_slice = make_tensor_slice(kvcache_in_tensor,
+                                                  m_kvcache_desc.dim,
+                                                  m_kvcache_desc.num_stored_tokens - 1,
+                                                  m_kvcache_desc.num_stored_tokens);
         auto kvcache_out_tensor = m_kvcache_request->get_tensor(m_kvcache_out_ports.at(output_name));
         kvcache_out_tensor->copy_to(kvcache_in_slice._ptr);
     }
@@ -172,9 +174,13 @@ void ov::npuw::LLMInferRequest::infer_generate(ov::SoPtr<ov::ITensor> input_ids,
 void ov::npuw::LLMInferRequest::infer() {
     const auto& inputs = get_inputs();
 
-    auto input_ids      = get_tensor(inputs[0]);
+    auto input_ids = get_tensor(inputs[0]);
     auto attention_mask = get_tensor(inputs[1]);
-    auto position_ids   = get_tensor(inputs[2]);
+    auto position_ids = get_tensor(inputs[2]);
+
+    OPENVINO_ASSERT(ov::element::i64 == input_ids->get_element_type());
+    OPENVINO_ASSERT(ov::element::i64 == attention_mask->get_element_type());
+    OPENVINO_ASSERT(ov::element::i64 == position_ids->get_element_type());
 
     if (input_ids->get_size() != 1) {
         infer_prefill(input_ids, attention_mask, position_ids);
diff --git a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.hpp b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.hpp
index bef90aab4a4d69..3703d41f0c2950 100644
--- a/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.hpp
+++ b/src/plugins/intel_npu/src/plugin/npuw/llm_infer_request.hpp
@@ -6,9 +6,9 @@
 
 #include <memory>
 
-#include "openvino/runtime/isync_infer_request.hpp"
-#include "openvino/core/descriptor/output.hpp"
 #include "llm_compiled_model.hpp"
"openvino/core/descriptor/output.hpp" +#include "openvino/runtime/isync_infer_request.hpp" namespace ov { namespace npuw { @@ -24,8 +24,12 @@ class LLMInferRequest final : public ov::ISyncInferRequest { void check_tensors() const override{}; - virtual std::vector get_profiling_info() const { return {}; } - virtual std::vector> query_state() const { return {}; } + virtual std::vector get_profiling_info() const { + return {}; + } + virtual std::vector> query_state() const { + return {}; + } private: void prepare_for_new_conversation(); diff --git a/src/plugins/intel_npu/src/plugin/npuw/logging.hpp b/src/plugins/intel_npu/src/plugin/npuw/logging.hpp index ca0ece85f7f2bb..95c9a742db7842 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/logging.hpp +++ b/src/plugins/intel_npu/src/plugin/npuw/logging.hpp @@ -64,5 +64,5 @@ void dump_failure(const std::shared_ptr& model, const std::string& de } while (0) #ifdef _MSC_VER - #define __PRETTY_FUNCTION__ __FUNCSIG__ +# define __PRETTY_FUNCTION__ __FUNCSIG__ #endif diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index ac9ac64f6e5046..28ac490b04ce1e 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -7,6 +7,7 @@ #include #include "compiled_model.hpp" +#include "npuw/compiled_model.hpp" #include "driver_compiler_adapter.hpp" #include "intel_npu/common/device_helpers.hpp" #include "intel_npu/common/igraph.hpp" @@ -16,7 +17,6 @@ #include "intel_npu/config/npuw.hpp" #include "intel_npu/config/runtime.hpp" #include "intel_npu/utils/zero/zero_init.hpp" -#include "npuw/compiled_model_factory.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/parameter.hpp" #include "openvino/runtime/intel_npu/properties.hpp" @@ -631,7 +631,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< if (localProperties.count(ov::cache_dir.name()) || !_globalConfig.get().empty()) { OPENVINO_THROW("Option 'CACHE_DIR' is not supported with NPU_USE_NPUW!"); } - return ov::npuw::CompiledModelFactory::create(model->clone(), shared_from_this(), localProperties); + return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); } else { // NPUW is disabled, remove the key from the properties localProperties.erase(useNpuwKey);