From 70712ef621aa47d599bf5bfcc9887d5505bbc186 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Tue, 26 Nov 2024 17:41:11 +0000 Subject: [PATCH 1/3] Decide when to enable NPUW_DQ_FULL property --- src/cpp/src/llm_pipeline_static.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index 2beb7d64be..7479c42994 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -457,6 +457,7 @@ void merge_config_with(ov::AnyMap& lhs, const ov::AnyMap& rhs) { struct NPUDesc { std::string arch; int64_t max_tiles; + bool compiler_dq; }; std::optional extract_npu_descriptor(ov::Core& core) { @@ -466,7 +467,14 @@ std::optional extract_npu_descriptor(ov::Core& core) { } const auto arch = core.get_property("NPU", ov::device::architecture); const auto max_tiles = core.get_property("NPU", ov::intel_npu::max_tiles); - return std::make_optional(NPUDesc{arch, max_tiles}); + + bool compiler_dq = false; + const auto supported_internal_properties = core.get_property("NPU", ov::internal::supported_properties); + if (std::find(supported_internal_properties.begin(), supported_internal_properties.end(), + ov::internal::npu_compiler_dq) != supported_internal_properties.end()) { + compiler_dq = true; + } + return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq}); } ov::AnyMap get_baseline_common_config() { @@ -508,6 +516,9 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr& model, npudesc->max_tiles != -1) { config.emplace("NPU_DPU_GROUPS", npudesc->max_tiles); } + if (npudesc.has_value() && npudesc->compiler_dq) { + config.emplace("NPUW_DQ_FULL", "NO"); + } return config; } @@ -523,6 +534,9 @@ ov::AnyMap get_default_generate_config(const std::shared_ptr& model, if (npudesc.has_value() && npudesc->arch == "4000") { config.emplace("NPU_DPU_GROUPS", 4); } + if (npudesc.has_value() && npudesc->compiler_dq) { + config.emplace("NPUW_DQ_FULL", "NO"); + } return config; } From 807564d367bfeca4fc74eab1139d73b1590e4f8b Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Tue, 26 Nov 2024 19:34:08 +0000 Subject: [PATCH 2/3] Fix internal properties usage --- src/cpp/CMakeLists.txt | 2 +- src/cpp/src/llm_pipeline_static.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index d02f32ded9..226630c15b 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -71,7 +71,7 @@ target_include_directories(${TARGET_NAME} target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}") -target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime PRIVATE openvino::threading nlohmann_json::nlohmann_json jinja2cpp) +target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime openvino::runtime::dev PRIVATE openvino::threading nlohmann_json::nlohmann_json jinja2cpp) target_compile_features(${TARGET_NAME} PUBLIC cxx_std_17) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index 7479c42994..9406ca5d94 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -19,6 +19,7 @@ #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/intel_npu/properties.hpp" +#include "openvino/runtime/internal_properties.hpp" #include "openvino/core/parallel.hpp" #include @@ -471,7 +472,7 @@ std::optional extract_npu_descriptor(ov::Core& core) { bool compiler_dq = false; const auto supported_internal_properties = core.get_property("NPU", ov::internal::supported_properties); if (std::find(supported_internal_properties.begin(), supported_internal_properties.end(), - ov::internal::npu_compiler_dq) != supported_internal_properties.end()) { + "NPU_COMPILER_DQ") != supported_internal_properties.end()) { compiler_dq = true; } return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq}); From 0b5badadaf8cf914cba00d85a2b3f8bb5a4d6260 Mon Sep 17 00:00:00 2001 From: Alexey Smirnov Date: Wed, 27 Nov 2024 16:47:03 +0000 Subject: [PATCH 3/3] Switch from internal properties to device capabilities --- src/cpp/CMakeLists.txt | 2 +- src/cpp/src/llm_pipeline_static.cpp | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 226630c15b..d02f32ded9 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -71,7 +71,7 @@ target_include_directories(${TARGET_NAME} target_include_directories(${TARGET_NAME} SYSTEM PRIVATE "${safetensors.h_SOURCE_DIR}") -target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime openvino::runtime::dev PRIVATE openvino::threading nlohmann_json::nlohmann_json jinja2cpp) +target_link_libraries(${TARGET_NAME} PUBLIC openvino::runtime PRIVATE openvino::threading nlohmann_json::nlohmann_json jinja2cpp) target_compile_features(${TARGET_NAME} PUBLIC cxx_std_17) diff --git a/src/cpp/src/llm_pipeline_static.cpp b/src/cpp/src/llm_pipeline_static.cpp index 9406ca5d94..402417918d 100644 --- a/src/cpp/src/llm_pipeline_static.cpp +++ b/src/cpp/src/llm_pipeline_static.cpp @@ -19,7 +19,6 @@ #include "openvino/core/preprocess/pre_post_process.hpp" #include "openvino/runtime/properties.hpp" #include "openvino/runtime/intel_npu/properties.hpp" -#include "openvino/runtime/internal_properties.hpp" #include "openvino/core/parallel.hpp" #include @@ -470,9 +469,9 @@ std::optional extract_npu_descriptor(ov::Core& core) { const auto max_tiles = core.get_property("NPU", ov::intel_npu::max_tiles); bool compiler_dq = false; - const auto supported_internal_properties = core.get_property("NPU", ov::internal::supported_properties); - if (std::find(supported_internal_properties.begin(), supported_internal_properties.end(), - "NPU_COMPILER_DQ") != supported_internal_properties.end()) { + const auto device_caps = core.get_property("NPU", ov::device::capabilities); + if (std::find(device_caps.begin(), device_caps.end(), + "COMPILER_DYNAMIC_QUANTIZATION") != device_caps.end()) { compiler_dq = true; } return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq});