Skip to content

Commit

Permalink
StaticLLMPipeline: Decide when to enable NPUW_DQ_FULL property (#1258)
Browse files Browse the repository at this point in the history
Decide whether to enable NPUW_DQ_FULL based on OV properties that the
NPU Plugin is yet to support.
releases/2024/5 mirror:
openvinotoolkit/openvino.genai#1272

Dependencies
* openvinotoolkit/openvino#27678 needs to be
merged first
* openvinotoolkit/openvino#27789
  • Loading branch information
smirnov-alexey authored Nov 28, 2024
1 parent 7e84bba commit 5289d2e
Showing 1 changed file with 15 additions and 1 deletion.
16 changes: 15 additions & 1 deletion src/llm_pipeline_static.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,7 @@ void merge_config_with(ov::AnyMap& lhs, const ov::AnyMap& rhs) {
// Description of the NPU device, filled in by extract_npu_descriptor().
//
// Every scalar member carries a default initializer so that a
// default-constructed NPUDesc never holds indeterminate values. The struct
// remains an aggregate, so NPUDesc{arch, max_tiles} and
// NPUDesc{arch, max_tiles, compiler_dq} keep working unchanged.
struct NPUDesc {
    // Value of the "NPU" device's ov::device::architecture property.
    std::string arch;
    // Value of the "NPU" device's ov::intel_npu::max_tiles property.
    // Defaults to -1, the value the prefill config path checks against
    // before setting "NPU_DPU_GROUPS".
    int64_t max_tiles = -1;
    // True when the device capabilities list contains
    // "COMPILER_DYNAMIC_QUANTIZATION".
    bool compiler_dq = false;
};

std::optional<NPUDesc> extract_npu_descriptor(ov::Core& core) {
Expand All @@ -466,7 +467,14 @@ std::optional<NPUDesc> extract_npu_descriptor(ov::Core& core) {
}
const auto arch = core.get_property("NPU", ov::device::architecture);
const auto max_tiles = core.get_property("NPU", ov::intel_npu::max_tiles);
return std::make_optional(NPUDesc{arch, max_tiles});

bool compiler_dq = false;
const auto device_caps = core.get_property("NPU", ov::device::capabilities);
if (std::find(device_caps.begin(), device_caps.end(),
"COMPILER_DYNAMIC_QUANTIZATION") != device_caps.end()) {
compiler_dq = true;
}
return std::make_optional(NPUDesc{arch, max_tiles, compiler_dq});
}

ov::AnyMap get_baseline_common_config() {
Expand Down Expand Up @@ -508,6 +516,9 @@ ov::AnyMap get_default_prefill_config(const std::shared_ptr<ov::Model>& model,
npudesc->max_tiles != -1) {
config.emplace("NPU_DPU_GROUPS", npudesc->max_tiles);
}
if (npudesc.has_value() && npudesc->compiler_dq) {
config.emplace("NPUW_DQ_FULL", "NO");
}
return config;
}

Expand All @@ -523,6 +534,9 @@ ov::AnyMap get_default_generate_config(const std::shared_ptr<ov::Model>& model,
if (npudesc.has_value() && npudesc->arch == "4000") {
config.emplace("NPU_DPU_GROUPS", 4);
}
if (npudesc.has_value() && npudesc->compiler_dq) {
config.emplace("NPUW_DQ_FULL", "NO");
}
return config;
}

Expand Down

0 comments on commit 5289d2e

Please sign in to comment.