diff --git a/src/plugins/intel_gpu/src/runtime/execution_config.cpp b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
index 7c8e55cddfe593..44758f73289edb 100644
--- a/src/plugins/intel_gpu/src/runtime/execution_config.cpp
+++ b/src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -246,6 +246,11 @@ void ExecutionConfig::apply_user_properties(const cldnn::device_info& info) {
         set_property(ov::intel_gpu::queue_type(QueueTypes::in_order));
     }
 
+    // Enable KV-cache compression by default for non-systolic platforms
+    if (!is_set_by_user(ov::hint::kv_cache_precision) && !info.supports_immad) {
+        set_property(ov::hint::kv_cache_precision(ov::element::i8));
+    }
+
     user_properties.clear();
 }
 
diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp
index 16db9d89c28b4d..2563fe535a93d9 100644
--- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp
+++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache_sdpa.cpp
@@ -47,8 +47,11 @@ class SDPAWithKVCacheTest : public ::testing::Test, public ::testing::WithParamI
         ov::AnyMap properties = {ov::hint::inference_precision(ov::element::f16),
                                  ov::intel_gpu::hint::enable_sdpa_optimization(true)};
 
-        if (p.compressed)
+        if (p.compressed) {
             properties.emplace(ov::hint::kv_cache_precision(ov::element::i8));
+        } else {
+            properties.emplace(ov::hint::kv_cache_precision(ov::element::undefined));
+        }
 
         const size_t n_heads = 16;
         const size_t n_features = 64;