diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp index e1854df1954e09..16b5c129e59dc4 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/sdpa/sdpa_kernel_opt.cpp @@ -78,7 +78,8 @@ static std::vector get_internal_buffer_sizes(const sdpa_params& sdpa_par return {blocks_indexes_buf_size}; } else { if (sdpa_params.has_dynamic_tensors() || kernel_type == KernelsTypes::MULTI_TOKENS) { - return {1, 1}; + const auto default_bytes_count = BytesPerElement(get_softmax_acc_type()); + return {default_bytes_count, default_bytes_count}; } else { TransposedDimensionAccessHelperBase dims_q(sdpa_params.inputs[0], sdpa_params.input0_order); const auto& output = sdpa_params.outputs[0]; diff --git a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scaled_dot_product_attention.cpp b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scaled_dot_product_attention.cpp index dbd878cb651923..c99c4cf501997e 100644 --- a/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scaled_dot_product_attention.cpp +++ b/src/plugins/intel_gpu/tests/functional/single_layer_tests/dynamic/scaled_dot_product_attention.cpp @@ -244,6 +244,21 @@ const std::vector> shapes{ {ov::Shape{1, 1, 7, 7}, ov::Shape{1, 1, 1, 1}, ov::Shape{2, 1, 10, 10}}} }, }, + // normal case, shapes of q,k,v are same, static shapes + { + // q shape + {ov::test::InputShape{ov::PartialShape{1, 8, 100, 128}, + {ov::Shape{1, 8, 100, 128}}} + }, + // kv shape + {ov::test::InputShape{ov::PartialShape{1, 8, 100, 128}, + {ov::Shape{1, 8, 100, 128}}} + }, + // attn shape: [B, 1, -1, L0+L1] + {ov::test::InputShape{ov::PartialShape{1, 1, 100, 100}, + {ov::Shape{1, 1, 100, 100}}} + }, + }, }; const auto params = testing::Combine(testing::Values(ov::element::f16 /*, ov::element::f32 */),