[GPU] support fsv16 for output of group norm bfyx opt kernel (#27906)
### Details:
- In group normalization, the bfyx opt kernel can be required to handle a plain bfyx input together with a b_fs_yx_fsv16 output, so that layout is now enabled as a supported output (see the sketch below).
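For reference, a minimal standalone sketch (illustrative only, not OpenVINO code) of how element offsets differ between the two layouts: in b_fs_yx_fsv16 the feature axis is blocked into slices of 16, and the intra-slice index is stored innermost.

```cpp
#include <cstddef>

constexpr std::size_t FSV = 16;  // feature-slice size in b_fs_yx_fsv16

// Offset of element (b, f, y, x) in a plain bfyx tensor of shape (B, F, Y, X).
std::size_t offset_bfyx(std::size_t b, std::size_t f, std::size_t y, std::size_t x,
                        std::size_t F, std::size_t Y, std::size_t X) {
    return ((b * F + f) * Y + y) * X + x;
}

// Same element in b_fs_yx_fsv16: features are split into ceil(F / 16) slices
// (padded to a multiple of 16), and f % 16 becomes the innermost dimension.
std::size_t offset_fsv16(std::size_t b, std::size_t f, std::size_t y, std::size_t x,
                         std::size_t F, std::size_t Y, std::size_t X) {
    const std::size_t num_slices = (F + FSV - 1) / FSV;
    return (((b * num_slices + f / FSV) * Y + y) * X + x) * FSV + f % FSV;
}
```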

### Tickets:
 - 156608
davidsnam-intel authored Dec 12, 2024
1 parent bf8be26 commit b61a685
Showing 2 changed files with 82 additions and 0 deletions.
@@ -20,6 +20,7 @@ ParamsKey GroupNormalizationKernelBfyx::GetSupportedKey() const {
k.EnableInputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::bfyx);
k.EnableOutputLayout(DataLayout::bfzyx);
k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
k.EnableBatching();
k.EnableTensorOffset();
k.EnableTensorPitches();
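The added key advertises b_fs_yx_fsv16 as a supported output layout. Loosely, the selector only considers kernels whose supported key covers every layout required by the actual parameters; a hypothetical sketch (the real kernel_selector types differ):

```cpp
#include <cstdint>

// Hypothetical model, not the real kernel_selector API: supported layouts
// are bitmasks, and a kernel is eligible only if it covers every bit the
// actual parameters require.
enum : std::uint32_t { BFYX = 1u << 0, BFZYX = 1u << 1, B_FS_YX_FSV16 = 1u << 2 };

struct LayoutKey {
    std::uint32_t inputs;
    std::uint32_t outputs;
};

bool supports(const LayoutKey& required, const LayoutKey& supported) {
    return (required.inputs & ~supported.inputs) == 0 &&
           (required.outputs & ~supported.outputs) == 0;
}

// Before this change: supported.outputs lacked B_FS_YX_FSV16, so a request
// with a bfyx input and an fsv16 output could never select the bfyx kernel.
```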
@@ -4,8 +4,13 @@

#include "test_utils.h"
#include "random_generator.hpp"
#include "program_wrapper.h"
#include "pass_manager.h"

#include <intel_gpu/primitives/input_layout.hpp>
#include <intel_gpu/primitives/group_normalization.hpp>
#include <intel_gpu/primitives/reorder.hpp>
#include <intel_gpu/primitives/permute.hpp>
#include "openvino/reference/group_normalization.hpp"
#include "intel_gpu/runtime/compilation_context.hpp"

@@ -156,3 +161,79 @@ INSTANTIATE_TEST_SUITE_P(
::testing::ValuesIn({padding(), padding({0, 0, 1, 1})})));

} // anonymous namespace

#ifdef ENABLE_ONEDNN_FOR_GPU
TEST(group_normalization, input_bfyx_output_fsv16) {
auto& engine = get_test_engine();

auto in_layout = layout{ ov::PartialShape{1, 3, 3, 2}, data_types::f32, format::bfyx };
auto scale_layout = layout{ ov::PartialShape{1, 1, 1, 1}, data_types::f32, format::bfyx };
auto bias_layout = layout{ ov::PartialShape{1, 1, 1, 1}, data_types::f32, format::bfyx };

auto input_mem = engine.allocate_memory(in_layout);
auto scale_mem = engine.allocate_memory(scale_layout);
auto bias_mem = engine.allocate_memory(bias_layout);

set_values(input_mem,
{ 0.125f, 0.125f, 0.875f, -0.125f, 0.125f, 0.750f,
0.875f, -0.375f, -0.375f, -1.000f, -0.625f, -1.000f,
-0.125f, -0.750f, -0.250f, 0.625f, -0.500f, -0.875f });
set_values(scale_mem, { 0.125f });
set_values(bias_mem, { 0.75f });

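// Golden topology: group_normalization runs directly on the plain bfyx input.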
topology topology_g(
input_layout("input", in_layout),
input_layout("scale", scale_layout),
input_layout("bias", bias_layout),
group_normalization("group_normalization", input_info("input"), input_info("scale"), input_info("bias"), static_cast<std::int64_t>(1), 0.0025),
permute("output", input_info("group_normalization"), {0, 1, 2, 3})
);

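// Test topology: reorder1's output layout is overridden back to bfyx below,
// while reorder2 requests fsv16, so group_normalization must consume bfyx
// and produce b_fs_yx_fsv16.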
topology topology_t(
input_layout("input", in_layout),
input_layout("scale", scale_layout),
input_layout("bias", bias_layout),
reorder("reorder1", input_info("input"), format::b_fs_yx_fsv16, data_types::f32),
group_normalization("group_normalization", input_info("reorder1"), input_info("scale"), input_info("bias"), static_cast<std::int64_t>(1), 0.0025),
reorder("reorder2", input_info("group_normalization"), format::b_fs_yx_fsv16, data_types::f32),
permute("output", input_info("reorder2"), {0, 1, 2, 3})
);

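// Force the OCL bfyx group-normalization implementation so the new
// input/output layout combination is handled by this kernel.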
ExecutionConfig config = get_test_default_config(engine);
ov::intel_gpu::ImplementationDesc gn_impl = { format::bfyx, "", impl_types::ocl };
config.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{{"group_normalization", gn_impl}}));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
config.set_property(ov::intel_gpu::optimize_data(true));

network network_g(engine, topology_g, config);
network_g.set_input_data("input", input_mem);
network_g.set_input_data("scale", scale_mem);
network_g.set_input_data("bias", bias_mem);

auto outputs_g = network_g.execute();
auto output_g = outputs_g.at("output").get_memory();
cldnn::mem_lock<float> output_mem_g(output_g, get_test_stream());

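// Build the program without running the normal build pipeline, force
// reorder1's output layout to stay bfyx, then finish building via
// program_wrapper::build.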
auto program = program::build_program(engine, topology_t, config, false, true);
auto& reorder_node = program->get_node("reorder1");
std::vector<layout> layouts = {in_layout};
reorder_node.set_output_layouts(layouts, false);
program_wrapper::build(*program);

network network_t(program);
network_t.set_input_data("input", input_mem);
network_t.set_input_data("scale", scale_mem);
network_t.set_input_data("bias", bias_mem);

auto outputs_t = network_t.execute();
auto output_t = outputs_t.at("output").get_memory();
cldnn::mem_lock<float> output_mem_t(output_t, get_test_stream());

ASSERT_EQ(output_mem_g.size(), output_mem_t.size());
ASSERT_EQ(outputs_g.begin()->first, outputs_t.begin()->first);

for (std::size_t i = 0; i < output_mem_t.size(); i++) {
ASSERT_NEAR(output_mem_t[i], output_mem_g[i], 0.0001);
}
}
#endif // ENABLE_ONEDNN_FOR_GPU
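For context, with num_groups = 1, a single batch, and the scalar scale/bias used above, the expected result reduces to normalizing over all C*H*W elements and applying the affine transform; a minimal sketch (hypothetical helper, not the openvino/reference API):

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

// Hypothetical reference for num_groups = 1 and batch 1 with scalar scale
// and bias: normalize over all elements, then apply scale and bias.
std::vector<float> group_norm_ref(const std::vector<float>& x,
                                  float scale, float bias, float eps) {
    float mean = 0.f;
    for (float v : x) mean += v;
    mean /= static_cast<float>(x.size());

    float var = 0.f;
    for (float v : x) var += (v - mean) * (v - mean);
    var /= static_cast<float>(x.size());

    std::vector<float> y(x.size());
    for (std::size_t i = 0; i < x.size(); ++i)
        y[i] = scale * (x[i] - mean) / std::sqrt(var + eps) + bias;
    return y;
}
```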
