Skip to content

Commit

Permalink
[GPU] Do not apply crop optimization for squeeze if the crop axis matches the squeeze axis
Browse files Browse the repository at this point in the history
  • Loading branch information
sshlyapn committed Aug 28, 2024
1 parent 4d716ba commit 9bffde9
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 1 deletion.
3 changes: 3 additions & 0 deletions src/plugins/intel_gpu/src/graph/crop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,9 @@ std::string crop_inst::to_string(crop_node const& node) {
}
crop_info.add("reference input size", ref_in_sizes.to_string());
crop_info.add("offset", offsets.to_string());
crop_info.add("axis", desc->axis);
crop_info.add("num_splits", desc->num_splits);
crop_info.add("output_idx", desc->output_idx);

node_info->add("crop info", crop_info);
node_info->dump(primitive_description);
Expand Down
13 changes: 12 additions & 1 deletion src/plugins/intel_gpu/src/graph/include/reshape_inst.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,18 @@ struct typed_program_node<reshape> : public typed_program_node_base<reshape> {
if (prim->mode == reshape::reshape_mode::squeeze || prim->mode == reshape::reshape_mode::unsqueeze) {
// For proper padding propagation we need to know output pattern at model loading stage
// in case of squeeze/unsqueeze mode
return prim->output_pattern.size() > 0;
if (prim->output_pattern.empty())
return false;

if (input().is_type<crop>() && prim->mode == reshape::reshape_mode::squeeze) {
const auto crop_axis = input().as<crop>().get_primitive()->axis;
const auto& output_pattern = prim->output_pattern;

// Do not propagate output padding in squeeze mode if the squeezed dimension corresponds to the crop axis
return std::find(output_pattern.begin(), output_pattern.end(), crop_axis) == output_pattern.end();
}

return true;
}

// TODO: This function is to limit condition to a specific case (crop + reshape) among cases for the base mode
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -857,6 +857,75 @@ TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_unsqueeze) {
ASSERT_EQ(output_ptr_3[i], out3[i]);
}

TEST(prepare_buffer_fusing, in_place_crop_dynamic_reshape_squeeze_crop_axis) {
    // Checks that the in-place crop optimization is NOT applied when a squeeze-mode
    // reshape following the crop removes the very axis the crop splits on:
    // "crop2" must stay unoptimized, and both halves of the variadic split must
    // still match the reference "full_output" tensor element-wise.
    auto& engine = get_test_engine();
    tests::random_generator rnd(GET_SUITE_NAME);

    // Split along the batch axis; the reshape below squeezes this same axis.
    const int64_t split_axis = 0;

    auto dyn_in_layout = layout{ ov::PartialShape{2, -1, 4}, data_types::f32, format::bfyx};
    auto in_mem        = engine.allocate_memory({ {2, 2, 4}, data_types::f32, format::bfyx });
    auto wei_mem       = engine.allocate_memory({ {8, 4}, data_types::f32, format::bfyx });
    auto bias_buf      = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx });
    auto axis_buf      = engine.allocate_memory({ {}, data_types::i64, format::bfyx });
    auto split_len_buf = engine.allocate_memory({ {2}, data_types::i64, format::bfyx });
    auto scale_buf     = engine.allocate_memory({ {1, 1}, data_types::f32, format::bfyx });

    // Random data for input/weights/bias; exact values are irrelevant since the
    // test compares the split halves against the network's own full output.
    set_values(in_mem, rnd.generate_random_1d<float>(in_mem->count(), 0, 1));
    set_values(wei_mem, rnd.generate_random_1d<float>(wei_mem->count(), 0, 1));
    set_values(bias_buf, rnd.generate_random_1d<float>(bias_buf->count(), 0, 1));
    set_values(scale_buf, { 1.f });  // identity scale so "second_half" equals the raw crop result
    set_values<int64_t>(axis_buf, {split_axis});
    set_values<int64_t>(split_len_buf, { 1, 1 });

    const auto crop_mode = cldnn::crop_ngraph_op_mode::variadic_split;
    topology topology(
        input_layout("input", dyn_in_layout),
        data("axis", axis_buf),
        data("splits_length", split_len_buf),
        data("eltwise_data", scale_buf),
        data("weights", wei_mem),
        data("bias", bias_buf),
        fully_connected("fc", input_info("input"), "weights", "bias", data_types::f32, 3, 2),
        crop("crop1", { input_info("fc"), input_info("axis"), input_info("splits_length") }, cldnn::tensor(1), cldnn::tensor(0), crop_mode, 0, split_axis),
        reorder("first_half", input_info("crop1"), format::bfyx, data_types::f32),
        crop("crop2", { input_info("fc"), input_info("axis"), input_info("splits_length") }, cldnn::tensor(1), cldnn::tensor(0), crop_mode, 1, split_axis),
        // Squeeze removes dimension 0 — the same axis crop2 splits on.
        reshape("reshape", input_info("crop2"), false, std::vector<int64_t>{0}, ov::PartialShape{-1, 8}, cldnn::reshape::reshape_mode::squeeze),
        eltwise("multiply", { input_info("reshape"), input_info("eltwise_data") }, eltwise_mode::prod),
        reorder("second_half", input_info("multiply"), format::bfyx, data_types::f32),
        reorder("full_output", input_info("fc"), format::bfyx, data_types::f32)
    );

    auto config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
    config.set_property(ov::intel_gpu::optimize_data(true));

    network network(engine, topology, config);
    network.set_input_data("input", in_mem);

    // The squeezed dimension coincides with the crop axis, so buffer fusing
    // must have rejected the in-place optimization for crop2.
    ASSERT_FALSE(network.get_primitive("crop2")->can_be_optimized());

    auto outputs = network.execute();

    auto ref_mem = outputs.at("full_output").get_memory();
    auto lo_mem  = outputs.at("first_half").get_memory();
    auto hi_mem  = outputs.at("second_half").get_memory();

    cldnn::mem_lock<float> ref(ref_mem, get_test_stream());
    cldnn::mem_lock<float> lo(lo_mem, get_test_stream());
    cldnn::mem_lock<float> hi(hi_mem, get_test_stream());

    // first_half must equal the lower half of the reference output,
    // second_half the upper half.
    const size_t half = ref.size() / 2;
    for (size_t i = 0; i < half; ++i) {
        ASSERT_EQ(lo[i], ref[i]) << i;
        ASSERT_EQ(hi[i], ref[half + i]) << i;
    }
}

TEST(prepare_buffer_fusing, in_place_crop_dynamic_split_lengths) {
auto& engine = get_test_engine();

Expand Down

0 comments on commit 9bffde9

Please sign in to comment.