Skip to content

Commit

Permalink
[GPU] Fix sd1.5_controlnet_lora bad image.
Browse files Browse the repository at this point in the history
1. Fixed a bug where a dynamic tensor's value would disappear when dynamic and static inputs were used together.
2. Fixed wrong onednn gemm post-op dims in the spatial 1x1 case.
3. Fixed a side effect of the can_be_optimized condition in allocate_output.

Signed-off-by: hyunback <[email protected]>
  • Loading branch information
hyunback committed Oct 2, 2024
1 parent ad448dd commit 6ec9abd
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 2 deletions.
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/graph/gemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,8 @@ layout gemm_inst::transform_output_layout(const std::shared_ptr<const gemm> prim
(i == 1) ? transposed_input1_pshape :
input_layouts[i].get_partial_shape();
for (size_t j = 0; j != input_pshape.size(); ++j) {
ov::Dimension::merge(output_pshape[j], output_pshape[j], input_pshape[j]);
if (input_pshape[j].get_max_length() != input_pshape[j].get_min_length())
ov::Dimension::merge(output_pshape[j], output_pshape[j], input_pshape[j]);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/primitive_inst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2122,7 +2122,7 @@ memory::ptr primitive_inst::allocate_output(engine& _engine,
GPU_DEBUG_LOG << "[" << _node.id() << ": constant]" << std::endl;
return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset);
}
} else if (!_node.can_share_buffer() || impl_params.can_be_optimized() || _node.is_output()) {
} else if (!_node.can_share_buffer() || _node.can_be_optimized() || _node.is_output()) {
GPU_DEBUG_LOG << "[" << _node.id() << ": output]" << std::endl;
return ov::intel_gpu::allocate_memory_evenif_zero_bytes(_engine, layout, alloc_type, reset);
} else {
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/intel_gpu/src/graph/program_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1550,6 +1550,10 @@ void program_node::create_onednn_primitive_attributes(
size_t rank = cldnn::format::dimension(in.format);
size_t in_batched_size = in.count() / (in.spatial(0) * in.spatial(1));
dnnl::memory::dims dims = onednn::convert_gemm_tensor(in.get_tensor(), rank, in_batched_size == 1);
bool spatial_dims_can_be_removed = (in.spatial(0) * in.spatial(1) == 1);
if (dims.size() == 4 && spatial_dims_can_be_removed) {
dims.erase(dims.begin() + 2, dims.begin() + 4);
}
dnnl::memory::data_type dt = onednn::convert_data_type(in.data_type);
dnnl::memory::format_tag fmt = onednn::convert_gemm_data_format(dims, in.format);
post_ops.append_binary(alg, dnnl::memory::desc(dims, dt, fmt));
Expand Down
74 changes: 74 additions & 0 deletions src/plugins/intel_gpu/tests/unit/test_cases/gemm_gpu_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,76 @@ class gemm_gpu_tests: public ::testing::Test {
ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error) << "at " << i;
}
}

void test_dynamic_static_broadcast_3dim(std::vector<size_t> BMKN, bool is_caching_test, const double abs_error = 0.0001) {
tests::random_generator rg;
rg.set_seed(GET_SUITE_NAME);

auto& engine = get_test_engine();
cldnn::layout input0_layout;
cldnn::layout input1_layout;

std::vector<int64_t> input0_order = {0, 1, 2};
std::vector<int64_t> input1_order = {0, 1, 2};
std::vector<int64_t> output_order = {0, 1, 2};

size_t BATCH_SIZE = BMKN[0];
size_t M_SIZE = BMKN[1];
size_t K_SIZE = BMKN[2];
size_t N_SIZE = BMKN[3];

ov::Shape input0_shape = { BATCH_SIZE, M_SIZE, K_SIZE };
ov::Shape input1_shape = { 1, K_SIZE, N_SIZE };
ov::Shape output_shape = { BATCH_SIZE, M_SIZE, N_SIZE };

input0_layout = layout{ov::PartialShape::dynamic(input0_shape.size()), data_types::f16, format::bfyx};
input1_layout = layout{ov::PartialShape(input1_shape), data_types::f16, format::bfyx};

auto input0_mem = engine.allocate_memory(layout{ov::PartialShape(input0_shape), data_types::f16, format::bfyx});
auto input1_mem = engine.allocate_memory(layout{ov::PartialShape(input1_shape), data_types::f16, format::bfyx});

auto input_0_data = rg.generate_random_1d<ov::float16>(ov::shape_size(input0_shape), -2, 2);
auto input_1_data = rg.generate_random_1d<ov::float16>(ov::shape_size(input1_shape), -2, 2);

set_values(input0_mem, input_0_data);
set_values(input1_mem, input_1_data);

topology topology;
topology.add(input_layout("input0", input0_layout),
input_layout("input1", input1_layout),
gemm("gemm", { input_info("input0"), input_info("input1") }, data_types::f16, input0_order, input1_order, output_order)
);

ExecutionConfig config = get_test_default_config(engine);
config.set_property(ov::intel_gpu::optimize_data(true));
config.set_property(ov::intel_gpu::allow_new_shape_infer(true));
network::ptr network = get_network(engine, topology, config, get_test_stream_ptr(), is_caching_test);
network->set_input_data("input0", input0_mem);
network->set_input_data("input1", input1_mem);

auto outputs = network->execute();

auto output_mem = outputs.at("gemm").get_memory();
cldnn::mem_lock<ov::float16> output_ptr(output_mem, get_test_stream());

std::vector<ov::float16> ref_out_data;
ref_out_data.resize(ov::shape_size(output_shape));

ov::reference::matmul<ov::float16>(input_0_data.data(),
input_1_data.data(),
ref_out_data.data(),
input0_shape,
input1_shape,
output_shape,
false,
false);

ASSERT_EQ(output_ptr.size(), ref_out_data.size());

for (uint32_t i = 0; i < ref_out_data.size(); ++i) {
ASSERT_NEAR(output_ptr[i], ref_out_data[i], abs_error) << "at " << i;
}
}
};

TEST_F(gemm_gpu_tests, basic_bfyx_t2_inplace_crop_with_pad) {
Expand Down Expand Up @@ -1538,6 +1608,10 @@ TEST_F(gemm_gpu_tests, transpose_matmul_static_4d_f32_n_tile_32_input1_ylast) {
this->test_transpose_matmul_f32(4, false, false, /*BMKN*/{19, 37, 23, 29}, /*input0_order*/{0, 1, 2, 3}, /*input1_order*/{0, 1, 3, 2});
}

// Regression test for the case of a dynamic input0 combined with a static,
// broadcastable input1 (batch 1): per the commit description, the dynamic
// tensor value could previously disappear when dynamic and static inputs
// were used together.
TEST_F(gemm_gpu_tests, test_dynamic_static_broadcast_3dim) {
this->test_dynamic_static_broadcast_3dim(/*BMKN*/{2, 16, 2, 2}, false);
}

TEST_F(gemm_gpu_tests, transpose_matmul_in0_indirect) {
this->test_transpose_indirect(false, true, false);
}
Expand Down

0 comments on commit 6ec9abd

Please sign in to comment.