[GPU] network code cleanup #26908

Merged
32 changes: 0 additions & 32 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
@@ -116,34 +116,10 @@ struct network {

std::vector<std::shared_ptr<primitive_inst>> const& get_outputs() { return _outputs; }

const std::vector<std::shared_ptr<const primitive_inst>>& get_outputs() const {
return reinterpret_cast<const std::vector<std::shared_ptr<const primitive_inst>>&>(_outputs);
}

network_output get_output(const primitive_id& output_id) {
event::ptr evt;
if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling)
evt = get_primitive_event(output_id);
return network_output(evt, get_output_memory(output_id), get_stream_ptr(), get_output_layout(output_id));
}
layout get_node_output_layout(const primitive_id& output_id) const;
memory::ptr get_output_memory(const primitive_id& output_id);
layout get_output_layout(const primitive_id& output_id) const;
std::vector<layout> get_input_layouts() const;

/// @brief Returns the list of primitive ids before and after graph optimization.
/// @details If primitive was not optimized, the old and actual id will be the same.
/// @n If primitive was optimized during graph optimization, the actual id will be "_optimized_".
std::map<primitive_id, primitive_id> get_all_primitives() const {
auto primitive_ids = get_all_primitive_ids();
auto primitive_org_ids = get_all_primitive_org_ids();
std::map<primitive_id, primitive_id> result;
for (decltype(primitive_org_ids.size()) i = 0; i < primitive_org_ids.size(); i++) {
result.emplace(primitive_org_ids[i], primitive_ids[i]);
}
return result;
}

/// @brief Returns the list of @ref event for the primitives that were executed in network.
std::map<primitive_id, event::ptr> get_executed_primitives() const {
auto primitive_ids = get_executed_primitive_ids();
@@ -201,7 +177,6 @@ struct network {
void configure_primitives_second_output();
void build_insts_deps();
uint32_t get_id() const { return net_id; }
uint32_t get_local_id() const { return _local_net_id; }
stream& get_stream() const { return *_stream; }
stream::ptr get_stream_ptr() const { return _stream; }
bool is_internal() const { return _internal; }
@@ -219,7 +194,6 @@ struct network {
const ov::intel_gpu::VariableStateInfo& get_variable_info(const std::string &variable_id) const;
const ov::intel_gpu::VariablesMap& get_variables() const;
const ov::intel_gpu::VariablesInfoMap& get_variables_info() const;
std::vector<primitive_id> get_kv_cache_ids() const { return kv_cache_ids; }

const ExecutionConfig& get_config() const { return _config; }

@@ -243,8 +217,6 @@ struct network {
bool _is_dynamic = false;
bool _enable_profiling = false;
bool _reset_arguments;
uint32_t _local_net_id = 0; // This is for thread-safe deserialization. 'net_id' is globally unique,
// but '_local_net_id' is unique only in each intel_gpu::Graph.

std::unordered_map<primitive_id, std::shared_ptr<primitive_inst>> _primitives;
std::vector<shared_mem_type> _in_out_shared_mem_types;
@@ -255,10 +227,8 @@ struct network {

ov::intel_gpu::VariablesMap _variables_states;
ov::intel_gpu::VariablesInfoMap _variables_state_info;
std::vector<primitive_id> kv_cache_ids;

program::primitives_info _prims_info;
std::map<primitive_id, primitive_id> _ext_id_mapping;
size_t _weights_cache_capacity = 1;

std::unordered_map<primitive_id, event::ptr> _events;
@@ -272,9 +242,7 @@ struct network {
void allocate_primitive_instance(program_node const& node);
void transfer_memory_to_device(std::shared_ptr<primitive_inst> instance, program_node const& node);
void add_to_exec_order(const primitive_id& id);
std::shared_ptr<primitive_inst> find_in_internal_networks(const primitive_id& id) const;
std::shared_ptr<primitive_inst> find_primitive(const primitive_id& id) const;
void check_names();
void add_default_output_chains();
void calculate_weights_cache_capacity();
output_chains_map::iterator add_output_chain(std::shared_ptr<primitive_inst>& p_inst);
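
This header change removes the per-primitive output query API (`get_output()`, `get_node_output_layout()`, `get_all_primitives()`) together with `_local_net_id`, the `kv_cache_ids` bookkeeping, and the internal-network lookup helpers (`find_in_internal_networks()`, `check_names()`). Callers are now expected to consume the map returned by `execute()` directly. A minimal sketch of the resulting calling pattern, assuming a hypothetical "output" primitive id (real usages are in the updated tests below):

```cpp
// Post-cleanup pattern: execute() returns the outputs map itself,
// so there is no separate network.get_output(id) lookup step.
std::map<cldnn::primitive_id, cldnn::network_output> outputs = network.execute();
cldnn::memory::ptr out_mem = outputs.at("output").get_memory();
```
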
3 changes: 2 additions & 1 deletion src/plugins/intel_gpu/src/graph/kv_cache.cpp
@@ -17,7 +17,8 @@ GPU_DEFINE_PRIMITIVE_TYPE_ID(kv_cache)
kv_cache_inst::typed_primitive_inst(network& network, const kv_cache_node& node) :
parent{network, node, false},
memory_state::variable{node.get_primitive()->variable_info.variable_id} {
kv_cache_id = network.get_kv_cache_ids().size();
thread_local size_t kv_cache_counter = 0;
kv_cache_id = kv_cache_counter++;
}

layout kv_cache_inst::calc_output_layout(const kv_cache_node& node, kernel_impl_params const& impl_param) {
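
The constructor now derives `kv_cache_id` from a function-local `thread_local` counter instead of `network.get_kv_cache_ids().size()`, so networks deserialized concurrently on different threads no longer touch shared per-network state. Note the counter is never reset, so ids are unique within a thread rather than dense per network. A self-contained sketch of the idiom (the function name is illustrative, not the plugin's):

```cpp
#include <cstddef>

// Each thread owns its own counter: it is initialized once per thread,
// and increments on one thread are invisible to every other thread.
size_t next_kv_cache_id() {
    thread_local size_t kv_cache_counter = 0;
    return kv_cache_counter++;
}
```
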
89 changes: 20 additions & 69 deletions src/plugins/intel_gpu/src/graph/network.cpp
@@ -203,8 +203,6 @@ network::network(program::ptr program, stream::ptr stream, bool is_internal, boo
calculate_weights_cache_capacity();
allocate_primitives();
configure_primitives_second_output();
if (!_program->is_loaded_from_cache())
check_names();
build_insts_deps();
build_exec_order();
validate_primitives();
@@ -333,11 +331,7 @@ void network::reset_execution(bool wait) {

event::ptr network::set_input_data(const primitive_id& id, memory::ptr data) {
GPU_DEBUG_TRACE_DETAIL << "Set input " << id << " " << data->get_layout().to_short_string() << std::endl;
std::shared_ptr<primitive_inst> primitive_inst;

primitive_inst = find_primitive(id);

OPENVINO_ASSERT(primitive_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id);
auto primitive_inst = find_primitive(id);

if (primitive_inst->type() != input_layout::type_id()) {
CLDNN_ERROR_MESSAGE(id, "primitive " + id + " is not an input");
@@ -481,11 +475,8 @@ network::output_chains_map::iterator network::add_output_chain(std::shared_ptr<p

std::vector<event::ptr> network::set_output_memory(const primitive_id& id, memory::ptr mem_new) {
GPU_DEBUG_TRACE_DETAIL << "Set output " << id << " " << mem_new->get_layout().to_short_string() << std::endl;
std::shared_ptr<primitive_inst> p_inst;
std::vector<event::ptr> ret_ev;
p_inst = find_primitive(id);

OPENVINO_ASSERT(p_inst != nullptr, "[GPU] topology doesn't contain primitive: ", id);
std::shared_ptr<primitive_inst> p_inst = find_primitive(id);

auto iter = std::find(_outputs.begin(), _outputs.end(), p_inst);
if (iter == _outputs.end())
@@ -513,35 +504,10 @@ std::vector<event::ptr> network::set_output_memory(const primitive_id& id, memor
return ret_ev;
}

void cldnn::network::check_names() {
for (auto const& prim : _primitives) {
if (find_in_internal_networks(prim.first) != nullptr)
CLDNN_ERROR_MESSAGE("Network", "Found primitive with id: " + prim.first + "in anotother network.");
}
}

std::shared_ptr<primitive_inst> cldnn::network::find_primitive(const primitive_id& id) const {
if (_primitives.find(id) != _primitives.end())
return _primitives.at(id);

return find_in_internal_networks(id);
}

std::shared_ptr<primitive_inst> cldnn::network::find_in_internal_networks(const primitive_id& id) const {
std::shared_ptr<primitive_inst> ret;

for (auto const& prim : _primitives) {
if (prim.second->type() == condition::type_id()) { // currently only condition inst contains mini networks
auto cond_inst = std::static_pointer_cast<condition_inst>(prim.second);
ret = cond_inst->get_net_true()->find_primitive(id);
if (ret != nullptr)
return ret;
ret = cond_inst->get_net_false()->find_primitive(id);
if (ret != nullptr)
return ret;
}
}
return nullptr;
auto it = _primitives.find(id);
OPENVINO_ASSERT(it != _primitives.end(), "[GPU] Network doesn't contain primitive ", id);
return it->second;
}

std::string network::get_primitive_info(const primitive_id& id) const {
@@ -552,9 +518,6 @@ std::string network::get_primitive_info(const primitive_id& id) const {
bool network::does_node_need_lockable_output(const primitive_id& id) const {
auto prim_inst = find_primitive(id);

OPENVINO_ASSERT(prim_inst, "[GPU] Can't get implementation type, since topology ",
"doesn't contain primitive with requested id: ", id);

const auto& node = prim_inst->get_node();
if (node.is_type<input_layout>()) {
for (const auto& user : node.get_users()) {
@@ -574,15 +537,6 @@ std::string network::get_implementation_info(const primitive_id& id) const {
return _program->get_implementation_info(id);
}

layout network::get_node_output_layout(const primitive_id& output_id) const {
auto res = std::find_if(_outputs.begin(), _outputs.end(), [&](const std::shared_ptr<primitive_inst>& v) {
return v->id() == output_id;
});
OPENVINO_ASSERT(res != _outputs.end(), "[GPU] Couldn't get output layout for ", output_id, ". Output with such name is not found in the outputs list");

return (*res)->get_node_output_layout();
}

memory::ptr network::get_output_memory(const primitive_id& output_id) {
return get_primitive(output_id)->output_memory_ptr();
}
@@ -729,17 +683,6 @@ void network::add_to_exec_order(const primitive_id& id) {
}

std::map<primitive_id, network_output> network::execute(const std::vector<event::ptr>& dependencies) {
execute_impl(dependencies);

auto output_ids = get_output_ids();
std::map<primitive_id, network_output> result;
for (auto& id : output_ids) {
result.emplace(id, get_output(id));
}
return result;
}

void network::execute_impl(const std::vector<event::ptr>& events) {
OV_ITT_SCOPED_TASK(ov::intel_gpu::itt::domains::intel_gpu_plugin, "NetworkImpl::Execute");
NETWORK_DEBUG(*this);

@@ -779,6 +722,21 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
// in some cases.
auto surf_lock = surfaces_lock::create(get_engine().type(), in_out_mem, get_stream());

execute_impl(dependencies);

std::map<primitive_id, network_output> result;
for (auto& inst : _outputs) {
event::ptr ev = nullptr;
const auto& id = inst->id();
if (get_stream().get_queue_type() == QueueTypes::out_of_order || _enable_profiling)
ev = _events.at(id);

result.emplace(id, network_output(ev, inst->output_memory_ptr(0), get_stream_ptr(), inst->get_output_layout(0)));
}
return result;
}

void network::execute_impl(const std::vector<event::ptr>& events) {
set_arguments();

// This extra flush command is needed for dynamic models in both cases of out_of_order / in_order operating mode
@@ -904,10 +862,6 @@ const program::graph_optimizer_info& network::get_optimizer_passes_info() const
}

std::map<primitive_id, primitive_id> network::get_ext_id_mapping() const {
if (_program == nullptr) {
return _ext_id_mapping;
}

std::map<primitive_id, primitive_id> result;
for (auto& prim : _primitives) {
result.emplace(prim.first, prim.second->get_node().get_primitive()->origin_op_name);
@@ -1008,9 +962,6 @@ void network::allocate_primitive_instance(program_node const& node) {
if (node.is_type<data>())
_data_outputs.push_back(inst);
}
if (node.is_type<kv_cache>()) {
kv_cache_ids.push_back(node.id());
}
if (auto state_prim = std::dynamic_pointer_cast<memory_state::variable>(inst)) {
auto prim = inst->get_node().get_primitive();
set_variables_state_info(state_prim->variable_id(), node.get_output_layout(0), state_prim->get_user_specified_type(), prim.get());
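
Beyond the dead-code removal, this file makes two behavioral changes: `find_primitive()` now fails loudly via `OPENVINO_ASSERT` on an unknown id instead of returning `nullptr` after descending into `condition` sub-networks, and `execute()` absorbs the old output-collection wrapper, building the result map directly from `_outputs` and `_events`. The per-output event is only populated for out-of-order queues or when profiling is enabled. A hedged sketch of the new call-site shape (ids hypothetical; real usages are in the updated tests below):

```cpp
// execute() now runs the network and returns its outputs in one call.
auto outputs = network.execute();
for (auto& kv : outputs) {
    cldnn::memory::ptr mem = kv.second.get_memory(); // always set
    cldnn::event::ptr ev = kv.second.get_event();    // null for in-order queues unless profiling
}
```
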
1 change: 0 additions & 1 deletion src/plugins/intel_gpu/src/plugin/graph.cpp
@@ -558,7 +558,6 @@ void Graph::update_profiling_info() {
};

std::map<cldnn::primitive_id, cldnn::event::ptr> executedPrimitives = get_network()->get_executed_primitives();
auto allPrimitives = get_network()->get_all_primitives();

// Get profiling info for all layers
for (auto &profiledID : profilingIDs) {
@@ -183,15 +183,15 @@ TEST(reorder_inputs, impl_forcing_basic_format) {
7.f, 3.f, -2.f, -1.f });

network.set_input_data("input", input);
network.execute();
auto outputs = network.execute();

const auto& prog = network.get_program();
auto& pool_node = prog->get_node("pool");
auto pool_layout = pool_node.get_output_layout();

ASSERT_EQ(pool_layout.format.value, format::yxfb);

auto out_mem = network.get_output("pool").get_memory();
auto out_mem = outputs.at("pool").get_memory();
cldnn::mem_lock<float> out_mem_ptr(out_mem, get_test_stream());

ASSERT_EQ(out_mem_ptr.size(), 4u);
@@ -239,7 +239,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) {
7.f, 3.f, -2.f, -1.f });

network.set_input_data("input", input);
network.execute();
auto outputs = network.execute();

auto prog = network.get_program();
auto& node = prog->get_node("actv");
@@ -250,7 +250,7 @@ TEST(reorder_inputs, impl_forcing_basic_format_kernel) {
ASSERT_EQ(actv_layout.format.value, format::yxfb);
ASSERT_EQ(kernel_name, actv_impl.kernel_name);

auto out_mem = network.get_output("actv").get_memory();
auto out_mem = outputs.at("actv").get_memory();
cldnn::mem_lock<float> out_mem_ptr(out_mem, get_test_stream());

ASSERT_EQ(out_mem_ptr.size(), 8u);
@@ -1031,9 +1031,9 @@ struct concat_gpu_4d : public concat_gpu {
network.set_input_data(input_ids[i].pid, in_memory[i]);
}

network.execute();
auto outputs = network.execute();

auto out_mem = network.get_output("concat").get_memory();
auto out_mem = outputs.at("concat").get_memory();
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());

for (size_t bi = 0; bi < batch_num; bi++) {
@@ -1117,9 +1117,9 @@ struct concat_gpu_4d_axis3 : public concat_axis3_gpu {
network.set_input_data(input_ids[i].pid, in_memory[i]);
}

network.execute();
auto outputs = network.execute();

auto out_mem = network.get_output("concat").get_memory();
auto out_mem = outputs.at("concat").get_memory();
cldnn::mem_lock<Type> out_ptr(out_mem, get_test_stream());

for (size_t bi = 0; bi < batch_num; bi++) {
@@ -1283,9 +1283,9 @@ struct concat_id_conv_gpu_4d : public concat_gpu {
network.set_input_data(input_ids[i].pid, in_memory[i]);
}

network.execute();
auto outputs = network.execute();

auto out_mem = network.get_output("conv").get_memory();
auto out_mem = outputs.at("conv").get_memory();
cldnn::mem_lock<OutputT> out_ptr(out_mem, get_test_stream());
ASSERT_EQ(out_mem->get_layout().format, fmt);

@@ -1420,13 +1420,13 @@ struct concat_gpu_4d_implicit : public concat_gpu {
for (size_t i = 0; i < in_features.size(); i++) {
concat_network->set_input_data(input_ids[i], in_memory[i]);
}
concat_network->execute();
auto outputs = concat_network->execute();

bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network->get_primitive("concat"))->can_be_optimized();
EXPECT_EQ(concat_opt_enabled, concat_opt_result);

return concat_network->get_output("reorder").get_memory();
return outputs.at("reorder").get_memory();
}

std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
@@ -1640,13 +1640,13 @@ struct concat_gpu_4d_implicit_onednn : public concat_gpu {
for (size_t i = 0; i < in_features.size(); i++) {
concat_network.set_input_data(input_ids[i], in_memory[i]);
}
concat_network.execute();
auto outputs = concat_network.execute();

bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat"))->node->can_be_optimized();
EXPECT_EQ(concat_opt_enabled, concat_opt_result);

return concat_network.get_output("reorder").get_memory();
return outputs.at("reorder").get_memory();
}

std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
@@ -1803,7 +1803,7 @@ struct concat_gpu_4d_explicit : public concat_gpu {
for (size_t i = 0; i < 4; i++) {
concat_network.set_input_data(input_ids[i], in_memory[i]);
}
concat_network.execute();
auto outputs = concat_network.execute();

bool concat_opt_enabled = config.get_property(ov::intel_gpu::optimize_data);
bool concat_opt_result = std::static_pointer_cast<concatenation_inst>(concat_network.get_primitive("concat"))->node->can_be_optimized();
@@ -1813,7 +1813,7 @@ struct concat_gpu_4d_explicit : public concat_gpu {
if (concat_opt_enabled && batch_num > 1) concat_opt_result = !concat_opt_result;
EXPECT_EQ(concat_opt_enabled, concat_opt_result);

return concat_network.get_output("reorder").get_memory();
return outputs.at("reorder").get_memory();
}

std::vector<std::vector<std::vector<std::vector<std::vector<Type>>>>> generate_input() {
@@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "intel_gpu/graph/network.hpp"
#include "intel_gpu/primitives/permute.hpp"
#include "intel_gpu/runtime/internal_properties.hpp"
#include "random_generator.hpp"
@@ -1038,6 +1039,7 @@ TEST(condition_gpu, set_empty_tensor) {
net.set_input_data(empty_input_id, empty_input_mem);
net.set_input_data(input_id, input_mem);

OV_ASSERT_NO_THROW(net.execute());
OV_ASSERT_NO_THROW(net.get_output(cond_id).get_memory());
std::map<primitive_id, network_output> outputs;
OV_ASSERT_NO_THROW(outputs = net.execute());
OV_ASSERT_NO_THROW(outputs.at(cond_id).get_memory());
}