[GPU] Make ShapePredictor instance unique for each InferRequest instead of the cldnn::network
sshlyapn committed Nov 10, 2023
1 parent b1705e8 commit 0bb61c5
Showing 6 changed files with 26 additions and 12 deletions.
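In effect, the ShapePredictor (the cldnn helper that tracks per-primitive shape history and decides how much device memory to preallocate for dynamically shaped buffers) is now owned by each infer request rather than by the cldnn::network, and the request injects it into the network before every execution. A minimal sketch of the new ownership wiring, using simplified stand-in types instead of the real cldnn/ov::intel_gpu classes:

#include <memory>
#include <utility>

// Simplified stand-in; the real ShapePredictor also records shape history
// and computes preallocation sizes.
struct ShapePredictor {};

struct Network {
    // The network now holds a shared handle instead of owning the predictor
    // exclusively through std::unique_ptr.
    std::shared_ptr<ShapePredictor> predictor;
    void set_shape_predictor(std::shared_ptr<ShapePredictor> sp) { predictor = std::move(sp); }
    std::shared_ptr<ShapePredictor> get_shape_predictor() const { return predictor; }
};

struct InferRequest {
    // One predictor per request, so concurrent requests on the same compiled
    // model no longer share (or pollute) each other's preallocation state.
    std::shared_ptr<ShapePredictor> m_shape_predictor = std::make_shared<ShapePredictor>();

    void enqueue(Network& network) {
        network.set_shape_predictor(m_shape_predictor);  // mirrors SyncInferRequest::enqueue()
        // ... execute the network ...
    }
};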
5 changes: 3 additions & 2 deletions src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp
@@ -247,7 +247,8 @@ struct network {
     const variables_state_info_map& get_variables_state_info() const;
     const ExecutionConfig& get_config() const { return _config; }
 
-    ShapePredictor& get_shape_predictor() { return *_shape_predictor; }
+    std::shared_ptr<ShapePredictor> get_shape_predictor() { return _shape_predictor; }
+    void set_shape_predictor(std::shared_ptr<ShapePredictor> shape_predictor) { _shape_predictor = shape_predictor; }
 
 #ifdef GPU_DEBUG_CONFIG
     int64_t get_current_iteration_num() { return iteration; }
@@ -287,7 +288,7 @@ struct network {
     std::unordered_map<primitive_id, event::ptr> _old_events;
     output_chains_map _output_chains;
 
-    std::unique_ptr<ShapePredictor> _shape_predictor;
+    std::shared_ptr<ShapePredictor> _shape_predictor;
 
     void build_exec_order();
     void allocate_primitive_instance(program_node const& node);
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp
@@ -83,6 +83,7 @@ class SyncInferRequest : public ov::ISyncInferRequest {
     std::shared_ptr<Graph> m_graph;
     RemoteContextImpl::Ptr m_context = nullptr;
     std::shared_ptr<ov::threading::IStreamsExecutor> m_stream_executor = nullptr;
+    std::shared_ptr<cldnn::ShapePredictor> m_shape_predictor = nullptr;
     bool m_enable_profiling = false;
     bool m_use_external_queue = false;
 
5 changes: 3 additions & 2 deletions src/plugins/intel_gpu/src/graph/impls/common/condition.cpp
@@ -37,8 +37,9 @@ struct condition_impl : typed_primitive_impl<condition> {
         set_node_params(instance.get_node());
 
         auto pred = condition_inst::get_pred_from_memory(instance.pred_memory_ptr(), instance.get_network().get_stream());
-        network::ptr executed_net = pred? instance.get_net_true() : instance.get_net_false();
-        auto branch = pred? instance.get_branch_true() : instance.get_branch_false();
+        network::ptr executed_net = pred ? instance.get_net_true() : instance.get_net_false();
+        auto branch = pred ? instance.get_branch_true() : instance.get_branch_false();
+        executed_net->set_shape_predictor(instance.get_network().get_shape_predictor());
         GPU_DEBUG_LOG << "predicate: " << (pred ? "True" : "False") << std::endl;
 
         // Set input memory of inner network before its execution
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/graph/impls/common/loop.cpp
@@ -121,6 +121,7 @@ struct loop_impl : typed_primitive_impl<loop> {
 
         auto ev = stream.create_user_event(false);
 
+        body_network->set_shape_predictor(outer_network.get_shape_predictor());
         OPENVINO_ASSERT(!primitive->num_iteration_id.empty(), "loop operation should have num_iteration_id");
 
         auto num_iterations = instance.get_num_iterations();
2 changes: 1 addition & 1 deletion src/plugins/intel_gpu/src/graph/primitive_inst.cpp
@@ -427,7 +427,7 @@ event::ptr primitive_inst::realloc_if_needed() {
     }
 
     auto current_shape = actual_layout.get_shape();
-    auto& sp = get_network().get_shape_predictor();
+    auto& sp = *get_network().get_shape_predictor();
     auto dt_size = ov::element::Type(actual_layout.data_type).bitwidth();
     auto prealloc_info = sp.predict_preallocation_shape(id(), current_shape, dt_size, can_reuse_buffer);
     if (prealloc_info.first && sp.can_preallocate(ov::shape_size(prealloc_info.second) * dt_size)) {
24 changes: 17 additions & 7 deletions src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
@@ -226,13 +226,25 @@ SyncInferRequest::SyncInferRequest(const std::shared_ptr<const CompiledModel>& c
     : ov::ISyncInferRequest(compiled_model)
     , m_graph(compiled_model->get_graph(0))
     , m_context(std::static_pointer_cast<RemoteContextImpl>(compiled_model->get_context_impl()))
+    , m_shape_predictor(new cldnn::ShapePredictor(&m_graph->get_engine(), m_graph->get_config().get_property(ov::intel_gpu::buffers_preallocation_ratio)))
     , m_enable_profiling(m_graph->get_config().get_property(ov::enable_profiling))
     , m_use_external_queue(m_graph->use_external_queue()) {
     bool is_legacy_api = !compiled_model->is_new_api();
     init_mappings(is_legacy_api);
     allocate_inputs();
     allocate_outputs();
     allocate_states();
+
+    GPU_DEBUG_GET_INSTANCE(debug_config);
+    GPU_DEBUG_IF(debug_config->mem_preallocation_params.is_initialized) {
+        auto& mem_preallocation_params = debug_config->mem_preallocation_params;
+        m_shape_predictor.reset(
+            new cldnn::ShapePredictor(&m_graph->get_engine(),
+                                      mem_preallocation_params.next_iters_preallocation_count,
+                                      mem_preallocation_params.max_per_iter_size,
+                                      mem_preallocation_params.max_per_dim_diff,
+                                      mem_preallocation_params.buffers_preallocation_ratio));
+    }
 }
 
 void SyncInferRequest::infer() {
@@ -401,6 +413,7 @@ void SyncInferRequest::enqueue() {
 
     auto network = m_graph->get_network();
     network->assign_variables_memories();
+    network->set_shape_predictor(m_shape_predictor);
 
     m_internal_outputs.clear();
     m_internal_outputs = network->execute(dependencies);
@@ -476,8 +489,7 @@ void SyncInferRequest::wait() {
             need_reallocate = usm_host_tensor->get_impl()->get_original_memory()->size() < output_memory->size();
 
         if (need_reallocate) {
-            auto& shape_predictor = m_graph->get_network()->get_shape_predictor();
-            auto actual_memory_shape = predict_shape(name, mem_shape, output_tensor->get_element_type(), shape_predictor);
+            auto actual_memory_shape = predict_shape(name, mem_shape, output_tensor->get_element_type(), *m_shape_predictor);
             output_tensor->set_shape(actual_memory_shape);
         }
     }
@@ -585,8 +597,7 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe
 
     auto actual_memory_shape = tensor_shape;
     if (is_dynamic) {
-        auto& shape_predictor = m_graph->get_network()->get_shape_predictor();
-        actual_memory_shape = predict_shape(name, tensor_shape, element_type, shape_predictor);
+        actual_memory_shape = predict_shape(name, tensor_shape, element_type, *m_shape_predictor);
     }
 
     return { create_device_tensor(actual_memory_shape, element_type, need_lockable_mem), TensorOwner::PLUGIN };
@@ -746,7 +757,7 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
 
     if (is_remote) {
         m_plugin_inputs[name] = user_tensor_wrapper;
-    } else if (is_usm_host_tensor && !convert_needed) {
+    } else if (is_usm_host_tensor && !convert_needed && can_use_usm_host(engine)) {
         m_plugin_inputs[name] = {usm_host_ptr->get_impl(), user_tensor_wrapper.owner};
         is_remote = true;
     }
@@ -762,8 +773,7 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
     auto device_tensor = std::dynamic_pointer_cast<RemoteTensorImpl>(device_tensor_wrapper.ptr);
     if (is_dynamic) {
         if (device_tensor->get_original_memory()->size() < user_tensor->get_byte_size()) {
-            auto& shape_predictor = network->get_shape_predictor();
-            auto actual_shape = predict_shape(name, user_tensor->get_shape(), device_tensor_et, shape_predictor);
+            auto actual_shape = predict_shape(name, user_tensor->get_shape(), device_tensor_et, *m_shape_predictor);
             GPU_DEBUG_TRACE_DETAIL << " actual memory shape: " << actual_shape.to_string() << std::endl;
             auto new_tensor = create_device_tensor(actual_shape, device_tensor_et, false);
             new_tensor->set_shape(user_tensor->get_shape());
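The switch from std::unique_ptr to std::shared_ptr is what allows the control-flow primitives above (condition, loop) to point their inner body networks at the very same predictor instance the outer network received from the request, so every nesting level feeds a single per-request statistics pool. A standalone sketch of that propagation, again with simplified stand-in types and a hypothetical execute_body() helper:

#include <memory>
#include <utility>

struct ShapePredictor {};

struct Network {
    std::shared_ptr<ShapePredictor> predictor;
    void set_shape_predictor(std::shared_ptr<ShapePredictor> sp) { predictor = std::move(sp); }
    std::shared_ptr<ShapePredictor> get_shape_predictor() const { return predictor; }
};

// Analogue of what loop_impl/condition_impl now do before running an inner
// body network: alias the outer network's (per-request) predictor, which a
// std::unique_ptr owner could not hand out safely.
void execute_body(Network& outer, Network& body) {
    body.set_shape_predictor(outer.get_shape_predictor());
    // ... bind inputs to `body` and execute it ...
}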
