Skip to content

Commit

Permalink
[GPU] Fix double copying when lockable memory is needed
Browse files Browse the repository at this point in the history
  • Loading branch information
sshlyapn committed Nov 21, 2024
1 parent 5c2b9ac commit 33798e3
Showing 1 changed file with 5 additions and 11 deletions.
16 changes: 5 additions & 11 deletions src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -899,16 +899,6 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
}

cldnn::event::ptr ret_event = nullptr;
if (!is_remote_tensor_impl && !is_generic_remote && !convert_needed) {
auto src_ptr = static_cast<uint8_t*>(user_tensor->data());
if (!same_host_mem(memory, src_ptr)) {
// WA: Set need_lockable_mem as a blocking argument
// The current input_layout (wait_for_events) does not provide proper synchronization for subsequent CPU implementations
// For IOQ, it creates an already set user event, leading to accessing memory that hasn't completed copying
// For OOOQ, it enqueues a barrier that is ignored by the memory_lock functions, also causing access to not ready memory
ret_event = memory->copy_from(stream, src_ptr, need_lockable_mem);
}
}
if (convert_needed) {
if (is_remote_tensor_impl) {
convert_and_copy(remote_tensor_impl_ptr->get_memory(), device_tensor->get_memory(), stream);
Expand All @@ -919,7 +909,11 @@ std::vector<cldnn::event::ptr> SyncInferRequest::prepare_input(const std::string
if (!is_remote_tensor_impl && !is_generic_remote) {
auto src_ptr = static_cast<uint8_t*>(user_tensor->data());
if (!same_host_mem(memory, src_ptr)) {
ret_event = memory->copy_from(stream, src_ptr, false);
// WA: Pass need_lockable_mem as the blocking argument.
// The current input_layout (wait_for_events) does not provide proper synchronization for subsequent CPU implementations:
// For IOQ, it creates an already-set user event, leading to access to memory whose copy has not completed yet.
// For OOOQ, it enqueues a barrier that is ignored by the memory_lock functions, also causing access to memory that is not ready.
ret_event = memory->copy_from(stream, src_ptr, need_lockable_mem);
}
} else if (is_generic_remote) {
user_tensor->copy_to(device_tensor);
Expand Down

0 comments on commit 33798e3

Please sign in to comment.