Skip to content

Commit

Permalink
[NPU] Use different graph init (#26903)
Browse files Browse the repository at this point in the history
### Details:
 - *Use a different graph initialization method depending on whether the graph file schema or ELF format is used*

### Tickets:
 - *CVS-154233*
  • Loading branch information
pereanub authored Oct 8, 2024
1 parent c005e4f commit 53752e3
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 37 deletions.
3 changes: 2 additions & 1 deletion src/plugins/intel_npu/src/backend/include/zero_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ class ZeroExecutor final : public IExecutor {
}

private:
void initialize_graph_through_command_list() const;

const Config _config;
Logger _logger;

Expand All @@ -72,7 +74,6 @@ class ZeroExecutor final : public IExecutor {
const uint32_t _group_ordinal;

ze_graph_handle_t _graph = nullptr;
ze_graph_properties_t _props{};

std::vector<ArgumentDescriptor> _input_descriptors;
std::vector<ArgumentDescriptor> _output_descriptors;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class ZeroRemoteTensor : public RemoteTensor {
void* _mem = nullptr;
void* _data = nullptr;

bool _external_memory_support = true;
bool _external_memory_support = false;
};

} // namespace intel_npu
17 changes: 15 additions & 2 deletions src/plugins/intel_npu/src/backend/include/zero_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
/**
* @brief Last version of Table of Graph Extension functions used within plugin
*/
using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_7_t;
using ze_graph_dditable_ext_last_t = ze_graph_dditable_ext_1_8_t;
/**
* @brief Last version of the Command Queue functions used within plugin
*/
Expand Down Expand Up @@ -157,10 +157,23 @@ struct ze_graph_dditable_ext_decorator final {
}

// version 1.7
ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph, size_t* pSize, uint8_t** pGraphNativeBinary) {
/**
 * @brief Version-guarded forwarder for the graph extension's pfnGetNativeBinary2 entry point.
 *
 * Calls throwWhenUnsupported with ZE_GRAPH_EXT_VERSION_1_7 first, then passes every argument
 * unchanged to the wrapped table (_impl) and returns that call's result.
 *
 * @param hGraph Graph handle, passed through as-is.
 * @param pSize Passed through as-is; presumably receives the blob size - confirm against ze_graph_ext.h.
 * @param pGraphNativeBinary Passed through as-is; presumably receives a pointer to a read-only blob
 *        that stays owned by the driver - NOTE(review): confirm ownership/lifetime.
 * @return The ze_result_t produced by the wrapped pfnGetNativeBinary2 call.
 */
ze_result_t ZE_APICALL pfnGetNativeBinary2(ze_graph_handle_t hGraph,
size_t* pSize,
const uint8_t** pGraphNativeBinary) {
// Version guard for a 1.7-only entry point; the string is the label reported for this call.
throwWhenUnsupported("pfnGetNativeBinary2", ZE_GRAPH_EXT_VERSION_1_7);
return _impl->pfnGetNativeBinary2(hGraph, pSize, pGraphNativeBinary);
}

// version 1.8
/**
 * @brief Version-guarded forwarder for the graph extension's pfnGetProperties2 entry point.
 *
 * Calls throwWhenUnsupported with ZE_GRAPH_EXT_VERSION_1_8 first, then forwards both arguments
 * unchanged to the wrapped table (_impl).
 *
 * The label handed to throwWhenUnsupported is the DDI member name, matching the
 * "pfnGetNativeBinary2" label used by the 1.7 wrapper, so a reported unsupported call
 * names the function the caller actually invoked.
 *
 * @param hGraph Graph handle, passed through as-is.
 * @param pGraphProperties Properties structure, passed through as-is for the driver to fill.
 * @return The ze_result_t produced by the wrapped pfnGetProperties2 call.
 */
ze_result_t ZE_APICALL pfnGetProperties2(ze_graph_handle_t hGraph, ze_graph_properties_2_t* pGraphProperties) {
    throwWhenUnsupported("pfnGetProperties2", ZE_GRAPH_EXT_VERSION_1_8);
    return _impl->pfnGetProperties2(hGraph, pGraphProperties);
}

/**
 * @brief Version-guarded forwarder for the graph extension's pfnGraphInitialize entry point.
 *
 * Calls throwWhenUnsupported with ZE_GRAPH_EXT_VERSION_1_8 first, then forwards the handle
 * unchanged to the wrapped table (_impl).
 *
 * The label handed to throwWhenUnsupported names this function. It previously carried the
 * label of the properties query, so an unsupported-version report pointed at the wrong call.
 *
 * @param hGraph Graph handle, passed through as-is.
 * @return The ze_result_t produced by the wrapped pfnGraphInitialize call.
 */
ze_result_t ZE_APICALL pfnGraphInitialize(ze_graph_handle_t hGraph) {
    throwWhenUnsupported("pfnGraphInitialize", ZE_GRAPH_EXT_VERSION_1_8);
    return _impl->pfnGraphInitialize(hGraph);
}
};

/**
Expand Down
76 changes: 52 additions & 24 deletions src/plugins/intel_npu/src/backend/src/zero_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,23 +37,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
_initStructs->getCommandQueueDdiTable(),
_config,
group_ordinal)} {
_logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list");
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor");
CommandList graph_command_list(_initStructs->getDevice(),
_initStructs->getContext(),
_graph_ddi_table_ext,
_config,
_group_ordinal);
_logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue");
CommandQueue graph_command_queue(_initStructs->getDevice(),
_initStructs->getContext(),
ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
_initStructs->getCommandQueueDdiTable(),
_config,
_group_ordinal);
_logger.debug("ZeroExecutor::ZeroExecutor - create fence");
Fence fence(graph_command_queue, _config);

_logger.debug("ZeroExecutor::ZeroExecutor - create graph");
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH, itt::domains::LevelZeroBackend, "Executor::ZeroExecutor", "graphCreate");

Expand All @@ -79,7 +62,10 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i

OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetProperties");
_logger.debug("performing pfnGetProperties");
zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &_props));
ze_graph_properties_t props{};
props.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;

zeroUtils::throwOnFail("pfnGetProperties", _graph_ddi_table_ext.pfnGetProperties(_graph, &props));
auto targetDriverExtVersion = _graph_ddi_table_ext.version();
if (targetDriverExtVersion <= ZE_GRAPH_EXT_VERSION_1_1) {
OPENVINO_THROW("Incompatibility between the NPU plugin and driver! The driver version is too old, please "
Expand All @@ -88,8 +74,9 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i

OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGetArgumentProperties3");
_logger.debug("performing pfnGetArgumentProperties3");
for (uint32_t index = 0; index < _props.numGraphArgs; ++index) {
ze_graph_argument_properties_3_t arg3;
for (uint32_t index = 0; index < props.numGraphArgs; ++index) {
ze_graph_argument_properties_3_t arg3{};
arg3.stype = ZE_STRUCTURE_TYPE_GRAPH_ARGUMENT_PROPERTIES;
zeroUtils::throwOnFail("pfnGetArgumentProperties3",
_graph_ddi_table_ext.pfnGetArgumentProperties3(_graph, index, &arg3));

Expand All @@ -100,6 +87,51 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
}
}

// Select the graph initialization path from the graph extension version reported by the driver.
if (_graph_ddi_table_ext.version() < ZE_GRAPH_EXT_VERSION_1_8) {
    // pfnGetProperties2 / pfnGraphInitialize are guarded for extension 1.8, so older drivers
    // always take the command-list based initialization.
    initialize_graph_through_command_list();
} else {
    // Ask the driver which initialization stages this graph requires.
    // NOTE(review): stype reuses ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES for the *_2_t structure -
    // confirm against ze_graph_ext.h that no dedicated value exists.
    ze_graph_properties_2_t properties = {};
    properties.stype = ZE_STRUCTURE_TYPE_GRAPH_PROPERTIES;
    // Check the result like the other DDI calls in this constructor: on a failed query
    // initStageRequired would stay zero and the graph would silently be left uninitialized.
    zeroUtils::throwOnFail("pfnGetProperties2", _graph_ddi_table_ext.pfnGetProperties2(_graph, &properties));

    if (properties.initStageRequired & ZE_GRAPH_STAGE_INITIALIZE) {
        OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "pfnGraphInitialize");
        // A failed initialization must surface here instead of on first inference.
        zeroUtils::throwOnFail("pfnGraphInitialize", _graph_ddi_table_ext.pfnGraphInitialize(_graph));
    }

    if (properties.initStageRequired & ZE_GRAPH_STAGE_COMMAND_LIST_INITIALIZE) {
        initialize_graph_through_command_list();
    }
}

if (config.has<WORKLOAD_TYPE>()) {
setWorkloadType(config.get<WORKLOAD_TYPE>());
}
}

void ZeroExecutor::initialize_graph_through_command_list() const {
OV_ITT_TASK_CHAIN(ZERO_EXECUTOR_GRAPH,
itt::domains::LevelZeroBackend,
"Executor::ZeroExecutor",
"initialize_graph_through_command_list");

_logger.debug("ZeroExecutor::ZeroExecutor init start - create graph_command_list");
OV_ITT_SCOPED_TASK(itt::domains::LevelZeroBackend, "Executor::ZeroExecutor");
CommandList graph_command_list(_initStructs->getDevice(),
_initStructs->getContext(),
_graph_ddi_table_ext,
_config,
_group_ordinal);
_logger.debug("ZeroExecutor::ZeroExecutor - create graph_command_queue");
CommandQueue graph_command_queue(_initStructs->getDevice(),
_initStructs->getContext(),
ZE_COMMAND_QUEUE_PRIORITY_NORMAL,
_initStructs->getCommandQueueDdiTable(),
_config,
_group_ordinal);
_logger.debug("ZeroExecutor::ZeroExecutor - create fence");
Fence fence(graph_command_queue, _config);

OV_ITT_TASK_NEXT(ZERO_EXECUTOR_GRAPH, "appendGraphInitialize");
_logger.debug("ZeroExecutor::ZeroExecutor - performing appendGraphInitialize");
graph_command_list.appendGraphInitialize(_graph);
Expand All @@ -112,10 +144,6 @@ ZeroExecutor::ZeroExecutor(const std::shared_ptr<const ZeroInitStructsHolder>& i
_logger.debug("ZeroExecutor::ZeroExecutor - performing hostSynchronize");
fence.hostSynchronize();
_logger.debug("ZeroExecutor::ZeroExecutor - hostSynchronize completed");

if (config.has<WORKLOAD_TYPE>()) {
setWorkloadType(config.get<WORKLOAD_TYPE>());
}
}

void ZeroExecutor::setWorkloadType(const ov::WorkloadType workloadType) const {
Expand Down
13 changes: 10 additions & 3 deletions src/plugins/intel_npu/src/backend/src/zero_remote_tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,16 @@ ZeroRemoteTensor::ZeroRemoteTensor(std::shared_ptr<ov::IRemoteContext> context,
ze_device_external_memory_properties_t desc = {};
desc.stype = ZE_STRUCTURE_TYPE_DEVICE_EXTERNAL_MEMORY_PROPERTIES;
auto res = zeDeviceGetExternalMemoryProperties(_init_structs->getDevice(), &desc);
if (res != ZE_RESULT_SUCCESS || (desc.memoryAllocationImportTypes != ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF &&
desc.memoryAllocationImportTypes != ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32)) {
_external_memory_support = false;
if (res == ZE_RESULT_SUCCESS) {
#ifdef _WIN32
if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_OPAQUE_WIN32) {
_external_memory_support = true;
}
#else
if (desc.memoryAllocationImportTypes & ZE_EXTERNAL_MEMORY_TYPE_FLAG_DMA_BUF) {
_external_memory_support = true;
}
#endif
}

allocate(byte_size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,14 +132,14 @@ class LevelZeroCompilerInDriver final : public ICompiler {
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& blob,
uint8_t*& blobPtr,
const uint8_t*& blobPtr,
size_t& blobSize) const;

template <typename T = TableExtension, typename std::enable_if_t<!UseCopyForNativeBinary(T), bool> = true>
void getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& /* unusedBlob */,
uint8_t*& blobPtr,
const uint8_t*& blobPtr,
size_t& blobSize) const;

template <typename T = TableExtension, typename std::enable_if_t<SupportAPIGraphQueryNetworkV2(T), bool> = true>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ LevelZeroCompilerAdapter::LevelZeroCompilerAdapter(std::shared_ptr<IEngineBacken
zeContext,
graph_ddi_table_ext);
break;
case ZE_GRAPH_EXT_VERSION_1_8:
apiAdapter = std::make_shared<LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_8_t>>(driverHandle,
deviceHandle,
zeContext,
graph_ddi_table_ext);
break;
default:
apiAdapter = std::make_shared<LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_2_t>>(driverHandle,
deviceHandle,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ template <typename T, std::enable_if_t<UseCopyForNativeBinary(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& blob,
uint8_t*& blobPtr,
const uint8_t*& blobPtr,
size_t& blobSize) const {
// Get blob size first
auto result = _graphDdiTableExt.pfnGetNativeBinary(graphHandle, &blobSize, nullptr);
Expand Down Expand Up @@ -404,7 +404,7 @@ template <typename T, std::enable_if_t<!UseCopyForNativeBinary(T), bool>>
void LevelZeroCompilerInDriver<TableExtension>::getNativeBinary(ze_graph_dditable_ext_curr_t& graphDdiTableExt,
ze_graph_handle_t graphHandle,
std::vector<uint8_t>& /* unusedBlob */,
uint8_t*& blobPtr,
const uint8_t*& blobPtr,
size_t& blobSize) const {
// Get blob ptr and size
auto result = _graphDdiTableExt.pfnGetNativeBinary2(graphHandle, &blobSize, &blobPtr);
Expand All @@ -427,7 +427,7 @@ CompiledNetwork LevelZeroCompilerInDriver<TableExtension>::getCompiledNetwork(
_logger.info("LevelZeroCompilerInDriver getCompiledNetwork get blob from graphHandle");
ze_graph_handle_t graphHandle = static_cast<ze_graph_handle_t>(networkDescription.metadata.graphHandle);

uint8_t* blobPtr = nullptr;
const uint8_t* blobPtr = nullptr;
size_t blobSize = -1;
std::vector<uint8_t> blob;

Expand Down Expand Up @@ -1239,6 +1239,7 @@ template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_4_t>;
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_5_t>;
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_6_t>;
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_7_t>;
template class LevelZeroCompilerInDriver<ze_graph_dditable_ext_1_8_t>;

} // namespace driverCompilerAdapter
} // namespace intel_npu
2 changes: 1 addition & 1 deletion src/plugins/intel_npu/thirdparty/level-zero-ext
Submodule level-zero-ext updated 1 files
+87 −4 ze_graph_ext.h

0 comments on commit 53752e3

Please sign in to comment.