From 4eab5b4635184ceebed1a119537afa9a16f32b54 Mon Sep 17 00:00:00 2001 From: Nikolay Shchegolev Date: Wed, 18 Oct 2023 12:50:46 +0400 Subject: [PATCH 01/39] [CPU] RandomUniform-8 implementation. (#20171) --- src/core/src/op/random_uniform.cpp | 2 + src/core/tests/copy.cpp | 4 +- .../onnx/frontend/src/op/random_uniform.cpp | 1 + src/plugins/intel_cpu/src/cpu_types.cpp | 2 + src/plugins/intel_cpu/src/cpu_types.h | 1 + src/plugins/intel_cpu/src/node.cpp | 4 +- src/plugins/intel_cpu/src/node.h | 4 +- .../intel_cpu/src/nodes/grid_sample.cpp | 24 +- .../intel_cpu/src/nodes/grid_sample.hpp | 2 +- .../src/nodes/kernels/x64/grid_sample.cpp | 6 +- .../src/nodes/kernels/x64/grid_sample.hpp | 14 +- .../src/nodes/kernels/x64/jit_kernel_base.cpp | 337 +++++++--- .../src/nodes/kernels/x64/jit_kernel_base.hpp | 106 ++- .../src/nodes/kernels/x64/random_uniform.cpp | 635 ++++++++++++++++++ .../src/nodes/kernels/x64/random_uniform.hpp | 99 +++ .../intel_cpu/src/nodes/random_uniform.cpp | 532 +++++++++++++++ .../intel_cpu/src/nodes/random_uniform.hpp | 120 ++++ src/plugins/intel_cpu/src/nodes/reference.cpp | 53 +- src/plugins/intel_cpu/src/nodes/reference.h | 4 +- src/plugins/intel_cpu/src/nodes_factory.cpp | 2 + .../shape_inference/custom/random_uniform.cpp | 47 ++ .../shape_inference/custom/random_uniform.hpp | 37 + .../skip_tests_config.cpp | 2 + .../classes/random_uniform.cpp | 265 ++++++++ .../classes/random_uniform.hpp | 53 ++ .../instances/common/random_uniform.cpp | 68 ++ .../instances/x64/random_uniform.cpp | 46 ++ .../functional/test_utils/cpu_test_utils.hpp | 1 + .../common_test_utils/common_utils.hpp | 4 + 29 files changed, 2318 insertions(+), 157 deletions(-) create mode 100644 src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.cpp create mode 100644 src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.hpp create mode 100644 src/plugins/intel_cpu/src/nodes/random_uniform.cpp create mode 100644 src/plugins/intel_cpu/src/nodes/random_uniform.hpp create mode 100644 src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp create mode 100644 src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/random_uniform.cpp create mode 100644 src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/random_uniform.cpp diff --git a/src/core/src/op/random_uniform.cpp b/src/core/src/op/random_uniform.cpp index 296b115979c8f7..b07044960f44c9 100644 --- a/src/core/src/op/random_uniform.cpp +++ b/src/core/src/op/random_uniform.cpp @@ -92,6 +92,8 @@ bool RandomUniform::evaluate(TensorVector& outputs, const TensorVector& inputs) const auto& t_out = get_out_type(); OPENVINO_ASSERT(validate::out_et(t_out), "Unsupported type of RandomUniform: " + t_out.get_type_name()); + outputs[0].set_shape(out_shape); + auto state = ov::reference::random_uniform(out_dims.data(), static_cast(inputs[1].data()), static_cast(inputs[2].data()), diff --git a/src/core/tests/copy.cpp b/src/core/tests/copy.cpp index d7b2b4256f4aa3..f902d7485a1932 100644 --- a/src/core/tests/copy.cpp +++ b/src/core/tests/copy.cpp @@ -447,12 +447,12 @@ TEST(copy, random_uniform) { const auto min_val_param = make_shared(element::f32, Shape{1}); const auto max_val_param = make_shared(element::f32, Shape{1}); - auto 
out_shape = make_shared(element::i64, Shape{3}, std::vector{1, 2, 3}); + auto out_shape = make_shared(element::i64, Shape{3}, shape); auto ru = std::make_shared(out_shape, min_val_param, max_val_param, element::f32, 150, 10); // Call `evaluate` to update m_state - auto outputs = ov::TensorVector{{element::i64, out_shape->get_shape(), shape.data()}}; + auto outputs = ov::TensorVector{{element::i64, {1lu, 2lu, 3lu}}}; ru->evaluate(outputs, ov::TensorVector{{element::i64, out_shape->get_shape(), shape.data()}, {element::f32, min_val_param->get_shape(), &min}, diff --git a/src/frontends/onnx/frontend/src/op/random_uniform.cpp b/src/frontends/onnx/frontend/src/op/random_uniform.cpp index 6215dcc491c07d..a26ed672a0cc15 100644 --- a/src/frontends/onnx/frontend/src/op/random_uniform.cpp +++ b/src/frontends/onnx/frontend/src/op/random_uniform.cpp @@ -29,6 +29,7 @@ OutputVector random_uniform(const Node& node) { const auto target_type = common::get_ngraph_element_type(dtype); const uint64_t global_seed = 0; + // TODO: This multiplication leads to a mismatch in accuracy. Issue: 123003 const auto seed_uint64 = static_cast(seed * 1000); return {std::make_shared(target_shape_const, diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 03fbe1a9923b7a..6f5a84701b184d 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -123,6 +123,7 @@ static const TypeToNameMap& get_type_to_name_tbl() { { "ScatterElementsUpdate", Type::ScatterElementsUpdate}, { "ScatterNDUpdate", Type::ScatterNDUpdate}, { "Interpolate", Type::Interpolate}, + { "RandomUniform", Type::RandomUniform}, { "ReduceL1", Type::Reduce}, { "ReduceL2", Type::Reduce}, { "ReduceLogicalAnd", Type::Reduce}, @@ -317,6 +318,7 @@ std::string NameFromType(const Type type) { CASE(PriorBox); CASE(PriorBoxClustered) CASE(MHA); + CASE(RandomUniform); CASE(Unique); CASE(Ngram); CASE(Unknown); diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h index 403ed62d482f8b..9afbe2d7485ddd 100644 --- a/src/plugins/intel_cpu/src/cpu_types.h +++ b/src/plugins/intel_cpu/src/cpu_types.h @@ -110,6 +110,7 @@ enum class Type { PriorBoxClustered, Interaction, MHA, + RandomUniform, Unique, Ngram }; diff --git a/src/plugins/intel_cpu/src/node.cpp b/src/plugins/intel_cpu/src/node.cpp index e8fe6b89a00afc..ab02ae44dd6ce2 100644 --- a/src/plugins/intel_cpu/src/node.cpp +++ b/src/plugins/intel_cpu/src/node.cpp @@ -95,8 +95,6 @@ Node::Node(const std::shared_ptr& op, typeStr(op->get_type_name()), type(TypeFromName(op->get_type_name())), profiling(op->get_friendly_name()) { - const std::string errorPrefix = "Ngraph operation " + std::string(op->get_type_name()) + " with name " + op->get_friendly_name(); - for (size_t i = 0; i < op->get_input_size(); i++) { const auto &shape = op->get_input_partial_shape(i); if (shape.rank().is_dynamic()) { @@ -480,6 +478,8 @@ std::string Node::getPrimitiveDescriptorType() const { SEARCH_TYPE(_dw); SEARCH_TYPE(_1x1); +#undef SEARCH_TYPE + if (type == impl_desc_type::unknown) str_type = "unknown"; else if (str_type.empty()) diff --git a/src/plugins/intel_cpu/src/node.h b/src/plugins/intel_cpu/src/node.h index 5becbfa9863f70..864c08a95b04c6 100644 --- a/src/plugins/intel_cpu/src/node.h +++ b/src/plugins/intel_cpu/src/node.h @@ -41,6 +41,8 @@ #include "nodes/executors/mvn_list.hpp" #include "nodes/executors/executor.hpp" +#define THROW_CPU_NODE_ERR(...) 
OPENVINO_THROW(getTypeStr(), " node with name '", getName(), "' ", __VA_ARGS__) + namespace ov { namespace intel_cpu { @@ -353,7 +355,7 @@ class Node { inplace = InPlaceType::Unknown; } - std::string getPrimitiveDescriptorType() const; + virtual std::string getPrimitiveDescriptorType() const; PerfCount &PerfCounter() { return perfCounter; } diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp index 798b04078352bf..6868e907fa7ae8 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.cpp @@ -10,9 +10,11 @@ #include using namespace InferenceEngine; -using namespace dnnl::impl::cpu; using namespace ov::intel_cpu; using namespace ov::intel_cpu::node; +#if defined(OPENVINO_ARCH_X86_64) +using namespace dnnl::impl::cpu; +#endif // OPENVINO_ARCH_X86_64 #define THROW_ERROR IE_THROW() << getTypeStr() << " node with name '" << getName() << "' " @@ -23,10 +25,14 @@ bool GridSample::isSupportedOperation(const std::shared_ptr& op, errorMessage = "Not supported GridSample operation version. CPU plug-in supports only 9th version."; return false; } +#if defined(OPENVINO_ARCH_X86_64) if (!x64::mayiuse(x64::sse41)) { errorMessage = "Not supported CPU instructions set."; return false; } +#else + return false; +#endif // OPENVINO_ARCH_X86_64 } catch (...) { return false; } @@ -34,6 +40,8 @@ bool GridSample::isSupportedOperation(const std::shared_ptr& op, return true; } +#if defined(OPENVINO_ARCH_X86_64) + GridSample::GridSample(const std::shared_ptr& op, const GraphContext::CPtr context) : Node(op, context, NgraphShapeInferFactory(op, PortMask(1))) { std::string errorMessage; @@ -110,7 +118,7 @@ void GridSample::initSupportedPrimitiveDescriptors() { } void GridSample::createPrimitive() { - GridSampleKernelConfParams jcp; + kernel::GridSampleKernelConfParams jcp; jcp.inDataPrc = dataPrecision; jcp.gridPrc = gridPrecision; @@ -133,15 +141,13 @@ void GridSample::createPrimitive() { jcp.cannelNum = jcp.dynamicChannel ? 
1lu : srcDataDims[1]; } -#if defined(OPENVINO_ARCH_X86_64) if (x64::mayiuse(x64::avx512_core)) { - jitKernel.reset(new GridSampleKernel(jcp)); + jitKernel.reset(new kernel::GridSampleKernel(jcp)); } else if (x64::mayiuse(x64::avx2)) { - jitKernel.reset(new GridSampleKernel(jcp)); + jitKernel.reset(new kernel::GridSampleKernel(jcp)); } else if (x64::mayiuse(x64::sse41)) { - jitKernel.reset(new GridSampleKernel(jcp)); + jitKernel.reset(new kernel::GridSampleKernel(jcp)); } -#endif // OPENVINO_ARCH_X86_64 if (!jitKernel) { THROW_ERROR << " could not create JIT kernel."; } @@ -268,7 +274,7 @@ void GridSample::execute(dnnl::stream strm) { auto threadBody = [&](const int ithr, const int nthr) { const auto& p = execParamsPerThread[ithr]; - auto arg = GridSamplesKernelExecArgs(); + auto arg = kernel::GridSamplesKernelExecArgs(); if (p.workAmount == 0lu) { return; } @@ -311,3 +317,5 @@ void GridSample::executeDynamicImpl(dnnl::stream strm) { bool GridSample::created() const { return getType() == Type::GridSample; } + +#endif // OPENVINO_ARCH_X86_64 diff --git a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp index 89a1a409764615..78b5f9d66710ca 100644 --- a/src/plugins/intel_cpu/src/nodes/grid_sample.hpp +++ b/src/plugins/intel_cpu/src/nodes/grid_sample.hpp @@ -72,7 +72,7 @@ class GridSample : public Node { static constexpr size_t IN_DATA = 0; static constexpr size_t IN_GRID = 1; - std::shared_ptr jitKernel; + std::shared_ptr jitKernel; }; } // namespace node diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp index 7501dd606427ce..89e658a7d6a6fc 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -8,12 +8,13 @@ using namespace dnnl::impl::cpu; namespace ov { namespace intel_cpu { +namespace kernel { #define GET_OFF(field) offsetof(GridSamplesKernelExecArgs, field) template GridSampleKernel::GridSampleKernel(const GridSampleKernelConfParams& jcp) : - GridSampleKernelBase(jit_name(), jcp) { + GridSampleKernelBase(jit_name(), jcp, isa) { vlen = x64::cpu_isa_traits::vlen; dataTypeSize = jcp.inDataPrc.size(); gridTypeSize = jcp.gridPrc.size(); @@ -2085,5 +2086,6 @@ template class GridSampleKernel; template class GridSampleKernel; template class GridSampleKernel; +} // namespace kernel } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp index c24100259cd5bb..295c715fb8146b 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/grid_sample.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -14,6 +14,12 @@ namespace intel_cpu { enum class GridSampleInterpolationMode { BILINEAR, BICUBIC, NEAREST }; enum class GridSamplePaddingMode { ZEROS, BORDER, REFLECTION }; +namespace kernel { + +class GridSampleKernelBase; + +#if defined(OPENVINO_ARCH_X86_64) + struct GridSampleKernelConfParams { bool dynamicShapes = false; bool dynamicBatch = false; @@ -66,7 +72,8 @@ class GridSampleKernelBase: public JitKernelBase { assert(ker_); ker_(args); } - explicit 
GridSampleKernelBase(const char* name, const GridSampleKernelConfParams& jcp) : JitKernelBase(name), ker_(nullptr), jcp(jcp) {} + explicit GridSampleKernelBase(const char* name, const GridSampleKernelConfParams& jcp, dnnl::impl::cpu::x64::cpu_isa_t isa) + : JitKernelBase(name, isa), ker_(nullptr), jcp(jcp) {} virtual void create_ker() = 0; uint64_t getVecLen() { @@ -173,5 +180,8 @@ class GridSampleKernel : public GridSampleKernelBase { void hwShiftPs2dq(const Vmm& vDst, const Vmm& vHCoord, const Vmm& vWCoord, const Vmm& vWidth); }; +#endif // OPENVINO_ARCH_X86_64 + +} // namespace kernel } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp index 6afbecf143f27b..bc0daaf6e33e2a 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.cpp @@ -1,172 +1,243 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #include "jit_kernel_base.hpp" -using namespace ov; -using namespace intel_cpu; using namespace dnnl::impl::cpu; +namespace ov { +namespace intel_cpu { +namespace kernel { -void JitKernelBase::uni_vfmsub132ps(const Xbyak::Xmm& vDst, - const Xbyak::Xmm& vSrc, +JitKernelBase::JitKernelBase(const char* name, x64::cpu_isa_t isa) + : x64::jit_generator(name, nullptr, x64::MAX_CODE_SIZE, true, isa), m_isa(isa) { + vlen = x64::isa_max_vlen(isa); +} + +void JitKernelBase::uni_vfmsub132ps(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vfmsub132ps(vDst, vSrc, op); + vfmsub132ps(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - assert(vDst.getIdx() != vSrc.getIdx()); - vmulps(vDst, vDst, op); - vsubps(vDst, vDst, vSrc); + assert(v_dst.getIdx() != v_src.getIdx()); + vmulps(v_dst, v_dst, op); + vsubps(v_dst, v_dst, v_src); } else { - assert(vDst.getIdx() != vSrc.getIdx()); - mulps(vDst, op); - subps(vDst, vSrc); + assert(v_dst.getIdx() != v_src.getIdx()); + mulps(v_dst, op); + subps(v_dst, v_src); } } -void JitKernelBase::uni_vfnmadd132ps(const Xbyak::Xmm& vDst, - const Xbyak::Xmm& vSrc, +void JitKernelBase::uni_vfnmadd132ps(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vfnmadd132ps(vDst, vSrc, op); + vfnmadd132ps(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - assert(vDst.getIdx() != vSrc.getIdx()); - vmulps(vDst, vDst, op); - vsubps(vDst, vSrc, vDst); + assert(v_dst.getIdx() != v_src.getIdx()); + vmulps(v_dst, v_dst, op); + vsubps(v_dst, v_src, v_dst); } else { - assert(vDst.getIdx() != vSrc.getIdx()); - mulps(vDst, op); - subps(vSrc, vDst); - movups(vDst, vSrc); + assert(v_dst.getIdx() != v_src.getIdx()); + mulps(v_dst, op); + subps(v_src, v_dst); + movups(v_dst, v_src); } } -void JitKernelBase::uni_vfmsub231ps(const Xbyak::Xmm& vDst, - const Xbyak::Xmm& vSrc, +void JitKernelBase::uni_vfmsub231ps(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vfmsub231ps(vDst, vSrc, op); + vfmsub231ps(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - assert(!vDst.isEqualIfNotInherited(op)); - vmulps(vSrc, vSrc, op); - vsubps(vDst, vSrc, vDst); + assert(!v_dst.isEqualIfNotInherited(op)); + vmulps(v_src, v_src, op); + vsubps(v_dst, v_src, v_dst); } else { - assert(!vDst.isEqualIfNotInherited(op)); - mulps(vSrc, op); 
- subps(vSrc, vDst); - movups(vDst, vSrc); + assert(!v_dst.isEqualIfNotInherited(op)); + mulps(v_src, op); + subps(v_src, v_dst); + movups(v_dst, v_src); } } -void JitKernelBase::uni_vpaddd(const Xbyak::Ymm& vDst, - const Xbyak::Ymm& vSrc, +void JitKernelBase::uni_vpaddd(const Xbyak::Ymm& v_dst, + const Xbyak::Ymm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vpaddd(vDst, vSrc, op); + vpaddd(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - Xbyak::Xmm xmmDst(vDst.getIdx()); - vmovups(vDst, vSrc); + Xbyak::Xmm xmmDst(v_dst.getIdx()); + vmovups(v_dst, v_src); if (op.isYMM()) { Xbyak::Ymm ymmOp(op.getIdx()); Xbyak::Xmm xmmOp(op.getIdx()); paddd(xmmDst, xmmOp); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(ymmOp, ymmOp, ymmOp, 0x1); paddd(xmmDst, xmmOp); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(ymmOp, ymmOp, ymmOp, 0x1); } else if (op.isMEM()) { const int vlen = x64::cpu_isa_traits::vlen; paddd(xmmDst, op.getAddress()); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); paddd(xmmDst, ptr[op.getAddress().getRegExp() + vlen]); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); } else { IE_THROW() << "Not supported operand type."; } } else if (isValidIsa(x64::sse41)) { - assert(vDst.getIdx() != vSrc.getIdx()); - paddd(vDst, op); + assert(v_dst.getIdx() != v_src.getIdx()); + paddd(v_dst, op); } else { IE_THROW() << "Not defined behavior for instruction 'vpaddd' in current instructions set."; } } -void JitKernelBase::uni_vpsubd(const Xbyak::Ymm& vDst, - const Xbyak::Ymm& vSrc, +void JitKernelBase::uni_vpaddq(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx2)) { + vpaddq(v_dst, v_src, op); + } else { + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + paddq(v_dst, op); + } +} + +void JitKernelBase::uni_vpsubd(const Xbyak::Ymm& v_dst, + const Xbyak::Ymm& v_src, const Xbyak::Operand& op) { if (isValidIsa(x64::avx2)) { - vpsubd(vDst, vSrc, op); + vpsubd(v_dst, v_src, op); } else if (isValidIsa(x64::avx)) { - Xbyak::Xmm xmmDst(vDst.getIdx()); - vmovups(vDst, vSrc); + Xbyak::Xmm xmmDst(v_dst.getIdx()); + vmovups(v_dst, v_src); if (op.isYMM()) { Xbyak::Ymm ymmOp(op.getIdx()); Xbyak::Xmm xmmOp(op.getIdx()); psubd(xmmDst, xmmOp); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(ymmOp, ymmOp, ymmOp, 0x1); psubd(xmmDst, xmmOp); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(ymmOp, ymmOp, ymmOp, 0x1); } else if (op.isMEM()) { const int vlen = x64::cpu_isa_traits::vlen; psubd(xmmDst, op.getAddress()); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); psubd(xmmDst, ptr[op.getAddress().getRegExp() + vlen]); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); } else { IE_THROW() << "Not supported operand type."; } } else if (isValidIsa(x64::sse41)) { - assert(vDst.getIdx() != vSrc.getIdx()); - psubd(vDst, op); + assert(v_dst.getIdx() != v_src.getIdx()); + psubd(v_dst, op); } else { IE_THROW() << "Not defined behavior for instruction 'vpsubd' in current instructions set."; } } -void JitKernelBase::uni_vdivps(const Xbyak::Xmm& vDst, +void JitKernelBase::uni_vsubpd(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vsubpd(v_dst, v_src, op); + } else { + if (v_dst.getIdx() != 
v_src.getIdx()) { + movups(v_dst, v_src); + } + subpd(v_dst, op); + } +} + +void JitKernelBase::uni_vmulpd(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vmulpd(v_dst, v_src, op); + } else { + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + mulpd(v_dst, op); + } +} + +void JitKernelBase::uni_vpmuludq(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx2)) { + vpmuludq(v_dst, v_src, op); + } else { + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + pmuludq(v_dst, op); + } +} + +void JitKernelBase::uni_vdivps(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op1, const Xbyak::Operand& op2) { if (isValidIsa(x64::avx)) { - vdivps(vDst, op1, op2); + vdivps(v_dst, op1, op2); + } else { + if (!v_dst.isEqualIfNotInherited(op1)) { + movups(v_dst, op1); + } + divps(v_dst, op2); + } +} + +void JitKernelBase::uni_vdivpd(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vdivpd(v_dst, v_src, op); } else { - if (!vDst.isEqualIfNotInherited(op1)) { - movups(vDst, op1); + if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); } - divps(vDst, op2); + divpd(v_dst, op); } } -void JitKernelBase::uni_vandps(const Xbyak::Xmm& vDst, +void JitKernelBase::uni_vandps(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op) { if (isValidIsa(x64::avx)) { - vandps(vDst, vSrs, op); + vandps(v_dst, vSrs, op); } else { - if (!vDst.isEqualIfNotInherited(vSrs)) { - movups(vDst, vSrs); + if (!v_dst.isEqualIfNotInherited(vSrs)) { + movups(v_dst, vSrs); } - andps(vDst, op); + andps(v_dst, op); } } -void JitKernelBase::uni_vandnps(const Xbyak::Xmm& vDst, +void JitKernelBase::uni_vandnps(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op) { if (isValidIsa(x64::avx)) { - vandnps(vDst, vSrs, op); + vandnps(v_dst, vSrs, op); } else { - if (!vDst.isEqualIfNotInherited(vSrs)) { - movups(vDst, vSrs); + if (!v_dst.isEqualIfNotInherited(vSrs)) { + movups(v_dst, vSrs); } - andnps(vDst, op); + andnps(v_dst, op); } } -void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, +void JitKernelBase::gatherdd(const Xbyak::Xmm& v_dst, const Xbyak::Reg64& rSrcPtr, const Xbyak::Xmm& vSrcShift, const Xbyak::Opmask& kReadMask, @@ -178,28 +249,28 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, if (!useMask) kxnord(kReadMask, kReadMask, kReadMask); if (zeroFill) - uni_vpxor(vDst, vDst, vDst); + uni_vpxor(v_dst, v_dst, v_dst); - vpgatherdd(vDst | kReadMask, ptr[rSrcPtr + vSrcShift]); + vpgatherdd(v_dst | kReadMask, ptr[rSrcPtr + vSrcShift]); } -void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, +void JitKernelBase::gatherdd(const Xbyak::Xmm& v_dst, const Xbyak::Reg64& rSrcPtr, const Xbyak::Xmm& vSrcShift, const Xbyak::Xmm& vReadMask, const bool useMask, const bool zeroFill) { - if (vDst.getIdx() == vSrcShift.getIdx() || vDst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { + if (v_dst.getIdx() == vSrcShift.getIdx() || v_dst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { IE_THROW() << "Any pair of the index, mask, or destination registers cannot be the same."; } if (zeroFill) - pxor(vDst, vDst); // Don't use vpxor. It zeros the rest of the YMM register. + pxor(v_dst, v_dst); // Don't use vpxor. It zeros the rest of the YMM register. 
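+    // AVX2 path below: hardware vpgatherdd with vReadMask as the write mask.
+    // SSE4.1 fallback: the gather is emulated lane by lane, extracting each
+    // 32-bit offset with pextrd and inserting the loaded element with pinsrd,
+    // skipping lanes whose mask dword is zero.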
if (isValidIsa(x64::avx2)) { if (!useMask) uni_vpcmpeqd(vReadMask, vReadMask, vReadMask); - vpgatherdd(vDst, ptr[rSrcPtr + vSrcShift], vReadMask); + vpgatherdd(v_dst, ptr[rSrcPtr + vSrcShift], vReadMask); } else { auto rAux = getReg64(); Xbyak::Reg32 r32Aux = Xbyak::Reg32(rAux.getIdx()); @@ -213,7 +284,7 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, je(lLoopNext, T_NEAR); } uni_vpextrd(r32Aux, vSrcShift, i); - pinsrd(vDst, ptr[rSrcPtr + rAux], i); + pinsrd(v_dst, ptr[rSrcPtr + rAux], i); if (useMask) L(lLoopNext); @@ -221,30 +292,30 @@ void JitKernelBase::gatherdd(const Xbyak::Xmm& vDst, } } -void JitKernelBase::gatherdd(const Xbyak::Ymm& vDst, +void JitKernelBase::gatherdd(const Xbyak::Ymm& v_dst, const Xbyak::Reg64& rSrcPtr, const Xbyak::Ymm& vSrcShift, const Xbyak::Ymm& vReadMask, const bool useMask, const bool zeroFill) { - if (vDst.getIdx() == vSrcShift.getIdx() || vDst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { + if (v_dst.getIdx() == vSrcShift.getIdx() || v_dst.getIdx() == vReadMask.getIdx() || vSrcShift.getIdx() == vReadMask.getIdx()) { IE_THROW() << "Any pair of the index, mask, or destination registers cannot be the same."; } if (isValidIsa(x64::avx2)) { if (!useMask) uni_vpcmpeqd(vReadMask, vReadMask, vReadMask); if (zeroFill) - uni_vpxor(vDst, vDst, vDst); + uni_vpxor(v_dst, v_dst, v_dst); - vpgatherdd(vDst, ptr[rSrcPtr + vSrcShift], vReadMask); + vpgatherdd(v_dst, ptr[rSrcPtr + vSrcShift], vReadMask); } else { - Xbyak::Xmm xmmDst = Xbyak::Xmm(vDst.getIdx()), + Xbyak::Xmm xmmDst = Xbyak::Xmm(v_dst.getIdx()), xmmSrcShft = Xbyak::Xmm(vSrcShift.getIdx()), xmmReadMask = Xbyak::Xmm(vReadMask.getIdx()); for (uint8_t i = 0; i < 2; i++) { gatherdd(xmmDst, rSrcPtr, xmmSrcShft, xmmReadMask, useMask, zeroFill); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); vperm2f128(vSrcShift, vSrcShift, vSrcShift, 0x1); if (useMask) vperm2f128(vReadMask, vReadMask, vReadMask, 0x1); @@ -252,6 +323,15 @@ void JitKernelBase::gatherdd(const Xbyak::Ymm& vDst, } } +void JitKernelBase::uni_vpbroadcastq(const Xbyak::Xmm &x, const Xbyak::Operand &op) { + if (isValidIsa(x64::avx2)) { + vpbroadcastq(x, op); + } else { + movsd(x, op); + shufpd(x, x, 0x0); + } +} + void JitKernelBase::uni_vpbroadcastd(const Xbyak::Xmm &x, const Xbyak::Operand &op) { if (isValidIsa(x64::avx2)) { vpbroadcastd(x, op); @@ -285,6 +365,57 @@ void JitKernelBase::uni_vpbroadcastd(const Xbyak::Ymm &x, const Xbyak::Operand & } } +void JitKernelBase::uni_vroundpd(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op, const uint8_t imm) { + if (isValidIsa(x64::avx512_core)) { + vrndscalepd(v_dst, op, imm & 0x3); + } else if (isValidIsa(x64::avx)) { + vroundpd(v_dst, op, imm); + } else { + roundpd(v_dst, op, imm); + } +} + +void JitKernelBase::uni_vcvtdq2pd(const Xbyak::Xmm& v_dst, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vcvtdq2pd(v_dst, op); + } else { + cvtdq2pd(v_dst, op); + } +} + +void JitKernelBase::uni_vcvtpd2dq(const Xbyak::Xmm& v_dst, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx)) { + vcvtpd2dq(v_dst, op); + } else { + cvtpd2dq(v_dst, op); + } +} + +void JitKernelBase::uni_vpmovzxdq(const Xbyak::Xmm& v_dst, + const Xbyak::Operand& op) { + if (isValidIsa(x64::avx2)) { + vpmovzxdq(v_dst, op); + } else { + pmovzxdq(v_dst, op); + } +} + +void JitKernelBase::uni_vshufpd(const Xbyak::Xmm& v_dst, + const Xbyak::Xmm& v_src, + const Xbyak::Operand& op, + uint8_t imm) { + if (isValidIsa(x64::avx)) { + vshufpd(v_dst, v_src, op, imm); + } else { 
+ if (v_dst.getIdx() != v_src.getIdx()) { + movups(v_dst, v_src); + } + shufpd(v_dst, op, imm); + } +} + void JitKernelBase::fillRestWorkMask(const Xbyak::Opmask& dstMask, const Xbyak::Reg64& rWorkRest) { auto rOnes = getReg64(); @@ -362,7 +493,7 @@ void JitKernelBase::fillRestWorkMask(const Xbyak::Ymm& ymmDstMask, L(lEnd); } -void JitKernelBase::load(const Xbyak::Xmm& vDst, +void JitKernelBase::load(const Xbyak::Xmm& v_dst, const Xbyak::Address& srcAddr, const Xbyak::Reg64& rLoadNum, const size_t typeSize, @@ -373,7 +504,7 @@ void JitKernelBase::load(const Xbyak::Xmm& vDst, const uint8_t elPerVec = x64::cpu_isa_traits::vlen / typeSize; Xbyak::Label lEnd; if (zeroFilling) - pxor(vDst, vDst); + pxor(v_dst, v_dst); for (uint8_t i = 0; i < elPerVec; i++) { cmp(rLoadNum, i); @@ -381,18 +512,18 @@ void JitKernelBase::load(const Xbyak::Xmm& vDst, const size_t offset = i * typeSize; if (typeSize == 1) - pinsrb(vDst, ptr[srcAddr.getRegExp() + offset], i); + pinsrb(v_dst, ptr[srcAddr.getRegExp() + offset], i); else if (typeSize == 2) - pinsrw(vDst, ptr[srcAddr.getRegExp() + offset], i); + pinsrw(v_dst, ptr[srcAddr.getRegExp() + offset], i); else if (typeSize == 4) - pinsrd(vDst, ptr[srcAddr.getRegExp() + offset], i); + pinsrd(v_dst, ptr[srcAddr.getRegExp() + offset], i); else if (typeSize == 8) - pinsrq(vDst, ptr[srcAddr.getRegExp() + offset], i); + pinsrq(v_dst, ptr[srcAddr.getRegExp() + offset], i); } L(lEnd); } -void JitKernelBase::load(const Xbyak::Ymm& vDst, +void JitKernelBase::load(const Xbyak::Ymm& v_dst, const Xbyak::Address& srcAddr, const Xbyak::Reg64& rLoadNum, const size_t typeSize, @@ -403,8 +534,8 @@ void JitKernelBase::load(const Xbyak::Ymm& vDst, const size_t elPerXmm = x64::cpu_isa_traits::vlen / typeSize; Xbyak::Label lEnd; if (zeroFilling) - uni_vpxor(vDst, vDst, vDst); - Xbyak::Xmm xmmDst(vDst.getIdx()); + uni_vpxor(v_dst, v_dst, v_dst); + Xbyak::Xmm xmmDst(v_dst.getIdx()); for (size_t i = 0lu; i < 2lu; i++) { Xbyak::Label lPerm; @@ -427,13 +558,13 @@ void JitKernelBase::load(const Xbyak::Ymm& vDst, } L(lPerm); - vperm2f128(vDst, vDst, vDst, 0x1); + vperm2f128(v_dst, v_dst, v_dst, 0x1); } L(lEnd); } void JitKernelBase::store(const Xbyak::Address& dstAddr, - const Xbyak::Xmm& vSrc, + const Xbyak::Xmm& v_src, const Xbyak::Reg64& rToStoreNum, const size_t typeSize) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { @@ -448,27 +579,27 @@ void JitKernelBase::store(const Xbyak::Address& dstAddr, const size_t offset = i * typeSize; if (typeSize == 1) { - uni_vpextrb(ptr[dstAddr.getRegExp() + offset], vSrc, i); + uni_vpextrb(ptr[dstAddr.getRegExp() + offset], v_src, i); } else if (typeSize == 2) { - uni_vpextrw(ptr[dstAddr.getRegExp() + offset], vSrc, i); + uni_vpextrw(ptr[dstAddr.getRegExp() + offset], v_src, i); } else if (typeSize == 4) { - uni_vpextrd(ptr[dstAddr.getRegExp() + offset], vSrc, i); + uni_vpextrd(ptr[dstAddr.getRegExp() + offset], v_src, i); } else if (typeSize == 8) { - uni_vpextrq(ptr[dstAddr.getRegExp() + offset], vSrc, i); + uni_vpextrq(ptr[dstAddr.getRegExp() + offset], v_src, i); } } L(lEnd); } void JitKernelBase::store(const Xbyak::Address& dstAddr, - const Xbyak::Ymm& vSrc, + const Xbyak::Ymm& v_src, const Xbyak::Reg64& rToStoreNum, const size_t typeSize) { if (!one_of(typeSize, 1u, 2u, 4u, 8u)) { IE_THROW() << "Could not store data with type size " << typeSize; } Xbyak::Label lEnd; - Xbyak::Xmm xmmSrc(vSrc.getIdx()); + Xbyak::Xmm xmmSrc(v_src.getIdx()); const size_t elPerXmm = x64::cpu_isa_traits::vlen / typeSize; for (int i = 0; i < 2; i++) { @@ -493,7 +624,7 @@ void 
JitKernelBase::store(const Xbyak::Address& dstAddr, } L(lPerm); - vperm2f128(vSrc, vSrc, vSrc, 0x1); + vperm2f128(v_src, v_src, v_src, 0x1); } L(lEnd); } @@ -575,3 +706,7 @@ void JitKernelBase::memMovDD(const Xbyak::Reg64& rDst, } L(lEnd); } + +} // namespace kernel +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp index e39efde753bbbc..f17eb9a02d8771 100644 --- a/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/jit_kernel_base.hpp @@ -1,14 +1,23 @@ -// Copyright (C) 2022 Intel Corporation +// Copyright (C) 2018-2023 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // #pragma once +#include "openvino/core/visibility.hpp" + +#if defined(OPENVINO_ARCH_X86_64) #include "cpu/x64/jit_generator.hpp" #include "registers_pool.hpp" +#endif // OPENVINO_ARCH_X86_64 namespace ov { namespace intel_cpu { +namespace kernel { + +class JitKernelBase; + +#if defined(OPENVINO_ARCH_X86_64) #define getReg64() RegistersPool::Reg(registersPool) #define getReg32() RegistersPool::Reg(registersPool) @@ -17,7 +26,11 @@ namespace intel_cpu { class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { public: - JitKernelBase(const char* name) : dnnl::impl::cpu::x64::jit_generator(name) {} + JitKernelBase(const char* name, dnnl::impl::cpu::x64::cpu_isa_t max_cpu_isa); + + dnnl::impl::cpu::x64::cpu_isa_t getIsa() { return m_isa; } + + size_t getVectorLen() { return vlen; } void uni_vfmsub132ps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op); @@ -31,14 +44,24 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { void uni_vpaddd(const Xbyak::Ymm& vDst, const Xbyak::Ymm& vSrc, const Xbyak::Operand& op); + void uni_vpaddq(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op); + void uni_vpsubd(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrc, const Xbyak::Operand& op) { jit_generator::uni_vpsubd(vDst, vSrc, op); } void uni_vpsubd(const Xbyak::Ymm& vDst, const Xbyak::Ymm& vSrc, const Xbyak::Operand& op); + void uni_vsubpd(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src, const Xbyak::Operand& op); + + void uni_vmulpd(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src, const Xbyak::Operand& op); + + void uni_vpmuludq(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& op_1, const Xbyak::Operand& op_2); + void uni_vdivps(const Xbyak::Xmm& vDst, const Xbyak::Operand& op1, const Xbyak::Operand& op2); + void uni_vdivpd(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_src, const Xbyak::Operand& op2); + void uni_vandps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op); void uni_vandnps(const Xbyak::Xmm& vDst, const Xbyak::Xmm& vSrs, const Xbyak::Operand &op); @@ -63,6 +86,18 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { void uni_vpbroadcastd(const Xbyak::Ymm &x, const Xbyak::Operand &op); + void uni_vpbroadcastq(const Xbyak::Xmm &x, const Xbyak::Operand &op); + + void uni_vroundpd(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op, const uint8_t imm); + + void uni_vcvtdq2pd(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op); + + void uni_vcvtpd2dq(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op); + + void uni_vpmovzxdq(const Xbyak::Xmm& v_dst, const Xbyak::Operand& op); + + void uni_vshufpd(const Xbyak::Xmm& v_dst, const Xbyak::Xmm& v_srs, const Xbyak::Operand& op, uint8_t imm); + void gatherdd(const Xbyak::Xmm& vDst, const 
Xbyak::Reg64& rSrcPtr, const Xbyak::Xmm& vSrcShift, @@ -140,7 +175,9 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { return dnnl::impl::cpu::x64::mayiuse(isa); } + const dnnl::impl::cpu::x64::cpu_isa_t m_isa; RegistersPool::Ptr registersPool; + size_t vlen; enum { // Comparison predicate operand (immediate byte) for single-precision floating-point values. @@ -155,5 +192,70 @@ class JitKernelBase: public dnnl::impl::cpu::x64::jit_generator { }; }; +template +class JitKernel : public JitKernelBase { +public: + using KernelFunc = void (*)(const CallArgs *); + + explicit JitKernel(const char* name, const CompileParams& jcp, dnnl::impl::cpu::x64::cpu_isa_t max_cpu_isa) + : JitKernelBase{name, max_cpu_isa}, m_jcp{jcp}, m_func{nullptr} {} + + ~JitKernel() override = default; + + dnnl::impl::status_t create_kernel() override { + const dnnl::impl::status_t code = jit_generator::create_kernel(); + if (code != dnnl::impl::status::success) { + OPENVINO_THROW("Could not create kernel. Error code: ", std::to_string(code), ". ", + "Xbyak error code: ", Xbyak::ConvertErrorToString(Xbyak::GetError())); + } + m_func = (decltype(m_func))jit_ker(); + return code; + } + + void operator()(const CallArgs* args) const { + assert(m_func); + m_func(args); + } + + void operator()(const CallArgs& args) const { + this->operator()(&args); + } + + template class KernelT> + static std::shared_ptr> createInstance(const CompileParams& jcp) { + std::shared_ptr> res; + + try { +#define IF_ISA_CASE(ISA) \ + if (dnnl::impl::cpu::x64::mayiuse(ISA)) \ + res.reset(new KernelT(jcp)); \ + else + + IF_ISA_CASE(dnnl::impl::cpu::x64::avx512_core) + IF_ISA_CASE(dnnl::impl::cpu::x64::avx2) + IF_ISA_CASE(dnnl::impl::cpu::x64::sse41); + +#undef IF_ISA_CASE + + if (res) { + res->create_kernel(); + } + } catch (...) 
{ + return nullptr; + } + + return res; + } + +protected: + CompileParams m_jcp; + +private: + KernelFunc m_func; +}; + +#endif // OPENVINO_ARCH_X86_64 + +} // namespace kernel } // namespace intel_cpu } // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.cpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.cpp new file mode 100644 index 00000000000000..301c2f7e08ff69 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.cpp @@ -0,0 +1,635 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "random_uniform.hpp" + +using namespace dnnl::impl::cpu; + +namespace ov { +namespace intel_cpu { +namespace kernel { + +#define GET_OFF(field) offsetof(RandomUniformCallArgs, field) + +template +RandomUniform::RandomUniform(const RandomUniformCompileParams& jcp) : + JitKernel(jit_name(), jcp, isa) { +} + +template +void RandomUniform::generate() { + this->preamble(); + registersPool = RegistersPool::create(isa, {rax, rcx, rsp, rdi, k0}); + + r64_dst = getReg64(); + r64_work_amount = getReg64(); + + mov(r64_work_amount, ptr[r64_params + GET_OFF(work_amount)]); + mov(r64_dst, ptr[r64_params + GET_OFF(dst_ptr)]); + + initVectors(); + process(); + + registersPool.reset(); + this->postamble(); +} + +template <> +void RandomUniform::initVectors() { + const auto r64_aux = getReg64(); + const auto r32_aux = Xbyak::Reg32(r64_aux.getIdx()); + const auto r16_aux = Xbyak::Reg16(r64_aux.getIdx()); + + v_max_mul_n_64 = getVmm(); + v_max_mul_c_64 = getVmm(); + v_add_low_k = getVmm(); + v_add_up_k = getVmm(); + v_n_inc = getVmm(); + v_range = getVmm(); + v_min = getVmm(); + v_key_64 = getVmm(); + v_counter_64 = getVmm(); + v_n_64 = getVmm(); + v_res_perm = getVmm(); + + if (m_jcp.out_data_type.is_real()) { + v_convert_0 = getVmm(); + v_convert_1 = getVmm(); + } + + // Initialize constants. 
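+    // The helper macros below materialize the kernel constants: BROADCAST_R
+    // moves an immediate into a scratch GPR and broadcasts it across the vector,
+    // while BROADCAST_P loads a pointer from the call-args structure and
+    // broadcasts the value it points to.
+    // For f32 output, 0x3f800000 is the bit pattern of 1.0f and 0x007fffff is
+    // the mantissa mask: convert() keeps 23 random mantissa bits, ORs in the
+    // exponent of 1.0f to get a number in [1; 2), subtracts 1.0f, and maps the
+    // result onto [min; min + range) with a single fused multiply-add.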
+#define BROADCAST_R(F, V, R, C) \ + mov(R, C); \ + F(V, R); +#define BROADCAST_P(F, V, R, C) \ + mov(R, ptr[r64_params + GET_OFF(C)]); \ + F(V, ptr[R]); + + BROADCAST_R(vpbroadcastq, v_max_mul_n_64, r64_aux, STATISTIC_MAXIMIZING_MULTIPLIER_N) + BROADCAST_R(vpbroadcastq, v_max_mul_c_64, r64_aux, STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER) + BROADCAST_R(vpbroadcastd, v_add_low_k, r32_aux, CRUSH_RESISTANCE_CONST_LOWER_VALUE) + BROADCAST_R(vpbroadcastd, v_add_up_k, r32_aux, CRUSH_RESISTANCE_CONST_UPPER_VALUE) + BROADCAST_R(vpbroadcastq, v_n_inc, r64_aux, 0x00000008) + + if (m_jcp.out_data_type == element::f32) { + BROADCAST_R(vpbroadcastd, v_convert_0, r32_aux, 0x3f800000) + BROADCAST_R(vpbroadcastd, v_convert_1, r32_aux, 0x007fffff) + BROADCAST_P(vpbroadcastd, v_range, r64_aux, range_ptr) + BROADCAST_P(vpbroadcastd, v_min, r64_aux, min_ptr) + } else if (m_jcp.out_data_type == element::f16 && x64::mayiuse(x64::avx512_core_fp16)) { + BROADCAST_R(vpbroadcastw, v_convert_0, r16_aux, 0x3c00) + BROADCAST_R(vpbroadcastw, v_convert_1, r16_aux, 0x03ff) + BROADCAST_P(vpbroadcastw, v_range, r64_aux, range_ptr) + BROADCAST_P(vpbroadcastw, v_min, r64_aux, min_ptr) + } else if (m_jcp.out_data_type == element::bf16 && x64::mayiuse(x64::avx512_core_bf16)) { + v_convert_2 = getVmm(); + const auto ymm_min = Xbyak::Ymm(v_min.getIdx()); + const auto ymm_range = Xbyak::Ymm(v_range.getIdx()); + + BROADCAST_R(vpbroadcastw, v_convert_0, r16_aux, 0x3f80) + BROADCAST_R(vpbroadcastw, v_convert_1, r16_aux, 0x007f) + BROADCAST_R(vpbroadcastd, v_convert_2, r32_aux, 0x3f800000) + + BROADCAST_P(vpbroadcastw, v_range, r64_aux, range_ptr) + vpmovzxwd(v_range, ymm_range); + uni_vpslld(v_range, v_range, 16); + + BROADCAST_P(vpbroadcastw, v_min, r64_aux, min_ptr) + vpmovzxwd(v_min, ymm_min); + uni_vpslld(v_min, v_min, 16); + } else if (m_jcp.out_data_type == element::i32) { + const auto ymm_range = Xbyak::Ymm(v_range.getIdx()); + + BROADCAST_P(vpbroadcastd, v_range, r64_aux, range_ptr) + BROADCAST_P(vpbroadcastd, v_min, r64_aux, min_ptr) + + uni_vcvtdq2pd(v_range, ymm_range); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + + // Initialize inputs. + BROADCAST_P(vpbroadcastq, v_key_64, r64_aux, key_ptr) + BROADCAST_P(vpbroadcastq, v_counter_64, r64_aux, counter_ptr) + BROADCAST_P(vpbroadcastq, v_n_64, r64_aux, n_ptr) + + if (m_jcp.out_data_type.size() <= 4) { + static const uint64_t n_inc_arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; + mov(r64_aux, reinterpret_cast(n_inc_arr)); + } else { + static const uint64_t n_inc_arr[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; // TODO: i64 + mov(r64_aux, reinterpret_cast(n_inc_arr)); + } + uni_vpaddq(v_n_64, v_n_64, ptr[r64_aux]); + + // Initialize auxiliary vectors. 
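+    // v_res_perm holds vpermt2d lane indices (bit 4 selects the second source)
+    // used after the last Philox round to interleave the 32-bit lanes of the
+    // two partial results back into sequential output order.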
+ static const uint32_t res_perm_mask[16] = { 0b00000000, 0b00010000, 0b00001000, 0b00011000, 0b00000010, 0b00010010, 0b00001010, 0b00011010, + 0b00000100, 0b00010100, 0b00001100, 0b00011100, 0b00000110, 0b00010110, 0b00001110, 0b00011110 }; + mov(r64_aux, reinterpret_cast(res_perm_mask)); + uni_vmovups(v_res_perm, ptr[r64_aux]); + + if (m_jcp.out_data_type == element::f16 && x64::mayiuse(x64::avx512_core_fp16)) { + v_perm_16 = getVmm(); + static const uint16_t perm_16[32] = { 0b00000000, 0b00000010, 0b00000100, 0b00000110, 0b00001000, 0b00001010, 0b00001100, 0b00001110, + 0b00010000, 0b00010010, 0b00010100, 0b00010110, 0b00011000, 0b00011010, 0b00011100, 0b00011110, + 0b00100000, 0b00100010, 0b00100100, 0b00100110, 0b00101000, 0b00101010, 0b00101100, 0b00101110, + 0b00110000, 0b00110010, 0b00110100, 0b00110110, 0b00111000, 0b00111010, 0b00111100, 0b00111110 }; + mov(r64_aux, reinterpret_cast(perm_16)); + uni_vmovups(v_perm_16, ptr[r64_aux]); + } + +#undef BROADCAST_R +#undef BROADCAST_P +} + +template // Works for AVX2, SSE41 +void RandomUniform::initVectors() { + const auto r64_aux = getReg64(); + + v_max_mul_n_64 = getVmm(); + v_max_mul_c_64 = getVmm(); + v_add_low_k = getVmm(); + v_add_up_k = getVmm(); + v_range = getVmm(); + v_key_64 = getVmm(); + v_counter_64 = getVmm(); + v_n_64 = getVmm(); + + r64_n_inc = getReg64(); + r64_min = getReg64(); + +#define INIT_ARR(A, V, R, T) \ + static const T A[8] = { V, V, V, V, V, V, V, V }; \ + if (isa == x64::avx2) { \ + mov(R, reinterpret_cast(A)); \ + } else { \ + static const T* A##_aligned = A + (reinterpret_cast(A) % 16) / sizeof(T); \ + mov(R, reinterpret_cast(A##_aligned)); \ + } + + // Initialize constants. + INIT_ARR(max_mul_n_64, STATISTIC_MAXIMIZING_MULTIPLIER_N, r64_aux, uint64_t); + uni_vmovups(v_max_mul_n_64, ptr[r64_aux]); + + INIT_ARR(max_mul_c_64, STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER, r64_aux, uint64_t); + uni_vmovups(v_max_mul_c_64, ptr[r64_aux]); + + INIT_ARR(add_low_k, CRUSH_RESISTANCE_CONST_LOWER_VALUE, r64_aux, uint32_t); + uni_vmovups(v_add_low_k, ptr[r64_aux]); + + INIT_ARR(add_up_k, CRUSH_RESISTANCE_CONST_UPPER_VALUE, r64_aux, uint32_t); + uni_vmovups(v_add_up_k, ptr[r64_aux]); + + INIT_ARR(n_inc_step, isa == x64::avx2 ? 
4 : 2, r64_n_inc, uint64_t); + + if (m_jcp.out_data_type == element::f32) { + r64_convert_0 = getReg64(); + r64_convert_1 = getReg64(); + + INIT_ARR(convert_0, 0x3f800000, r64_convert_0, uint32_t); + INIT_ARR(convert_1, 0x007fffff, r64_convert_1, uint32_t); + + mov(r64_aux, ptr[r64_params + GET_OFF(range_ptr)]); + uni_vpbroadcastd(v_range, ptr[r64_aux]); + + auto v_aux = getVmm(); + mov(r64_aux, ptr[r64_params + GET_OFF(min_ptr)]); + uni_vpbroadcastd(v_aux, ptr[r64_aux]); + static uint32_t min_arr[8]; + mov(r64_min, reinterpret_cast(min_arr)); + uni_vmovups(ptr[r64_min], v_aux); + } else if (m_jcp.out_data_type == element::i32) { + r64_f64_pow_52 = getReg64(); + const auto v_aux = getVmm(); + const auto xmm_range = Xbyak::Xmm(v_range.getIdx()); + + INIT_ARR(f64_pow_52, 0x4330000000000000, r64_f64_pow_52, uint64_t); + + mov(r64_aux, ptr[r64_params + GET_OFF(range_ptr)]); + uni_vpbroadcastd(v_range, ptr[r64_aux]); + + mov(r64_aux, ptr[r64_params + GET_OFF(min_ptr)]); + uni_vpbroadcastd(v_aux, ptr[r64_aux]); + static uint32_t min_arr[8]; + mov(r64_min, reinterpret_cast(min_arr)); + uni_vmovups(ptr[r64_min], v_aux); + + uni_vcvtdq2pd(v_range, xmm_range); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + + // Initialize inputs. + mov(r64_aux, ptr[r64_params + GET_OFF(key_ptr)]); + uni_vpbroadcastq(v_key_64, ptr[r64_aux]); + + mov(r64_aux, ptr[r64_params + GET_OFF(counter_ptr)]); + uni_vpbroadcastq(v_counter_64, ptr[r64_aux]); + + mov(r64_aux, ptr[r64_params + GET_OFF(n_ptr)]); + uni_vpbroadcastq(v_n_64, ptr[r64_aux]); + + if (m_jcp.out_data_type.size() <= 4) { + if (isa == x64::avx2) { + static const uint64_t n_inc_arr[4] = { 0, 1, 2, 3 }; + mov(r64_aux, reinterpret_cast(n_inc_arr)); + } else { + static uint64_t n_inc_arr[4]; + static uint64_t* n_inc_arr_aligned = n_inc_arr + (reinterpret_cast(n_inc_arr) % 16) / sizeof(uint64_t); + n_inc_arr_aligned[0] = 0; + n_inc_arr_aligned[1] = 1; + mov(r64_aux, reinterpret_cast(n_inc_arr_aligned)); + } + } else { + static const uint64_t n_inc_arr[4] = { 0, 1, 2, 3 }; // TODO: i64 + mov(r64_aux, reinterpret_cast(n_inc_arr)); + } + + uni_vpaddq(v_n_64, v_n_64, ptr[r64_aux]); + +#undef INIT_ARR +} + +template +void RandomUniform::process() { + auto v_dst_0 = getVmm(); + auto v_dst_1 = getVmm(); + std::vector v_res{ v_dst_0, v_dst_1 }; + + auto step = vlen; + if (one_of(m_jcp.out_data_type.size(), 2lu, 4lu)) { + step = vlen * 2 / sizeof(uint32_t); + } else if (m_jcp.out_data_type.size() == 8) { + step = vlen / sizeof(uint32_t); + } + + Xbyak::Label l_loop, l_tail; + L(l_loop); { + cmp(r64_work_amount, step); + jl(l_tail, T_NEAR); + + runPhilox(v_res, v_key_64, v_counter_64, v_n_64); + convert(v_res, v_res); + + uni_vmovups(ptr[r64_dst], v_dst_0); + add(r64_dst, vlen); + if (one_of(m_jcp.out_data_type.size(), 4lu, 8lu)) { + uni_vmovups(ptr[r64_dst], v_dst_1); + add(r64_dst, vlen); + } + + if (isa == x64::avx512_core) { + uni_vpaddd(v_n_64, v_n_64, v_n_inc); + } else { + uni_vpaddd(v_n_64, v_n_64, ptr[r64_n_inc]); + } + + sub(r64_work_amount, step); + jmp(l_loop, T_NEAR); + } + + L(l_tail); + tail(v_res); +} + +template +void RandomUniform::calculateRound(const Vmm& vmm_k_0, const Vmm& vmm_k_1, const Vmm& vmm_c_0, const Vmm& vmm_c_1, + const Vmm& vmm_n_0, const Vmm& vmm_n_1, const Vmm& vmm_aux_0, const Vmm& vmm_aux_1) { + uni_vpmuludq(vmm_aux_0, vmm_n_0, v_max_mul_n_64); // {p0,p1,p0,p1} = {n0,_,n0,_} * {m0,_,m0,_} + uni_vpmuludq(vmm_aux_1, vmm_c_0, v_max_mul_c_64); // 
{r0,r1,r0,r1} = {c0,_,c0,_} * {m0,_,m0,_} + + uni_vpshufd(vmm_c_0, vmm_aux_0, 0b10110001); // {p1,p0,p1,p0} = shuf {p0,p1,p0,p1} + uni_vxorps(vmm_c_0, vmm_c_0, vmm_c_1); // {c0,_,c0,_} = {p1,_,p1,_} ^ {c1,_,c1,_} + uni_vxorps(vmm_c_0, vmm_c_0, vmm_k_1); // {c0,_,c0,_} = {c0,_,c0,_} ^ {k1,_,k1,_} + + uni_vpshufd(vmm_n_0, vmm_aux_1, 0b10110001); // {r1,r0,r1,r0} = shuf {r0,r1,r0,r1} + uni_vxorps(vmm_n_0, vmm_n_0, vmm_n_1); // {n0,_,n0,_} = {r1,_,r1,_} ^ {n1,_,n1,_} + uni_vxorps(vmm_n_0, vmm_n_0, vmm_k_0); // {n0,_,n0,_} = {n0,_,n0,_} ^ {k0,_,k0,_} +} + +template +void RandomUniform::runPhilox(const std::vector& vmm_dst, const Vmm& vmm_key, const Vmm& vmm_counter, const Vmm& vmm_n) { + auto vmm_k_0 = getVmm(); + auto vmm_k_1 = getVmm(); + auto vmm_n_0 = getVmm(); + auto vmm_n_1 = vmm_dst[0]; + auto vmm_c_0 = getVmm(); + auto vmm_c_1 = getVmm(); + auto vmm_aux_0 = getVmm(); + auto vmm_aux_1 = vmm_dst[1]; + + uni_vmovups(vmm_k_0, vmm_key); // {k0,k1,k0,k1} -> {k0,_,k0,_} + uni_vpshufd(vmm_k_1, vmm_key, 0b10110001); // {k0,k1,k0,k1} -> {k1,_,k1,_} + + uni_vpmuludq(vmm_aux_0, vmm_n, v_max_mul_n_64); // {p0,p1,p0,p1} = {n0,_,n0,_} * {m0,_,m0,_} + uni_vpmuludq(vmm_aux_1, vmm_counter, v_max_mul_c_64); // {r0,r1,r0,r1} = {c0,_,c0,_} * {m0,_,m0,_} + + uni_vxorps(vmm_c_0, vmm_aux_0, vmm_counter); // {_,c0,_,c0} = {_,p1,_,p1} ^ {_,c1,_,c1} + uni_vxorps(vmm_c_0, vmm_c_0, vmm_key); // {_,c0,_,c0} = {_,c0,_,c0} ^ {_,k1,_,k1} + uni_vpshufd(vmm_c_0, vmm_c_0, 0b10110001); // {_,c0,_,c0} -> {c0,_,c0,_} + + uni_vxorps(vmm_n_0, vmm_aux_1, vmm_n); // {_,n0,_,n0} = {_,r1,_,r1} ^ {_,n1,_,n1} + uni_vpshufd(vmm_n_0, vmm_n_0, 0b10110001); // {_,n0,_,n0} -> {n0,_,n0,_} + uni_vxorps(vmm_n_0, vmm_n_0, vmm_key); // {n0,_,n0,_} = {n0,_,n0,_} ^ {k0,_,k0,_} + + for (size_t i = 0lu; i < ROUNDS_NUMBER - 1; i++) { + raiseKey(vmm_k_0, vmm_k_1); + + std::swap(vmm_c_1, vmm_aux_0); + std::swap(vmm_n_1, vmm_aux_1); + calculateRound(vmm_k_0, vmm_k_1, vmm_c_0, vmm_c_1, vmm_n_0, vmm_n_1, vmm_aux_0, vmm_aux_1); + } + std::swap(vmm_c_1, vmm_aux_0); + std::swap(vmm_n_1, vmm_aux_1); + + if (isa == x64::avx512_core) { + vpermt2d(vmm_n_0, v_res_perm, vmm_n_1); // {n0,n1,n0,n1} = perm {n0,_,n0,_} {n1,_,n1,_} + vpermt2d(vmm_c_0, v_res_perm, vmm_c_1); // {c0,c1,c0,c1} = perm {c0,_,c0,_} {c1,_,c1,_} + vshufpd(vmm_dst[0], vmm_n_0, vmm_c_0, 0b00000000); // {n0,n1,c0,c1} = shuf {n0,n1,n0,n1} {c0,c1,c0,c1} + vshufpd(vmm_dst[1], vmm_n_0, vmm_c_0, 0b11111111); // {n0,n1,c0,c1} = shuf {n0,n1,n0,n1} {c0,c1,c0,c1} + } else if (isa == x64::avx2) { + auto ymm_dst_0 = Xbyak::Ymm(vmm_dst[0].getIdx()); + auto ymm_dst_1 = Xbyak::Ymm(vmm_dst[1].getIdx()); + auto ymm_c_0 = Xbyak::Ymm(vmm_c_0.getIdx()); + + uni_vshufps(vmm_n_0, vmm_n_0, vmm_n_1, 0b10001000); // {n0,n0,n1,n1} = shuf {n0,_,n0,_} {n1,_,n1,_} + uni_vshufps(vmm_c_0, vmm_c_0, vmm_c_1, 0b10001000); // {c0,c0,c1,c1} = shuf {c0,_,c0,_} {c1,_,c1,_} + uni_vshufps(ymm_dst_1, vmm_n_0, vmm_c_0, 0b10001000); // {n0,n1,c0,c1} = shuf {n0,n0,n1,n1} {c0,c0,c1,c1} + uni_vshufps(vmm_c_0, vmm_n_0, vmm_c_0, 0b11011101); // {n0,n1,c0,c1} = shuf {n0,n0,n1,n1} {c0,c0,c1,c1} + vperm2f128(ymm_dst_0, ymm_dst_1, ymm_c_0, 0b00100000); + vperm2f128(ymm_dst_1, ymm_dst_1, ymm_c_0, 0b00110001); + } else { + uni_vshufps(vmm_n_0, vmm_n_0, vmm_n_1, 0b10001000); + uni_vshufps(vmm_c_0, vmm_c_0, vmm_c_1, 0b10001000); + uni_vshufps(vmm_dst[0], vmm_n_0, vmm_c_0, 0b10001000); + uni_vshufps(vmm_dst[1], vmm_n_0, vmm_c_0, 0b11011101); + } +} + +template +void RandomUniform::raiseKey(const Vmm& vmm_k_0, const Vmm& vmm_k_1) { + 
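+    // Philox key schedule: between rounds the two 32-bit key words are bumped
+    // by the Weyl constants 0x9E3779B9 (derived from the golden ratio) and
+    // 0xBB67AE85 (derived from sqrt(3)), as defined for Philox-4x32-10.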
uni_vpaddd(vmm_k_0, vmm_k_0, v_add_low_k); // {k0,_,k0,_} + {l0,_,l0,_} + uni_vpaddd(vmm_k_1, vmm_k_1, v_add_up_k); // {k1,_,k1,_} + {u0,_,u0,_} +} + +template <> +void RandomUniform::convert(const std::vector& v_dst, const std::vector& v_src) { + if (m_jcp.out_data_type.size() == 4) { + for (size_t i = 0lu; i < v_src.size(); i++) { + const auto& vmm_src = v_src[i]; + const auto& vmm_dst = v_dst[i]; + + if (m_jcp.out_data_type == element::f32) { + uni_vandps(vmm_dst, vmm_src, v_convert_1); + uni_vorps(vmm_dst, vmm_dst, v_convert_0); + uni_vsubps(vmm_dst, vmm_dst, v_convert_0); + vfmadd132ps(vmm_dst, v_min, v_range); + } else if (m_jcp.out_data_type == element::i32) { + // x % (max - min) + min + const auto v_aux_0 = getVmm(); + const auto v_aux_1 = getVmm(); + const auto ymm_src = Xbyak::Ymm(vmm_src.getIdx()); + const auto ymm_dst = Xbyak::Ymm(vmm_dst.getIdx()); + const auto ymm_aux_1 = Xbyak::Ymm(v_aux_1.getIdx()); + + // Divide in the f64 due to the f32 loses accuracy here. + vcvtudq2pd(v_aux_0, ymm_src); + uni_vdivpd(v_aux_1, v_aux_0, v_range); + uni_vroundpd(v_aux_1, v_aux_1, 3); + vfnmadd132pd(v_aux_1, v_aux_0, v_range); + + vextractf64x4(ymm_dst, vmm_src, 1); + vcvtudq2pd(v_aux_0, ymm_dst); + uni_vcvtpd2dq(ymm_dst, v_aux_1); + uni_vdivpd(v_aux_1, v_aux_0, v_range); + uni_vroundpd(v_aux_1, v_aux_1, 3); + vfnmadd132pd(v_aux_1, v_aux_0, v_range); + uni_vcvtpd2dq(ymm_aux_1, v_aux_1); + vshuff64x2(vmm_dst, vmm_dst, v_aux_1, 0b01000100); + + uni_vpaddd(vmm_dst, vmm_dst, v_min); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + } + } else if (m_jcp.out_data_type.size() == 2) { + if (m_jcp.out_data_type == element::f16 && x64::mayiuse(x64::avx512_core_fp16)) { + const auto& vmm_dst = v_dst[0]; + + if (v_src[0].getIdx() != vmm_dst.getIdx()) { + uni_vmovups(vmm_dst, v_src[0]); + } + vpermt2w(vmm_dst, v_perm_16, v_src[1]); + + uni_vandps(vmm_dst, vmm_dst, v_convert_1); + uni_vorps(vmm_dst, vmm_dst, v_convert_0); + vsubph(vmm_dst, vmm_dst, v_convert_0); + vfmadd132ph(vmm_dst, v_min, v_range); + } else if (m_jcp.out_data_type == element::bf16 && x64::mayiuse(x64::avx512_core_bf16)) { + for (size_t i = 0lu; i < v_src.size(); i++) { + const auto& vmm_dst = v_dst[i]; + + uni_vandps(vmm_dst, v_src[i], v_convert_1); + uni_vorps(vmm_dst, vmm_dst, v_convert_0); + uni_vpslld(vmm_dst, vmm_dst, 16); + + uni_vsubps(vmm_dst, vmm_dst, v_convert_2); + vfmadd132ps(vmm_dst, v_min, v_range); + } + + vcvtne2ps2bf16(v_dst[0], v_dst[1], v_dst[0]); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + } else if (m_jcp.out_data_type.size() == 8) { + if (m_jcp.out_data_type == element::i64) { + // TODO: in scope of i64 enabling. 
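+            // Presumably each i64 lane will consume two 32-bit Philox words and
+            // then apply a 64-bit modulo-range reduction analogous to the i32
+            // branch above; until i64 support is enabled, execution falls
+            // through to the throw below.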
+ } + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } +} + +template // Works for AVX2, SSE41 +void RandomUniform::convert(const std::vector& v_dst, const std::vector& v_src) { + if (m_jcp.out_data_type.size() == 4) { + for (size_t i = 0lu; i < v_src.size(); i++) { + auto vmm_src = v_src[i]; + auto vmm_dst = v_dst[i]; + + if (m_jcp.out_data_type == element::f32) { + uni_vandps(vmm_dst, vmm_src, ptr[r64_convert_1]); + uni_vorps(vmm_dst, vmm_dst, ptr[r64_convert_0]); + uni_vsubps(vmm_dst, vmm_dst, ptr[r64_convert_0]); + if (isa == x64::avx2) { + vfmadd213ps(vmm_dst, v_range, ptr[r64_min]); + } else { + uni_vmulps(vmm_dst, vmm_dst, v_range); + uni_vaddps(vmm_dst, vmm_dst, ptr[r64_min]); + } + } else if (m_jcp.out_data_type == element::i32) { + // x % (max - min) + min + const auto v_aux_0 = getVmm(); + const auto v_aux_1 = getVmm(); + const auto xmm_dst = Xbyak::Xmm(vmm_dst.getIdx()); + const auto ymm_dst = Xbyak::Ymm(vmm_dst.getIdx()); + const auto xmm_aux_1 = Xbyak::Xmm(v_aux_1.getIdx()); + + // Convert u32->f64. TODO: move to convert emitter after i64 enabling. + uni_vpmovzxdq(v_aux_0, xmm_dst); + uni_vorps(v_aux_0, v_aux_0, ptr[r64_f64_pow_52]); + uni_vsubpd(v_aux_0, v_aux_0, ptr[r64_f64_pow_52]); + + // Divide in the f64 due to the f32 loses accuracy here. + uni_vdivpd(v_aux_1, v_aux_0, v_range); + uni_vroundpd(v_aux_1, v_aux_1, 3); + if (isa == x64::avx2) { + vfnmadd132pd(v_aux_1, v_aux_0, v_range); + } else { + uni_vmulpd(v_aux_1, v_aux_1, v_range); + uni_vsubpd(v_aux_0, v_aux_0, v_aux_1); + uni_vmovups(v_aux_1, v_aux_0); + } + + if (isa == x64::avx2) { + vperm2f128(ymm_dst, ymm_dst, ymm_dst, 0b00000001); + } else { + uni_vshufpd(vmm_dst, vmm_dst, vmm_dst, 0b00000001); + } + // Convert u32->f64. TODO: move to convert emitter after i64 enabling. + uni_vpmovzxdq(v_aux_0, xmm_dst); + uni_vorps(v_aux_0, v_aux_0, ptr[r64_f64_pow_52]); + uni_vsubpd(v_aux_0, v_aux_0, ptr[r64_f64_pow_52]); + + uni_vcvtpd2dq(xmm_dst, v_aux_1); + uni_vdivpd(v_aux_1, v_aux_0, v_range); + uni_vroundpd(v_aux_1, v_aux_1, 3); + if (isa == x64::avx2) { + vfnmadd132pd(v_aux_1, v_aux_0, v_range); + } else { + uni_vmulpd(v_aux_1, v_aux_1, v_range); + uni_vsubpd(v_aux_0, v_aux_0, v_aux_1); + uni_vmovups(v_aux_1, v_aux_0); + } + uni_vcvtpd2dq(xmm_aux_1, v_aux_1); + if (isa == x64::avx2) { + vperm2f128(ymm_dst, ymm_dst, v_aux_1, 0b00100000); + } else { + uni_vshufpd(vmm_dst, vmm_dst, v_aux_1, 0b00000000); + } + + uni_vpaddd(vmm_dst, vmm_dst, ptr[r64_min]); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } + } + } else if (m_jcp.out_data_type.size() == 8) { + if (m_jcp.out_data_type == element::i64) { + // TODO: in scope of i64 enabling. 
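+            // A scalar sketch of what the supported branches above compute
+            // (illustrative only, not part of the kernel):
+            //   f32: out = bit_cast<float>((r & 0x007fffff) | 0x3f800000) - 1.0f;
+            //        out = out * range + min;
+            //   i32: out = int32_t(r % uint32_t(range)) + min;  // reduced in f64
+            // An i64 variant would apply the i32 scheme to 64-bit words.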
+ } + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } else { + OPENVINO_THROW("RandomUniform kernel does not support precision ", m_jcp.out_data_type, " for ", x64::get_isa_info()); + } +} + +template <> +void RandomUniform::tail(const std::vector& vmm_dst) { + Xbyak::Label l_end; + const auto k_rest_mask = getMask(); + + cmp(r64_work_amount, 0); + jle(l_end, T_NEAR); + + runPhilox(vmm_dst, v_key_64, v_counter_64, v_n_64); + convert(vmm_dst, vmm_dst); + + if (m_jcp.out_data_type.size() == 4) { + Xbyak::Label l_0; + const auto step = vlen / sizeof(uint32_t); + + cmp(r64_work_amount, step); + jl(l_0, T_NEAR); + + uni_vmovups(ptr[r64_dst], vmm_dst[0]); + add(r64_dst, vlen); + sub(r64_work_amount, step); + fillRestWorkMask(k_rest_mask, r64_work_amount); + uni_vmovups(ptr[r64_dst] | k_rest_mask, vmm_dst[1]); + jmp(l_end, T_NEAR); + + L(l_0); + fillRestWorkMask(k_rest_mask, r64_work_amount); + uni_vmovups(ptr[r64_dst] | k_rest_mask, vmm_dst[0]); + } else if (m_jcp.out_data_type.size() == 2) { + fillRestWorkMask(k_rest_mask, r64_work_amount); + vmovdqu16(ptr[r64_dst] | k_rest_mask, vmm_dst[0]); + } + + L(l_end); +} + +template <> +void RandomUniform::tail(const std::vector& vmm_dst) { + Xbyak::Label l_0, l_end; + const auto step = vlen / sizeof(uint32_t); + + cmp(r64_work_amount, 0); + jle(l_end, T_NEAR); + + runPhilox(vmm_dst, v_key_64, v_counter_64, v_n_64); + convert(vmm_dst, vmm_dst); + const auto v_rest_mask = getVmm(); + + cmp(r64_work_amount, step); + jl(l_0, T_NEAR); + + uni_vmovups(ptr[r64_dst], vmm_dst[0]); + add(r64_dst, vlen); + sub(r64_work_amount, step); + fillRestWorkMask(v_rest_mask, r64_work_amount, m_jcp.out_data_type.size()); + vmaskmovps(ptr[r64_dst], v_rest_mask, vmm_dst[1]); + jmp(l_end, T_NEAR); + + L(l_0); + fillRestWorkMask(v_rest_mask, r64_work_amount, m_jcp.out_data_type.size()); + vmaskmovps(ptr[r64_dst], v_rest_mask, vmm_dst[0]); + + L(l_end); +} + +template +void RandomUniform::tail(const std::vector& vmm_dst) { + Xbyak::Label l_0, l_end; + const auto step = vlen / sizeof(uint32_t); + + cmp(r64_work_amount, 0); + jle(l_end, T_NEAR); + + runPhilox(vmm_dst, v_key_64, v_counter_64, v_n_64); + convert(vmm_dst, vmm_dst); + + cmp(r64_work_amount, step); + jl(l_0, T_NEAR); + + uni_vmovups(ptr[r64_dst], vmm_dst[0]); + add(r64_dst, vlen); + sub(r64_work_amount, step); + store(ptr[r64_dst], vmm_dst[1], r64_work_amount, m_jcp.out_data_type.size()); + jmp(l_end, T_NEAR); + + L(l_0); + store(ptr[r64_dst], vmm_dst[0], r64_work_amount, m_jcp.out_data_type.size()); + + L(l_end); +} + +template class RandomUniform; +template class RandomUniform; +template class RandomUniform; + +} // namespace kernel +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.hpp b/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.hpp new file mode 100644 index 00000000000000..366be4c3a132ce --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/kernels/x64/random_uniform.hpp @@ -0,0 +1,99 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "jit_kernel_base.hpp" + +#if defined(OPENVINO_ARCH_X86_64) + +namespace ov { +namespace intel_cpu { +namespace kernel { + +struct RandomUniformCompileParams { + element::Type out_data_type = element::f32; +}; + +struct RandomUniformCallArgs { + void* dst_ptr; + const void* key_ptr; + const void* counter_ptr; + const void* n_ptr; + const void* min_ptr; + const 
void* range_ptr; + uint64_t work_amount = 0lu; +}; + +template +class RandomUniform : public JitKernel { +public: + DECLARE_CPU_JIT_AUX_FUNCTIONS(RandomUniform) + + explicit RandomUniform(const RandomUniformCompileParams& jcp); + + void generate() override; + +private: + using Vmm = typename dnnl::impl::utils::conditional3::type; + using Vmask = typename dnnl::impl::utils::conditional3::type; + + RegistersPool::Reg r64_dst; + RegistersPool::Reg r64_work_amount; + RegistersPool::Reg r64_n_inc; + RegistersPool::Reg r64_convert_0; + RegistersPool::Reg r64_convert_1; + RegistersPool::Reg r64_min; + RegistersPool::Reg r64_f64_pow_52; + + const Xbyak::Reg64 r64_params = Xbyak::Reg64(dnnl::impl::cpu::x64::abi_param_regs[0]); + + // Vector registers. + RegistersPool::Reg v_max_mul_n_64; + RegistersPool::Reg v_max_mul_c_64; + RegistersPool::Reg v_add_low_k; + RegistersPool::Reg v_add_up_k; + RegistersPool::Reg v_convert_0; + RegistersPool::Reg v_convert_1; + RegistersPool::Reg v_convert_2; + RegistersPool::Reg v_n_inc; + RegistersPool::Reg v_key_64; + RegistersPool::Reg v_counter_64; + RegistersPool::Reg v_n_64; + RegistersPool::Reg v_min; + RegistersPool::Reg v_range; + RegistersPool::Reg v_res_perm; + RegistersPool::Reg v_perm_16; + + void initVectors(); + + void process(); + + void runPhilox(const std::vector& vmm_res, const Vmm& vmm_key, const Vmm& vmm_counter, const Vmm& vmm_n); + + void calculateRound(const Vmm& vmm_k_0, const Vmm& vmm_k_1, const Vmm& vmm_c_0, const Vmm& vmm_c_1, + const Vmm& vmm_n_0, const Vmm& vmm_n_1, const Vmm& vmm_aux_0, const Vmm& vmm_aux_1); + + void raiseKey(const Vmm& vmm_k_0, const Vmm& vmm_k_1); + + void convert(const std::vector& vmm_dst, const std::vector& vmm_src); + + void tail(const std::vector& vmm_dst); + + static constexpr uint64_t ROUNDS_NUMBER = 10lu; + static constexpr uint32_t CRUSH_RESISTANCE_CONST_LOWER_VALUE = 0x9E3779B9; + static constexpr uint32_t CRUSH_RESISTANCE_CONST_UPPER_VALUE = 0xBB67AE85; + static constexpr uint64_t STATISTIC_MAXIMIZING_MULTIPLIER_N = 0xD2511F53; + static constexpr uint64_t STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER = 0xCD9E8D57; +}; + +} // namespace kernel +} // namespace intel_cpu +} // namespace ov + +#endif // OPENVINO_ARCH_X86_64 diff --git a/src/plugins/intel_cpu/src/nodes/random_uniform.cpp b/src/plugins/intel_cpu/src/nodes/random_uniform.cpp new file mode 100644 index 00000000000000..77d823710c942f --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/random_uniform.cpp @@ -0,0 +1,532 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "random_uniform.hpp" + +#include "ie_parallel.hpp" +#include "ie_ngraph_utils.hpp" +#include +#include +#include "shape_inference/custom/random_uniform.hpp" + +namespace ov { +namespace intel_cpu { +namespace node { + +bool RandomUniform::isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept { + try { + if (op->get_type_info() != op::v8::RandomUniform::get_type_info_static()) { + errorMessage = "Only RandomUniform operation from the opset8 is supported by the CPU plugin."; + return false; + } + } catch (...) { + return false; + } + return true; +} + +RandomUniform::RandomUniform(const std::shared_ptr& op, const GraphContext::CPtr& context) + : Node(op, context, RandomUniformShapeInferFactory(op)) { + std::string errorMessage; + if (!isSupportedOperation(op, errorMessage)) { + THROW_CPU_NODE_ERR(errorMessage); + } + + // RandomUniform should generate new sequence each run even if all inputs are constants. 
So that method Node::IsConstant() + // doesn't return 'True' for RandomUniform with all constant inputs and the node generates new values for each inference, + // we set 'NoConst' value for 'ConstantType' in ctor. + constant = ConstantType::NoConst; + + auto rnd_op = as_type_ptr(op); + m_global_seed = rnd_op->get_global_seed(); + m_op_seed = rnd_op->get_op_seed(); + + m_output_prc = op->get_output_element_type(0); + + for (size_t i = 0lu; i < op->get_input_size(); i++) { + if (is_type(op->get_input_node_ptr(i))) { + m_const_inputs[i] = true; + } + } + + if (m_algo == STL) { + m_generator = std::default_random_engine{static_cast(m_op_seed)}; + } +} + +void RandomUniform::getSupportedDescriptors() { + if (getParentEdges().size() != 3) { + THROW_CPU_NODE_ERR("has incorrect number of input edges."); + } + if (getChildEdges().empty()) { + THROW_CPU_NODE_ERR("has incorrect number of output edges."); + } +} + +void RandomUniform::initSupportedPrimitiveDescriptors() { + auto shape_prc = getOriginalInputPrecisionAtPort(SHAPE); + if (!one_of(shape_prc, InferenceEngine::Precision::I32, InferenceEngine::Precision::I64)) { + shape_prc = InferenceEngine::Precision::I32; + } + + auto out_prc = getOriginalOutputPrecisionAtPort(0); + if (out_prc.is_float() && ((m_algo == PHILOX && + !one_of(out_prc, InferenceEngine::Precision::FP32, InferenceEngine::Precision::FP16, InferenceEngine::Precision::BF16)) || + (m_algo == STL && !one_of(out_prc, InferenceEngine::Precision::FP32)))) { + out_prc = InferenceEngine::Precision::FP32; + } + if (!out_prc.is_float() && !one_of(out_prc, InferenceEngine::Precision::I32, InferenceEngine::Precision::I64)) { + out_prc = InferenceEngine::Precision::I32; + } + m_output_prc = InferenceEngine::details::convertPrecision(out_prc); + + addSupportedPrimDesc({{LayoutType::ncsp, shape_prc, m_const_inputs[SHAPE]}, + {LayoutType::ncsp, out_prc, m_const_inputs[MIN_VAL]}, + {LayoutType::ncsp, out_prc, m_const_inputs[MAX_VAL]}}, + {{LayoutType::ncsp, out_prc}}, + ref_any); +} + +void RandomUniform::createPrimitive() { + if (m_const_inputs[MIN_VAL]) { + initEdgeValues(m_min_val, getParentEdgeAt(MIN_VAL)->getMemoryPtr()->getData(), m_output_prc); + } + if (m_const_inputs[MAX_VAL]) { + initEdgeValues(m_max_val, getParentEdgeAt(MAX_VAL)->getMemoryPtr()->getData(), m_output_prc); + evalRange(); + } + + if (m_algo == PHILOX) { +#if defined(OPENVINO_ARCH_X86_64) + kernel::RandomUniformCompileParams jcp; + + jcp.out_data_type = m_output_prc; + + m_jit_kernel = kernel::JitKernel::createInstance(jcp); + + if (m_jit_kernel) { + if (auto selected_pd = getSelectedPrimitiveDescriptor()) { + using namespace dnnl::impl::cpu; + if (m_jit_kernel->getIsa() == x64::avx512_core) { + selected_pd->setImplementationType(jit_avx512); + } else if (m_jit_kernel->getIsa() == x64::avx2) { + selected_pd->setImplementationType(jit_avx2); + } else if (m_jit_kernel->getIsa() == x64::sse41) { + selected_pd->setImplementationType(jit_sse42); + } + } + } +#endif // OPENVINO_ARCH_X86_64 + } + + if (m_const_inputs[SHAPE]) { + Node::createPrimitive(); + } +} + +bool RandomUniform::needPrepareParams() const { + if (m_out_shape != getChildEdgeAt(0)->getMemoryPtr()->getShape().getStaticDims()) { + return true; + } + return false; +} + +void RandomUniform::prepareParams() { + m_out_shape = getChildEdgeAt(0)->getMemoryPtr()->getShape().getStaticDims(); + m_out_el_num = std::accumulate(m_out_shape.begin(), m_out_shape.end(), 1lu, std::multiplies()); + + if (m_algo == PHILOX) { + m_skip_count = m_out_el_num * SKIP_CONST; + + if 
(m_out_el_num < PHILOX_PARALLEL_EXECUTION_THRESHOLD) { + m_threads_num = 1; + } else { + m_threads_num = parallel_get_max_threads(); + } + m_thread_params.resize(m_threads_num); + + parallel_nt(m_threads_num, [&](const int ithr, const int nthr) { + auto& p = m_thread_params[ithr]; + uint64_t start = 0lu, end = 0lu; + + if (m_jit_kernel) { +#if defined(OPENVINO_ARCH_X86_64) + const auto block_size = (m_jit_kernel->getVectorLen() / m_output_prc.size()) * 2; + const auto blocks_num = (m_out_el_num + block_size - 1) / block_size; + const auto blocks_per_thr = (blocks_num + nthr - 1) / nthr; + + start = ithr * blocks_per_thr * block_size; + end = (ithr + 1) * blocks_per_thr * block_size; +#endif // OPENVINO_ARCH_X86_64 + } else { + const auto groups_num = (m_out_el_num + PHILOX_GROUP_SIZE - 1) / PHILOX_GROUP_SIZE; + const auto groups_per_thr = (groups_num + nthr - 1) / nthr; + + start = ithr * groups_per_thr * PHILOX_GROUP_SIZE; + end = (ithr + 1) * groups_per_thr * PHILOX_GROUP_SIZE; + + p.step = m_output_prc.size() > 4 ? 2 : 4; + } + + if (end > m_out_el_num) { + end = m_out_el_num; + } + if (start > end) { + start = end; + } + p.work_amount = end - start; + p.n_shift = start / PHILOX_GROUP_SIZE; + p.dst_shift = start * m_output_prc.size(); + }); + } +} + +void RandomUniform::execute(dnnl::stream strm) { + if (!m_const_inputs[MIN_VAL]) { + initEdgeValues(m_min_val, getParentEdgeAt(MIN_VAL)->getMemoryPtr()->getData(), m_output_prc); + if (m_const_inputs[MAX_VAL]) { + evalRange(); + } + } + if (!m_const_inputs[MAX_VAL]) { + initEdgeValues(m_max_val, getParentEdgeAt(MAX_VAL)->getMemoryPtr()->getData(), m_output_prc); + evalRange(); + } + + auto data = getChildEdgeAt(0)->getMemoryPtr()->getData(); + + if (m_algo == PHILOX) { + m_state = computePhilox(data, m_out_el_num, m_state); + } else if (m_algo == STL) { + computeStl(data, m_out_el_num); + } else { + THROW_CPU_NODE_ERR("unsupported algorithm."); + } +} + +void RandomUniform::executeDynamicImpl(dnnl::stream strm) { + execute(strm); +} + +////////////// PHILOX algo /////////////// + +namespace { +// Following const values are taken from the original paper: +// https://www.thesalmons.org/john/random123/papers/random123sc11.pdf +constexpr uint32_t CRUSH_RESISTANCE_CONST_LOWER_VALUE = 0x9E3779B9; +constexpr uint32_t CRUSH_RESISTANCE_CONST_UPPER_VALUE = 0xBB67AE85; +constexpr uint64_t STATISTIC_MAXIMIZING_MULTIPLIER_N = 0xD2511F53; +constexpr uint64_t STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER = 0xCD9E8D57; +constexpr uint64_t ROUNDS_NUMBER = 10llu; + +inline void calculateRound(const uint32_t* key, uint32_t* counter, uint32_t* n) { + uint64_t prod_0 = STATISTIC_MAXIMIZING_MULTIPLIER_N * n[0]; + uint64_t prod_1 = STATISTIC_MAXIMIZING_MULTIPLIER_COUNTER * counter[0]; + n[0] = static_cast(prod_1 >> 32) ^ n[1] ^ key[0]; + n[1] = static_cast(prod_1); + counter[0] = static_cast(prod_0 >> 32) ^ counter[1] ^ key[1]; + counter[1] = static_cast(prod_0); +} + +inline void raiseKey(uint32_t* key) { + key[0] += CRUSH_RESISTANCE_CONST_LOWER_VALUE; + key[1] += CRUSH_RESISTANCE_CONST_UPPER_VALUE; +} + +inline void runPhilox(uint64_t key, uint64_t counter, uint64_t n, uint32_t* res) { + uint32_t* key_32 = reinterpret_cast(&key); + uint32_t* counter_32 = reinterpret_cast(&counter); + uint32_t* n_32 = reinterpret_cast(&n); + + for (size_t i = 0lu; i < ROUNDS_NUMBER; i++) { + calculateRound(key_32, counter_32, n_32); + if (i < ROUNDS_NUMBER - 1) + raiseKey(key_32); + } + + res[0] = n_32[0]; + res[1] = n_32[1]; + res[2] = counter_32[0]; + res[3] = counter_32[1]; +} + 
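+// Note on the conversions below: 0x3f800000 is the IEEE-754 encoding of 1.0f, so
+// OR-ing 23 random mantissa bits into it yields a float uniformly distributed in
+// [1.0, 2.0); (x - 1.0) * range + min then maps it onto [min, max). The f16
+// (0x3c00, 10 mantissa bits) and bf16 (0x3f80, 7 mantissa bits) overloads apply
+// the same trick with their formats' encodings of 1.0, while the integer overloads
+// reduce the raw 32/64-bit words into [min, max) with a modulo by the range.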
+inline void convertToOutputType(const uint32_t* in, + float min, + float range, + float* out, + size_t el_to_copy) { + RandomUniform::OutputType out_val; + + for (size_t i = 0lu; i < el_to_copy; i++) { + out_val.u32 = 0x3f800000 | (in[i] & 0x7fffffu); + out[i] = (out_val.f32 - 1.f) * range + min; + } +} + +inline void convertToOutputType(const uint32_t* in, + float16 min, + float16 range, + float16* out, + size_t el_to_copy) { + RandomUniform::OutputType out_val; + + for (size_t i = 0lu; i < el_to_copy; i++) { + uint16_t x_uint16 = static_cast(in[i]); + out_val.u16 = 0x3c00 | (x_uint16 & 0x03ffu); + out[i] = (out_val.f16 - static_cast(1)) * range + min; + } +} + +inline void convertToOutputType(const uint32_t* in, + bfloat16 min, + bfloat16 range, + bfloat16* out, + size_t el_to_copy) { + RandomUniform::OutputType out_val; + + for (size_t i = 0lu; i < el_to_copy; i++) { + uint16_t x_uint16 = static_cast(in[i]); + out_val.u16 = 0x3f80 | (x_uint16 & 0x7fu); + out[i] = (out_val.bf16 - static_cast(1)) * range + min; + } +} + +inline void convertToOutputType(const uint32_t* in, + int32_t min, + int32_t range, + int32_t* out, + size_t el_to_copy) { + for (size_t i = 0lu; i < el_to_copy; i++) { + out[i] = static_cast(in[i] % range + min); + } +} + +inline void convertToOutputType(const uint32_t* in, + int64_t min, + int64_t range, + int64_t* out, + size_t el_to_copy) { + for (size_t i = 0lu; i < el_to_copy; i++) { + out[i] = static_cast(((static_cast(in[i * 2]) << 32) + in[i * 2 + 1]) % range + min); + } +} + +} // namespace + +std::pair RandomUniform::computePhilox(void* out, size_t out_el_num, const std::pair& prev_state) { + // When both seed values are equal to zero RandomUniform should generate non-deterministic sequence. + if (m_global_seed == 0lu && m_op_seed == 0lu) { + std::srand(static_cast(std::time(nullptr))); + m_global_seed = std::rand(); + } + + uint64_t n_state = prev_state.first; + uint64_t counter_state = prev_state.second; + + uint64_t counter = counter_state > 0 ? 
counter_state : m_op_seed; + + auto out_u8 = reinterpret_cast(out); + + if (m_jit_kernel) { +#if defined(OPENVINO_ARCH_X86_64) + parallel_nt(m_threads_num, [&](const int ithr, const int nthr) { + auto& p = m_thread_params[ithr]; + if (p.work_amount == 0lu) { + return; + } + auto n = n_state + p.n_shift; + + kernel::RandomUniformCallArgs args; + + args.dst_ptr = (out_u8 + p.dst_shift); + args.key_ptr = &m_global_seed; + args.counter_ptr = &counter; + args.n_ptr = &n; + args.min_ptr = &m_min_val; + args.range_ptr = &m_range_val; + args.work_amount = p.work_amount; + + (*m_jit_kernel)(&args); + }); +#endif // OPENVINO_ARCH_X86_64 + } else { + auto threadBody = [&](const int ithr, const int nthr) { + auto& p = m_thread_params[ithr]; + if (p.work_amount == 0lu) { + return; + } + auto n = n_state + p.n_shift; + auto out_cur = out_u8 + p.dst_shift; + auto work_rest = static_cast(p.work_amount); + uint32_t res[4]; + +#define EXEC_CASE(P) \ + case element::P: { \ + auto out_t = reinterpret_cast::value_type *>(out_cur); \ + for (; work_rest > 0l; work_rest -= p.step, out_t += p.step) { \ + runPhilox(m_global_seed, counter, n, res); \ + auto el_to_copy = std::min(p.step, static_cast(work_rest)); \ + convertToOutputType(res, m_min_val.P, m_range_val.P, out_t, el_to_copy); \ + if (++n == 0) { \ + counter++; \ + } \ + } \ + } break; + + switch (m_output_prc) { + EXEC_CASE(f32) + EXEC_CASE(f16) + EXEC_CASE(bf16) + EXEC_CASE(i32) + EXEC_CASE(i64) + default: THROW_CPU_NODE_ERR("Unsupported type of RandomUniform: ", m_output_prc.to_string()); + } + +#undef EXEC_CASE + }; + + parallel_nt(m_threads_num, threadBody); + } + + // Calculate counter values for next RandomUniform run. + n_state += m_skip_count; + if (n_state < m_skip_count) { + counter_state++; + } + + return { n_state, counter_state }; +} + +////////////// STL algo /////////////// +void RandomUniform::computeStl(void* out, size_t work_amount) { + switch (m_output_prc) { + case element::f32: { + generateData>( + std::uniform_real_distribution{m_min_val.f32, m_max_val.f32}, out, work_amount); + } break; + case element::i32: { + generateData>( + std::uniform_int_distribution{m_min_val.i32, m_max_val.i32}, out, work_amount); + } break; + case element::i64: { + generateData>( + std::uniform_int_distribution{m_min_val.i64, m_max_val.i64}, out, work_amount); + } break; + default: + THROW_CPU_NODE_ERR("has unsupported output type: ", m_output_prc); + } +} + +template +void RandomUniform::generateData(DISTR_TYPE distribution, void* out, size_t work_amount) { + auto dst = reinterpret_cast(out); + for (size_t i = 0; i < work_amount; i++) { + *dst = distribution(m_generator); + dst++; + } +} +////////////////////////////////// + +void RandomUniform::initEdgeValues(OutputType& dst, const void* src, const element::Type& output_type) { +#define EL_CASE(E) \ + case element::E: \ + dst.E = *reinterpret_cast::value_type *>(src); \ + break; + + switch (output_type) { + EL_CASE(f32) + EL_CASE(f16) + EL_CASE(bf16) + EL_CASE(i32) + EL_CASE(i64) + EL_CASE(f64) + default: + THROW_CPU_NODE_ERR("has unsupported output precision: ", output_type); + } + +#undef EL_CASE +} + +void RandomUniform::evalRange() { +#define EL_CASE(E) \ + case element::E: \ + m_range_val.E = m_max_val.E - m_min_val.E; \ + break; + + switch (m_output_prc) { + EL_CASE(f32) + EL_CASE(f16) + EL_CASE(bf16) + EL_CASE(i32) + EL_CASE(i64) + EL_CASE(f64) + default: + THROW_CPU_NODE_ERR("has unsupported output precision: ", m_output_prc); + } + +#undef EL_CASE +} + +std::string 
RandomUniform::getPrimitiveDescriptorType() const { + auto selectedPrimitiveDesc = getSelectedPrimitiveDescriptor(); + + impl_desc_type type = impl_desc_type::undef; + if (selectedPrimitiveDesc) { + type = selectedPrimitiveDesc->getImplementationType(); + } + + std::string str_type; + + auto add_type = [&](std::string t) { + if (!str_type.empty() && t.c_str()[0] != '_') + str_type += "_"; + str_type += t; + }; + +#define SEARCH_TYPE(_type) \ + if ((type & impl_desc_type::_type) == impl_desc_type::_type) \ + add_type(#_type) + + SEARCH_TYPE(undef); + SEARCH_TYPE(jit); + SEARCH_TYPE(ref); + + SEARCH_TYPE(avx512); + SEARCH_TYPE(avx2); + SEARCH_TYPE(sse42); + SEARCH_TYPE(any); + +#undef SEARCH_TYPE + + if (type == impl_desc_type::unknown) + str_type = "unknown"; + else if (str_type.empty()) + str_type = "undef"; + + if (selectedPrimitiveDesc) { + if (selectedPrimitiveDesc->getConfig().outConfs[0].getMemDesc()->getPrecision() != InferenceEngine::Precision::U8) { + str_type += "_" + std::string(selectedPrimitiveDesc->getConfig().outConfs[0].getMemDesc()->getPrecision().name()); + } else { + str_type += "_I8"; + } + } + + return str_type; +} + +bool RandomUniform::needShapeInfer() const { + return !m_const_inputs[SHAPE]; +} + +bool RandomUniform::isExecutable() const { + return !isInputTensorAtPortEmpty(SHAPE); +} + +bool RandomUniform::created() const { + return getType() == Type::RandomUniform; +} + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/random_uniform.hpp b/src/plugins/intel_cpu/src/nodes/random_uniform.hpp new file mode 100644 index 00000000000000..ecbfebdf5d79c6 --- /dev/null +++ b/src/plugins/intel_cpu/src/nodes/random_uniform.hpp @@ -0,0 +1,120 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include +#include "kernels/x64/random_uniform.hpp" + +namespace ov { +namespace intel_cpu { +namespace node { + +class RandomUniform : public Node { +public: + union OutputType { + float f32; + float16 f16; + bfloat16 bf16; + double f64; + int32_t i32; + uint32_t u32; + uint16_t u16; + int64_t i64; + }; + + RandomUniform(const std::shared_ptr& op, const GraphContext::CPtr& context); + + void getSupportedDescriptors() override; + + void initSupportedPrimitiveDescriptors() override; + + bool needPrepareParams() const override; + + void prepareParams() override; + + void execute(dnnl::stream strm) override; + + void executeDynamicImpl(dnnl::stream strm) override; + + bool isExecutable() const override; + + void createPrimitive() override; + + bool created() const override; + + bool canBeInPlace() const override { return false; } + + static bool isSupportedOperation(const std::shared_ptr& op, std::string& errorMessage) noexcept; + + std::string getPrimitiveDescriptorType() const override; + +protected: + bool needShapeInfer() const override; + +private: + void computeStl(void* out, size_t work_amount); + + std::pair computePhilox(void* out, size_t work_amount, const std::pair& prev_state); + + template + void generateData(DISTR_TYPE distribution, void* out, size_t work_amount); + + void initOutShape(VectorDims& dst, const void* src, const element::Type& shape_type, size_t len); + + void initEdgeValues(OutputType& dst, const void* src, const element::Type& output_type); + + void evalRange(); + + enum { SHAPE = 0, MIN_VAL, MAX_VAL }; + enum AlgoType { STL, PHILOX }; + + bool m_const_inputs[3] = {false, false, false}; + + ov::element::Type m_output_prc; + uint64_t 
m_global_seed = 0lu; + uint64_t m_op_seed = 0lu; + std::pair m_state {0lu, 0lu}; + + VectorDims m_out_shape = {}; + uint64_t m_out_el_num = 1lu; + OutputType m_min_val; + OutputType m_max_val; + OutputType m_range_val; + AlgoType m_algo = PHILOX; + + std::default_random_engine m_generator; + + struct ThreadParams { + uint64_t work_amount = 0lu; + uint64_t dst_shift = 0lu; + uint64_t n_shift = 0lu; + uint64_t step = 0lu; + }; + + uint64_t m_threads_num = 0lu; + std::vector m_thread_params; + + ///// PHILOX constants ///// + + // Determines how many sequence elements of RNG sequence are skipped between runs. + // Can be any positive value, 256 is chosen for parity with Tensorflow. + static constexpr uint64_t SKIP_CONST = 256lu; + + // Philox algorithm returns 4 elements of RNG sequence per each invocation + static constexpr uint64_t PHILOX_GROUP_SIZE = 4lu; + + // Output elements number threshold to execute on one thread. + static constexpr uint64_t PHILOX_PARALLEL_EXECUTION_THRESHOLD = 1000lu; + + uint64_t m_skip_count = 0lu; + ///////////////////////////////////////////////////////////////////////////////// + + std::shared_ptr m_jit_kernel; +}; + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/nodes/reference.cpp b/src/plugins/intel_cpu/src/nodes/reference.cpp index b42dc99b390fb4..091e31813125cf 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.cpp +++ b/src/plugins/intel_cpu/src/nodes/reference.cpp @@ -2,18 +2,12 @@ // SPDX-License-Identifier: Apache-2.0 // -#include - -#include -#include +#include "reference.h" -#include "common/blocked_desc_creator.h" #include "common/cpu_memcpy.h" +#include #include "openvino/core/shape_util.hpp" -#include "openvino/runtime/tensor.hpp" -#include "reference.h" -using namespace dnnl; using namespace InferenceEngine; using namespace InferenceEngine::details; @@ -21,21 +15,15 @@ namespace ov { namespace intel_cpu { namespace node { -Reference::Reference(const std::shared_ptr& op, const GraphContext::CPtr context, +Reference::Reference(const std::shared_ptr& op, const GraphContext::CPtr& context, const std::string& errorMessage) : - Node(op, context, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ngraphOp(op), additionalErrorMessage(errorMessage) { + Node(op, context, NgraphShapeInferFactory(op, FULL_PORT_MASK)), ovCoreNode(op), additionalErrorMessage(errorMessage) { if (!op->has_evaluate()) { IE_THROW(NotImplemented) << "Cannot fallback on ngraph reference implementation (Ngraph::Node::evaluate() is not implemented)"; } + setType(Type::Reference); setTypeStr("Reference"); - - // RandomUniform should generate new sequence each run even if all inputs are constants. 
So that method Node::IsConstant() - // doesn't return 'True' for RandomUniform with all constant inputs and the node generates new values for each inference, - // we set 'NoConst' value for 'ConstantType' in ctor - if (ov::is_type(ngraphOp)) { - constant = ConstantType::NoConst; - } } void Reference::getSupportedDescriptors() {} @@ -47,13 +35,13 @@ void Reference::initSupportedPrimitiveDescriptors() { std::vector inputConfigurators; inputConfigurators.reserve(inputShapes.size()); for (size_t i = 0; i < inputShapes.size(); i++) { - inputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ngraphOp->get_input_element_type(i)), inputShapes[i]); + inputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ovCoreNode->get_input_element_type(i)), inputShapes[i]); } std::vector outputConfigurators; outputConfigurators.reserve(inputShapes.size()); for (size_t i = 0; i < outputShapes.size(); i++) { - outputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ngraphOp->get_output_element_type(i)), outputShapes[i]); + outputConfigurators.emplace_back(LayoutType::ncsp, convertPrecision(ovCoreNode->get_output_element_type(i)), outputShapes[i]); } addSupportedPrimDesc(inputConfigurators, outputConfigurators, impl_desc_type::ref); @@ -64,8 +52,8 @@ void Reference::createPrimitive() {} void Reference::execute(dnnl::stream strm) { auto inputs = prepareInputs(); auto outputs = prepareOutputs(); - if (!ngraphOp->evaluate(outputs, inputs)) { - IE_THROW() << "Evaluation failed on node of type: " << std::string(ngraphOp->get_type_name()) << " name: " << getName(); + if (!ovCoreNode->evaluate(outputs, inputs)) { + THROW_CPU_NODE_ERR("evaluation failed for core operation: ", std::string(ovCoreNode->get_type_name())); } } @@ -81,18 +69,16 @@ void Reference::executeDynamicImpl(dnnl::stream strm) { for (size_t i = 0; i < outputShapes.size(); ++i) { auto mem_desc = getBaseMemDescAtOutputPort(i); if (mem_desc->isDefined()) { - outputs.emplace_back(ngraphOp->get_output_element_type(i), mem_desc->getShape().getStaticDims()); + outputs.emplace_back(ovCoreNode->get_output_element_type(i), mem_desc->getShape().getStaticDims()); } else { - outputs.emplace_back(ngraphOp->get_output_element_type(i), ov::util::make_dynamic_shape()); + outputs.emplace_back(ovCoreNode->get_output_element_type(i), ov::util::make_dynamic_shape()); } } } else { - IE_THROW(Unexpected) << - "Unexpected shape infer result status during the inference of a node with type " << - getTypeStr() << " and name " << getName(); + THROW_CPU_NODE_ERR("got unexpected shape infer result status during the inference."); } - if (!ngraphOp->evaluate(outputs, inputs)) { - IE_THROW() << "Evaluation failed on node of type: " << std::string(ngraphOp->get_type_name()) << " name: " << getName(); + if (!ovCoreNode->evaluate(outputs, inputs)) { + THROW_CPU_NODE_ERR("evaluation failed for core operation: ", std::string(ovCoreNode->get_type_name())); } if (ShapeInferStatus::skip == result.status) { std::vector newOutputDims; @@ -105,8 +91,7 @@ void Reference::executeDynamicImpl(dnnl::stream strm) { auto memory = getChildEdgesAtPort(i)[0]->getMemoryPtr(); auto& tensor = outputs[i]; if (memory->getSize() != tensor.get_byte_size()) { - IE_THROW(Unexpected) << "Output tensor data size mismatch occurred during the inference of a node with type " << - getTypeStr() << " and name " << getName() << " on output port number " << i; + THROW_CPU_NODE_ERR("output tensor data size mismatch occurred during the inference on output port number ", i); } 
cpu_memcpy(memory->getData(), tensor.data(), tensor.get_byte_size()); } @@ -125,9 +110,9 @@ ov::TensorVector Reference::prepareInputs() const { ov::TensorVector inputs; for (size_t i = 0; i < inputShapes.size(); i++) { void *srcDataPtr = getParentEdgesAtPort(i)[0]->getMemory().getData(); - ov::Shape shape = ngraphOp->get_input_partial_shape(i).rank().get_length() == 0 ? + ov::Shape shape = ovCoreNode->get_input_partial_shape(i).rank().get_length() == 0 ? ov::Shape{} : getParentEdgesAtPort(i)[0]->getMemory().getStaticDims(); - inputs.push_back(ov::Tensor(ngraphOp->get_input_element_type(i), shape, srcDataPtr)); + inputs.push_back(ov::Tensor(ovCoreNode->get_input_element_type(i), shape, srcDataPtr)); } return inputs; } @@ -136,9 +121,9 @@ ov::TensorVector Reference::prepareOutputs() const { ov::TensorVector outputs; for (size_t i = 0; i < outputShapes.size(); i++) { void *dstDataPtr = getChildEdgesAtPort(i)[0]->getMemory().getData(); - ov::Shape shape = ngraphOp->get_output_partial_shape(i).rank().get_length() == 0 ? + ov::Shape shape = ovCoreNode->get_output_partial_shape(i).rank().get_length() == 0 ? ov::Shape{} : getChildEdgesAtPort(i)[0]->getMemory().getStaticDims(); - outputs.push_back(ov::Tensor(ngraphOp->get_output_element_type(i), shape, dstDataPtr)); + outputs.push_back(ov::Tensor(ovCoreNode->get_output_element_type(i), shape, dstDataPtr)); } return outputs; } diff --git a/src/plugins/intel_cpu/src/nodes/reference.h b/src/plugins/intel_cpu/src/nodes/reference.h index 4c2a8a1310806f..c2453835229138 100644 --- a/src/plugins/intel_cpu/src/nodes/reference.h +++ b/src/plugins/intel_cpu/src/nodes/reference.h @@ -12,7 +12,7 @@ namespace node { class Reference : public Node { public: - Reference(const std::shared_ptr& op, const GraphContext::CPtr context, const std::string& errorMessage); + Reference(const std::shared_ptr& op, const GraphContext::CPtr& context, const std::string& errorMessage); void getSupportedDescriptors() override; void initSupportedPrimitiveDescriptors() override; @@ -29,7 +29,7 @@ class Reference : public Node { ov::TensorVector prepareOutputs() const; private: - const std::shared_ptr ngraphOp; + const std::shared_ptr ovCoreNode; const std::string additionalErrorMessage; }; diff --git a/src/plugins/intel_cpu/src/nodes_factory.cpp b/src/plugins/intel_cpu/src/nodes_factory.cpp index 3afe8aaa32c1d9..7add05741f04e1 100644 --- a/src/plugins/intel_cpu/src/nodes_factory.cpp +++ b/src/plugins/intel_cpu/src/nodes_factory.cpp @@ -79,6 +79,7 @@ #include "nodes/experimental_detectron_generate_proposals_single_image.h" #include "nodes/generate_proposals.h" #include "nodes/embedding_bag_packed_sum.h" +#include "nodes/random_uniform.hpp" #include "nodes/reduce.h" #include "nodes/if.h" #include "nodes/ctc_greedy_decoder.h" @@ -180,6 +181,7 @@ Node::NodesFactory::NodesFactory() INTEL_CPU_NODE(Unique, Type::Unique); INTEL_CPU_NODE(Ngram, Type::Ngram); INTEL_CPU_NODE(Interpolate, Type::Interpolate); + INTEL_CPU_NODE(RandomUniform, Type::RandomUniform); INTEL_CPU_NODE(Reduce, Type::Reduce); INTEL_CPU_NODE(Gather, Type::Gather); INTEL_CPU_NODE(NonMaxSuppression, Type::NonMaxSuppression); diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp new file mode 100644 index 00000000000000..cca3c74cce86b0 --- /dev/null +++ b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.cpp @@ -0,0 +1,47 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + 
+#include "random_uniform.hpp" +#include + +namespace ov { +namespace intel_cpu { +namespace node { + +// TODO: remove after fixing the issue 123011 +IShapeInfer::Result RandomUniformShapeInfer::infer( + const std::vector>& input_shapes, + const std::unordered_map& data_dependency) { + VectorDims dims; + const auto& mem = data_dependency.at(0); + const auto rank = mem->getShape().getElementsCount(); + auto shape_prc = mem->getDesc().getPrecision(); + switch (shape_prc) { + case InferenceEngine::Precision::I32: { + auto data = reinterpret_cast(mem->getData()); + dims.assign(data, data + rank); + } break; + case InferenceEngine::Precision::I64: { + auto data = reinterpret_cast(mem->getData()); + dims.assign(data, data + rank); + } break; + default: + OPENVINO_THROW("Unexpected Shape input precision: ", shape_prc); + } + + return {{dims}, ShapeInferStatus::success}; +} + +RandomUniformShapeInferFactory::RandomUniformShapeInferFactory(const std::shared_ptr& op) : m_op(op) { + OPENVINO_ASSERT(ov::is_type(m_op), + "Unexpected op type in RandomUniform shape inference factory: ", m_op->get_type_name()); +} + +ShapeInferPtr RandomUniformShapeInferFactory::makeShapeInfer() const { + return std::make_shared(); +} + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp new file mode 100644 index 00000000000000..ce87a966a9cbc9 --- /dev/null +++ b/src/plugins/intel_cpu/src/shape_inference/custom/random_uniform.hpp @@ -0,0 +1,37 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "shape_inference/shape_inference_cpu.hpp" +#include + +#pragma once + +namespace ov { +namespace intel_cpu { +namespace node { + +class RandomUniformShapeInfer : public ShapeInferEmptyPads { +public: + explicit RandomUniformShapeInfer() {} + IShapeInfer::Result infer( + const std::vector>& input_shapes, + const std::unordered_map& data_dependency) override; + + port_mask_t get_port_mask() const override { + return PortMask(0); + } +}; + +class RandomUniformShapeInferFactory : public ShapeInferFactory { +public: + explicit RandomUniformShapeInferFactory(const std::shared_ptr& op); + ShapeInferPtr makeShapeInfer() const override; + +private: + std::shared_ptr m_op; +}; + +} // namespace node +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 9faf421c26a0f9..4eb40365fa95d7 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -189,6 +189,8 @@ std::vector disabledTestPatterns() { R"(smoke_LSTMSequenceCommon.*LSTMSequenceTest.Inference.*CONVERT_TO_TI.*)", // Issue: 122094 R"(smoke_Interpolate_Basic_Down_Sample_Tail/InterpolateLayerTest.Inference.*(asymmetric|align_corners).*f16.*)", + // Need to generate sequence exactly in the i64 data type. Enable in scope of i64 enabling. 
+ R"(.*RandomUniformLayerTestCPU.*OutPrc=i64.*)", }; #if defined(OPENVINO_ARCH_X86) diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp new file mode 100644 index 00000000000000..2f9706e7d2562e --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.cpp @@ -0,0 +1,265 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "random_uniform.hpp" +#include "ov_models/builders.hpp" + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { + +std::string RandomUniformLayerTestCPU::getTestCaseName(const testing::TestParamInfo& obj) { + const auto& out_shape = std::get<0>(obj.param); + const auto& min_max = std::get<1>(obj.param); + + std::ostringstream result; + + result << "IS={" << out_shape.size(); + result << "}_OS=" << out_shape; + result << "_Min=" << std::get<0>(min_max); + result << "_Max=" << std::get<1>(min_max); + result << "_ShapePrc=" << std::get<2>(obj.param); + result << "_OutPrc=" << std::get<3>(obj.param); + result << "_GlobalSeed=" << std::get<4>(obj.param); + result << "_OperationalSeed=" << std::get<5>(obj.param); + result << "_ConstIn={" << utils::bool2str(std::get<6>(obj.param)) << "," + << utils::bool2str(std::get<7>(obj.param)) << "," + << utils::bool2str(std::get<8>(obj.param)) << "}"; + + result << CPUTestsBase::getTestCaseName(std::get<9>(obj.param)); + + const auto& config = std::get<10>(obj.param); + if (!config.empty()) { + result << "_PluginConf={"; + for (const auto& conf_item : config) { + result << "_" << conf_item.first << "="; + conf_item.second.print(result); + } + result << "}"; + } + + return result.str(); +} + +void RandomUniformLayerTestCPU::SetUp() { + targetDevice = utils::DEVICE_CPU; + + const auto& params = this->GetParam(); + m_output_shape = std::get<0>(params); + const auto& min_max = std::get<1>(params); + const auto& shape_prc = std::get<2>(params); + const auto& output_prc = std::get<3>(params); + m_global_seed = std::get<4>(params); + m_operational_seed = std::get<5>(params); + const auto& const_in_1 = std::get<6>(params); + const auto& const_in_2 = std::get<7>(params); + const auto& const_in_3 = std::get<8>(params); + const auto& cpu_params = std::get<9>(params); + configuration = std::get<10>(params); + + m_min_val = std::get<0>(min_max); + m_max_val = std::get<1>(min_max); + std::tie(inFmts, outFmts, priority, selectedType) = cpu_params; + +#if defined(OV_CPU_WITH_ACL) + updateSelectedType("ref_any", output_prc, configuration); +#else + if (output_prc == ElementType::i64) { + updateSelectedType(getPrimitiveType(), ElementType::i32, configuration); + } else if (output_prc == ElementType::f64) { + updateSelectedType(getPrimitiveType(), ElementType::f32, configuration); + } else if (output_prc == ElementType::f16) { + if (InferenceEngine::with_cpu_x86_avx512_core_fp16()) { + updateSelectedType(getPrimitiveType(), ElementType::f16, configuration); + } else { + updateSelectedType(getPrimitiveType(), ElementType::f32, configuration); + } + } else if (output_prc == ElementType::bf16) { + if (InferenceEngine::with_cpu_x86_bfloat16()) { + updateSelectedType(getPrimitiveType(), ElementType::bf16, configuration); + } else { + updateSelectedType("ref_any", ElementType::bf16, configuration); + } + } else { + updateSelectedType(getPrimitiveType(), output_prc, configuration); + } +#endif + + 
std::vector in_shapes; + ov::ParameterVector in_params; + std::vector> inputs; + + if (!const_in_1) { + in_shapes.push_back({{}, {{m_output_shape.size()}}}); + in_params.push_back(std::make_shared(shape_prc, ov::PartialShape{static_cast(m_output_shape.size())})); + in_params.back()->set_friendly_name("shape"); + inputs.push_back(in_params.back()); + } else { + inputs.push_back(ngraph::builder::makeConstant(shape_prc, {m_output_shape.size()}, m_output_shape)); + } + if (!const_in_2) { + in_shapes.push_back({{}, {{1}}}); + in_params.push_back(std::make_shared(output_prc, ov::PartialShape{1})); + in_params.back()->set_friendly_name("minval"); + inputs.push_back(in_params.back()); + } else { + inputs.push_back(ngraph::builder::makeConstant(output_prc, {1}, std::vector{m_min_val})); + } + if (!const_in_3) { + in_shapes.push_back({{}, {{1}}}); + in_params.push_back(std::make_shared(output_prc, ov::PartialShape{1})); + in_params.back()->set_friendly_name("maxval"); + inputs.push_back(in_params.back()); + } else { + inputs.push_back(ngraph::builder::makeConstant(output_prc, {1}, std::vector{m_max_val})); + } + + init_input_shapes(in_shapes); + + const auto rnd_op = std::make_shared(inputs[0], inputs[1], inputs[2], output_prc, m_global_seed, m_operational_seed); + const ov::ResultVector results{std::make_shared(rnd_op)}; + + function = std::make_shared(results, in_params, "RandomUniformLayerTestCPU"); +} + +template +void fill_data(TD* dst, const TS* src, size_t len) { + for (size_t i = 0llu; i < len; i++) { + dst[i] = static_cast(src[i]); + } +} + +void RandomUniformLayerTestCPU::generate_inputs(const std::vector& targetInputStaticShapes) { + inputs.clear(); + const auto& func_inputs = function->inputs(); + + for (size_t i = 0llu; i < func_inputs.size(); ++i) { + const auto& func_input = func_inputs[i]; + const auto& name = func_input.get_node()->get_friendly_name(); + const auto& in_prc = func_input.get_element_type(); + auto tensor = ov::Tensor(in_prc, targetInputStaticShapes[i]); + +#define CASE(P, S, L) \ +case P : \ +fill_data(tensor.data::value_type>(), S, L); break; + + if (name == "shape") { + switch (in_prc) { + CASE(ElementType::i32, m_output_shape.data(), m_output_shape.size()) + CASE(ElementType::i64, m_output_shape.data(), m_output_shape.size()) + default: + OPENVINO_THROW("RandomUniform does not support precision ", in_prc, " for the Shape input."); + } + } else if (name == "minval") { + switch (in_prc) { + CASE(ElementType::f32, &m_min_val, 1) + CASE(ElementType::f16, &m_min_val, 1) + CASE(ElementType::bf16, &m_min_val, 1) + CASE(ElementType::i32, &m_min_val, 1) + CASE(ElementType::i64, &m_min_val, 1) + CASE(ElementType::f64, &m_min_val, 1) + default: + OPENVINO_THROW("RandomUniform does not support precision ", in_prc, " for the Minval input."); + } + } else if (name == "maxval") { + switch (in_prc) { + CASE(ElementType::f32, &m_max_val, 1) + CASE(ElementType::f16, &m_max_val, 1) + CASE(ElementType::bf16, &m_max_val, 1) + CASE(ElementType::i32, &m_max_val, 1) + CASE(ElementType::i64, &m_max_val, 1) + CASE(ElementType::f64, &m_max_val, 1) + default: + OPENVINO_THROW("RandomUniform does not support precision ", in_prc, " for the Maxval input."); + } + } + +#undef CASE + + inputs.insert({func_input.get_node_shared_ptr(), tensor}); + } +} + +void RandomUniformLayerTestCPU::compare(const std::vector& expected, const std::vector& actual) { + if (m_global_seed != 0lu || m_operational_seed != 0lu) { + SubgraphBaseTest::compare(expected, actual); + return; + } + + // When both seed values 
are equal to zero, RandomUniform should generate non-deterministic sequence. + // In this case will use Mean and Variance metrics. + +#define CASE(X) case X : rndUCompare::value_type>(expected[0], actual[0]); break; + + switch (expected[0].get_element_type()) { + CASE(ElementType::f32) + CASE(ElementType::i32) + CASE(ElementType::f16) + CASE(ElementType::bf16) + CASE(ElementType::i64) + CASE(ElementType::f64) + default: OPENVINO_THROW("Unsupported element type: ", expected[0].get_element_type()); + } + +#undef CASE +} + +precisions_map RandomUniformLayerTestCPU::get_ref_precisions_convert_map() { + precisions_map precisions; + + if (!InferenceEngine::with_cpu_x86_avx512_core()) { + precisions.insert({ ov::element::bf16, ov::element::f32 }); + } + if (!InferenceEngine::with_cpu_x86_avx512_core_fp16()) { + precisions.insert({ ov::element::f16, ov::element::f32 }); + } + + return precisions; +} + +inline double less_or_equal(double a, double b) { + return (b - a) >= (std::fmax(std::fabs(a), std::fabs(b)) * std::numeric_limits::epsilon()); +} + +template +void RandomUniformLayerTestCPU::rndUCompare(const ov::Tensor& expected, const ov::Tensor& actual) { + auto actual_data = actual.data(); + size_t shape_size_cnt = ov::shape_size(expected.get_shape()); + double act_mean = 0.0; + double act_variance = 0.0; + const double exp_mean = (m_max_val + m_min_val) / 2.0; + const double exp_variance = std::pow(m_max_val - m_min_val, 2) / 12.0; + + for (size_t i = 0; i < shape_size_cnt; ++i) { + auto actual_value = static_cast(actual_data[i]); + if (std::isnan(actual_value)) { + std::ostringstream out_stream; + out_stream << "Actual value is NAN on coordinate: " << i; + throw std::runtime_error(out_stream.str()); + } + act_mean += actual_value; + act_variance += std::pow(actual_value - exp_mean, 2); + } + act_mean /= shape_size_cnt; + act_variance /= shape_size_cnt; + + auto rel_mean = (exp_mean - act_mean) / (m_max_val - m_min_val); + auto rel_variance = (exp_variance - act_variance) / std::pow(m_max_val - m_min_val, 2); + + if (!(less_or_equal(rel_mean, m_mean_threshold) && less_or_equal(rel_variance, m_variance_threshold))) { + std::ostringstream out_stream; + out_stream << "rel_mean < m_mean_threshold && rel_variance < m_variance_threshold" << + "\n\t rel_mean: " << rel_mean << + "\n\t rel_variance: " << rel_variance; + throw std::runtime_error(out_stream.str()); + } +} + +TEST_P(RandomUniformLayerTestCPU, CompareWithRefs) { + run(); + CheckPluginRelatedResults(compiledModel, "RandomUniform"); +} + +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp new file mode 100644 index 00000000000000..1cb9f5fccc451a --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/classes/random_uniform.hpp @@ -0,0 +1,53 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "test_utils/cpu_test_utils.hpp" + +namespace CPULayerTestsDefinitions { + +typedef std::tuple< + ov::Shape, // Output shapes + std::tuple, // Min and Max values + ov::test::ElementType, // Shape precision + ov::test::ElementType, // Output precision + uint64_t, // Global seed + uint64_t, // Operational seed + bool, // Is 1st input constant + bool, // Is 2nd input constant + bool, // Is 3rd input constant + CPUTestUtils::CPUSpecificParams, // CPU 
specific params + ov::AnyMap // Additional plugin configuration +> RandomUniformLayerTestCPUParamSet; + +class RandomUniformLayerTestCPU : public testing::WithParamInterface, + public ov::test::SubgraphBaseTest, public CPUTestUtils::CPUTestsBase { +public: + static std::string getTestCaseName(const testing::TestParamInfo& obj); + +protected: + void SetUp() override; + + void generate_inputs(const std::vector& target_shapes) override; + + void compare(const std::vector& expected, const std::vector& actual) override; + + precisions_map get_ref_precisions_convert_map() override; + + template + void rndUCompare(const ov::Tensor& expected, const ov::Tensor& actual); + +private: + ov::Shape m_output_shape; + uint64_t m_global_seed; + uint64_t m_operational_seed; + double m_min_val; + double m_max_val; + static constexpr double m_mean_threshold = 0.05; + static constexpr double m_variance_threshold = 0.1; +}; + +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/random_uniform.cpp new file mode 100644 index 00000000000000..f319fb6ada2719 --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/common/random_uniform.cpp @@ -0,0 +1,68 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/random_uniform.hpp" + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace RandomUniform { + +static const std::vector shape_prc = { + ElementType::i32, + ElementType::i64 +}; + +static const std::vector output_shapes = { + {500}, + {4, 3, 210} +}; + +static const std::vector global_seed = { + 0, 8 +}; + +static const std::vector operational_seed = { + 0, 3, 5 +}; + +static const std::vector> min_max = { + {0, 50}, + {-50, 50}, + {-50, 0} +}; + +INSTANTIATE_TEST_SUITE_P(smoke_Param, RandomUniformLayerTestCPU, + ::testing::Combine( + ::testing::ValuesIn(output_shapes), + ::testing::ValuesIn(min_max), + ::testing::ValuesIn(shape_prc), + ::testing::Values(ElementType::f32, ElementType::i32), + ::testing::ValuesIn(global_seed), + ::testing::ValuesIn(operational_seed), + ::testing::Values(false), + ::testing::Values(false), + ::testing::Values(false), + ::testing::Values(emptyCPUSpec), + ::testing::Values(empty_plugin_config)), + RandomUniformLayerTestCPU::getTestCaseName); + +INSTANTIATE_TEST_SUITE_P(smoke_ParamConst, RandomUniformLayerTestCPU, + ::testing::Combine( + ::testing::Values(output_shapes[0]), + ::testing::Values(min_max[0]), + ::testing::Values(ElementType::i32), + ::testing::Values(ElementType::f32), + ::testing::Values(1), + ::testing::Values(0), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(emptyCPUSpec), + ::testing::Values(empty_plugin_config)), + RandomUniformLayerTestCPU::getTestCaseName); + +} // namespace RandomUniform +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/random_uniform.cpp b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/random_uniform.cpp new file mode 100644 index 00000000000000..8fec42f382464d --- /dev/null +++ b/src/plugins/intel_cpu/tests/functional/single_layer_tests/instances/x64/random_uniform.cpp @@ -0,0 +1,46 @@ +// Copyright (C) 2018-2023 Intel Corporation +// 
SPDX-License-Identifier: Apache-2.0 +// + +#include "single_layer_tests/classes/random_uniform.hpp" + +using namespace CPUTestUtils; +using namespace ov::test; + +namespace CPULayerTestsDefinitions { +namespace RandomUniform { + +static const std::vector output_prc_nightly = { + ElementType::f32, + ElementType::f16, + ElementType::bf16, + ElementType::i32, + ElementType::i64 +}; + +// Need to validate the Kernel corner cases. +static const std::vector output_shapes_nightly = { + {1}, {2}, {3}, {2, 2}, {5}, {2, 3}, {7}, {2, 2, 2}, {3, 3}, {2, 5}, {11}, {2, 3, 2}, {13}, {2, 7}, {3, 5}, + {4, 4}, {1, 17}, {2, 9}, {19}, {4, 5}, {21}, {11, 2}, {23, 1}, {4, 2, 3}, {5, 5}, {26}, {1, 27}, {14, 2}, + {29}, {10, 3}, {31}, {2, 8, 2}, {33}, {17, 2}, {5, 7}, {2, 3, 2, 3}, {37}, {2, 19}, {2, 20}, {41}, {42}, + {43}, {22, 2}, {3, 5, 3}, {5, 2, 5}, {1, 3, 1, 17, 1}, {26, 2}, {53}, {54}, {55}, {56}, {57}, {58}, {59}, + {2, 32}, {99}, {127}, {128}, {129}, {199}, {255}, {499}, {997}, {1753}, {2899} +}; + +INSTANTIATE_TEST_SUITE_P(nightly_Param, RandomUniformLayerTestCPU, + ::testing::Combine( + ::testing::ValuesIn(output_shapes_nightly), + ::testing::Values(std::tuple{-31, 17}), + ::testing::Values(ElementType::i32), + ::testing::ValuesIn(output_prc_nightly), + ::testing::Values(3), + ::testing::Values(1), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(true, false), + ::testing::Values(emptyCPUSpec), + ::testing::Values(empty_plugin_config)), + RandomUniformLayerTestCPU::getTestCaseName); + +} // namespace RandomUniform +} // namespace CPULayerTestsDefinitions diff --git a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp index d8deddfebe5d69..fff65f9e1c442f 100644 --- a/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp +++ b/src/plugins/intel_cpu/tests/functional/test_utils/cpu_test_utils.hpp @@ -170,6 +170,7 @@ class CPUTestsBase { // common parameters const auto emptyCPUSpec = CPUSpecificParams{{}, {}, {}, {}}; const std::map cpuEmptyPluginConfig; +const ov::AnyMap empty_plugin_config{}; const std::map cpuFP32PluginConfig = { { InferenceEngine::PluginConfigParams::KEY_ENFORCE_BF16, InferenceEngine::PluginConfigParams::NO } }; const std::map cpuBF16PluginConfig = diff --git a/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp index cc45a47d779d57..0bd9f4845d481b 100644 --- a/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp +++ b/src/tests/test_utils/common_test_utils/include/common_test_utils/common_utils.hpp @@ -115,6 +115,10 @@ inline std::string set2str(const std::set& set) { return std::string("()"); } +inline std::string bool2str(const bool val) { + return val ? 
"True" : "False"; +} + template std::vector> combineParams(const std::map>& keyValueSets) { std::vector> resVec; From 222fbb1aec55febcab0f0e53fd49b9b98fb5f7fb Mon Sep 17 00:00:00 2001 From: Ekaterina Aidova Date: Wed, 18 Oct 2023 12:58:54 +0400 Subject: [PATCH 02/39] [PT FE]: support aten::fill_diagonal_, aten::fill (#20395) * [PT FE]: support aten::fill_diagonal_, aten::fill * remove xfail * Update src/frontends/pytorch/src/op/full.cpp Co-authored-by: Maxim Vafin * Update tests/model_hub_tests/torch_tests/test_hf_transformers.py --------- Co-authored-by: Maxim Vafin --- src/frontends/pytorch/src/op/full.cpp | 81 ++++++++++++++++++- src/frontends/pytorch/src/op_table.cpp | 7 +- tests/layer_tests/pytorch_tests/test_full.py | 81 ++++++++++++++++--- .../torch_tests/hf_transformers_models | 8 +- .../torch_tests/test_hf_transformers.py | 5 +- 5 files changed, 163 insertions(+), 19 deletions(-) diff --git a/src/frontends/pytorch/src/op/full.cpp b/src/frontends/pytorch/src/op/full.cpp index cf60d096555007..e8bfa1c7ce99d7 100644 --- a/src/frontends/pytorch/src/op/full.cpp +++ b/src/frontends/pytorch/src/op/full.cpp @@ -3,10 +3,19 @@ // #include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/add.hpp" #include "openvino/op/broadcast.hpp" #include "openvino/op/constant.hpp" #include "openvino/op/convert_like.hpp" +#include "openvino/op/divide.hpp" +#include "openvino/op/gather.hpp" +#include "openvino/op/multiply.hpp" +#include "openvino/op/power.hpp" +#include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/shape_of.hpp" +#include "openvino/op/squeeze.hpp" #include "utils.hpp" namespace ov { @@ -71,12 +80,17 @@ OutputVector translate_full_like(const NodeContext& context) { return {base_translate_full_with_convertlike(context, sizes, value, out)}; }; -OutputVector translate_fill_(const NodeContext& context) { - num_inputs_check(context, 2, 2); +OutputVector translate_fill(const NodeContext& context) { + num_inputs_check(context, 2, 3); auto input = context.get_input(0); auto value = context.get_input(1); auto sizes = context.mark_node(std::make_shared(input, element::i32)); - return {base_translate_full_with_convertlike(context, sizes, value, input)}; + auto out = context.input_is_none(2) ? input : context.get_input(2); + auto result = base_translate_full_with_convertlike(context, sizes, value, out); + if (!context.input_is_none(2)) { + context.mutate_input(2, result); + } + return {result}; }; OutputVector translate_new_full(const NodeContext& context) { @@ -187,6 +201,67 @@ OutputVector translate_empty(const NodeContext& context) { } return {empty}; }; + +OutputVector translate_fill_diagonal(const NodeContext& context) { + // aten::fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!) 
+    // implementation inspired by numpy:
+    // https://github.com/numpy/numpy/blob/c236e694d222ae6b812cb8dab54471bc4c912f0f/numpy/lib/_index_tricks_impl.py#L787-L918
+    num_inputs_check(context, 3, 3);
+    auto input_tensor = context.get_input(0);
+    auto fill_value = context.get_input(1);
+    auto input_shape = context.mark_node(std::make_shared(input_tensor, element::i32));
+    auto input_rank = input_tensor.get_partial_shape().rank();
+    auto const_one = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {1}));
+    auto const_zero = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {0}));
+    auto const_one_s = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));
+    auto const_zero_s = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
+    auto const_neg_one = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
+    if (input_rank.is_dynamic() || input_rank.get_length() < 2) {
+        FRONT_END_OP_CONVERSION_CHECK(false, "aten::fill_diagonal_ requires a tensor with static rank >= 2");
+    }
+    auto flatten_input = context.mark_node(std::make_shared(input_tensor, const_neg_one, false));
+    auto wrap = context.const_input(2);
+    Output step;
+    // the default value for end is the number of elements in the input tensor
+    Output end;
+    auto flatten_shape = context.mark_node(std::make_shared(flatten_input, element::i32));
+    end = context.mark_node(std::make_shared(flatten_shape, const_neg_one, const_zero));
+    auto last_dim = context.mark_node(std::make_shared(input_shape, const_neg_one, const_zero));
+    if (input_rank.get_length() == 2) {
+        // step = a.shape[1] + 1
+        step = context.mark_node(std::make_shared(last_dim, const_one_s));
+        if (!wrap) {
+            // if wrap is not set and the matrix is not square, do not fill the tail: cut end down to a square
+            end = context.mark_node(std::make_shared(last_dim, last_dim));
+        }
+    } else {
+        // step = 1 + (cumprod(a.shape[:-1])).sum()
+        // The cumprod operation is not supported by ov, but since >2D tensors are supported only when all dims
+        // are equal, the cumprod can be represented as a finite geometric series whose sum is given by the formula
+        // b0 * (bn * q - 1) / (q - 1), where in this particular case q = b0 and bn = b0 ^ n
+        auto rank_minus_one =
+            context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_rank.get_length() - 1}));
+        auto dim_power = context.mark_node(std::make_shared(last_dim, rank_minus_one));
+        auto dim_power_minus_one = context.mark_node(std::make_shared(dim_power, const_neg_one));
+        auto dim_minus_one = context.mark_node(std::make_shared(last_dim, const_neg_one));
+        auto q = context.mark_node(std::make_shared(dim_power_minus_one, dim_minus_one, true));
+        auto cumprod_sum = context.mark_node(std::make_shared(last_dim, q));
+        step = context.mark_node(std::make_shared(const_one_s, cumprod_sum));
+        // the wrap parameter is not applicable in this case, as PyTorch supports only equal dims here
+    }
+    step = context.mark_node(std::make_shared(step, const_zero));
+    end = context.mark_node(std::make_shared(end, const_zero));
+    auto indices = context.mark_node(std::make_shared(const_zero_s, end, step, element::i32));
+    auto indices_shape = context.mark_node(std::make_shared(indices, element::i32));
+    fill_value = context.mark_node(std::make_shared(fill_value, input_tensor));
+    fill_value = context.mark_node(std::make_shared(fill_value, indices_shape));
+    // fill values
+    auto filled_tensor =
+        context.mark_node(std::make_shared(flatten_input, indices, fill_value, const_zero));
+    // reshape back to original shape
filled_tensor = context.mark_node(std::make_shared(filled_tensor, input_shape, false)); + return {filled_tensor}; +} } // namespace op } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 47969ddb57d1c6..75665ffe8d4d14 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -66,7 +66,8 @@ OP_CONVERTER(translate_expand_as); OP_CONVERTER(translate_eye); OP_CONVERTER(translate_fake_quantize_per_channel_affine); OP_CONVERTER(translate_fake_quantize_per_tensor_affine); -OP_CONVERTER(translate_fill_); +OP_CONVERTER(translate_fill); +OP_CONVERTER(translate_fill_diagonal); OP_CONVERTER(translate_flatten); OP_CONVERTER(translate_flip); OP_CONVERTER(translate_floor_divide); @@ -323,7 +324,9 @@ const std::map get_supported_ops_ts() { {"aten::fake_quantize_per_channel_affine", op::translate_fake_quantize_per_channel_affine}, {"aten::fake_quantize_per_tensor_affine", op::translate_fake_quantize_per_tensor_affine}, {"aten::feature_dropout", op::skip_node}, - {"aten::fill_", op::inplace_op}, + {"aten::fill", op::translate_fill}, + {"aten::fill_", op::inplace_op}, + {"aten::fill_diagonal_", op::inplace_op}, {"aten::flatten", op::quantizable_op}, {"aten::flip", op::translate_flip}, {"aten::floor", op::translate_1to1_match_1_inputs}, diff --git a/tests/layer_tests/pytorch_tests/test_full.py b/tests/layer_tests/pytorch_tests/test_full.py index 4ce42db7fa9167..c564b1bb3731b9 100644 --- a/tests/layer_tests/pytorch_tests/test_full.py +++ b/tests/layer_tests/pytorch_tests/test_full.py @@ -104,31 +104,94 @@ def test_full_out(self, shape, value, dtype, with_names, ie_device, precision, i ir_version, kwargs_to_prepare_input={'value': value}) class TestFill(PytorchLayerTest): - def _prepare_input(self, value, shape, input_dtype, value_dtype): - return (np.random.randn(*shape).astype(input_dtype), np.array(value, dtype=value_dtype),) + def _prepare_input(self, value, shape, input_dtype, value_dtype, out=False): + if not out: + return (np.random.randn(*shape).astype(input_dtype), np.array(value, dtype=value_dtype),) + return (np.random.randn(*shape).astype(input_dtype), np.array(value, dtype=value_dtype), np.zeros(shape, dtype=input_dtype)) - def create_model(self): + + def create_model(self, mode): import torch class aten_fill(torch.nn.Module): + def __init__(self, mode) -> None: + super().__init__() + if mode == "inplace": + self.forward = self.forward_inplace + if mode == "out": + self.forward = self.forward_out - def forward(self, input_t: torch.Tensor, x: float): + + def forward_inplace(self, input_t: torch.Tensor, x: float): return input_t.fill_(x) + + def forward_out(self, input_t: torch.Tensor, x: float, out: torch.Tensor): + return input_t.fill(x, out=out), out + + def forward(self, input_t: torch.Tensor, x:float): + return input_t.fill(x) + ref_net = None - model = aten_fill() + model = aten_fill(mode) - return model, ref_net, "aten::fill_" + return model, ref_net, "aten::fill_" if mode == "inplace" else "aten::fill" @pytest.mark.parametrize("shape", [[1], [1, 2], [1, 2, 3], [1, 2, 3, 4], [2, 3, 4, 5, 6]]) @pytest.mark.parametrize("value", [0, 1, -1, 0.5]) @pytest.mark.parametrize("input_dtype", ["int8", "int32", "int64", "float32", "float64"]) @pytest.mark.parametrize("value_dtype", ["int8", "int32", "int64", "float32", "float64"]) + @pytest.mark.parametrize("mode", ["", "inplace", "out"]) @pytest.mark.nightly @pytest.mark.precommit - def test_fill(self, shape, value, 
input_dtype, value_dtype, ie_device, precision, ir_version): - self._test(*self.create_model(), ie_device, precision, ir_version, - kwargs_to_prepare_input={'value': value, 'shape': shape, "input_dtype": input_dtype, "value_dtype": value_dtype}) + def test_fill(self, shape, value, input_dtype, value_dtype, mode, ie_device, precision, ir_version): + self._test(*self.create_model(mode), ie_device, precision, ir_version, + kwargs_to_prepare_input={ + 'value': value, + 'shape': shape, + "input_dtype": input_dtype, + "value_dtype": value_dtype, + "out": mode == "out" + }) + +class TestFillDiagonal(PytorchLayerTest): + def _prepare_input(self, shape, input_dtype, value, value_dtype): + return np.zeros(shape).astype(input_dtype), np.array(value, dtype=value_dtype) + + def create_model(self, shape, wrap): + import torch + + class aten_fill_diagonal(torch.nn.Module): + def __init__(self, input_shape, wrap=False) -> None: + super().__init__() + self.wrap = wrap + self.input_shape = input_shape + + def forward(self, x:torch.Tensor, y:float): + x = x.reshape(self.input_shape) + return x.fill_diagonal_(y, wrap=self.wrap), x + + ref_net = None + + model = aten_fill_diagonal(shape, wrap) + return model, "aten::fill_diagonal_", ref_net + + @pytest.mark.parametrize("shape", ([4, 4], [5, 4], [8, 4], [4, 3], [5, 5, 5], [3, 3, 3, 3], [4, 4, 4, 4, 4])) + @pytest.mark.parametrize("value", [0, 1, -1, 2.5]) + @pytest.mark.parametrize("input_dtype", ["int8", "int32", "int64", "float32", "float64"]) + @pytest.mark.parametrize("value_dtype", ["int8", "int32", "int64", "float32", "float64"]) + @pytest.mark.parametrize("wrap", [True, False]) + @pytest.mark.nightly + @pytest.mark.precommit + def test_fill_diagonal(self, shape, value, input_dtype, value_dtype, wrap, ie_device, precision, ir_version): + self._test(*self.create_model(shape, wrap), ie_device, precision, ir_version, + kwargs_to_prepare_input={ + 'value': value, + 'shape': shape, + "input_dtype": input_dtype, + "value_dtype": value_dtype + }) + class TestZero(PytorchLayerTest): def _prepare_input(self, shape, input_dtype): diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models b/tests/model_hub_tests/torch_tests/hf_transformers_models index 112aedeb60de0c..0618d98a4d9f31 100644 --- a/tests/model_hub_tests/torch_tests/hf_transformers_models +++ b/tests/model_hub_tests/torch_tests/hf_transformers_models @@ -242,7 +242,7 @@ microsoft/deberta-base,deberta microsoft/git-large-coco,git,skip,Load problem microsoft/layoutlm-base-uncased,layoutlm microsoft/layoutlmv2-base-uncased,layoutlmv2,skip,Load problem -microsoft/layoutlmv3-base,layoutlmv3,xfail,Unsupported op aten::amax aten::clip +microsoft/layoutlmv3-base,layoutlmv3 microsoft/markuplm-base,markuplm microsoft/resnet-50,resnet microsoft/speecht5_hifigan,hifigan,skip,Load problem @@ -251,7 +251,7 @@ microsoft/swinv2-tiny-patch4-window8-256,swinv2,xfail,Unsupported op aten::adapt microsoft/table-transformer-detection,table-transformer microsoft/wavlm-large,wavlm,skip,Load problem microsoft/xclip-base-patch32,xclip,skip,Load problem -microsoft/xprophetnet-large-wiki100-cased,xlm-prophetnet,xfail,Unsupported op aten::fill_diagonal_ +microsoft/xprophetnet-large-wiki100-cased,xlm-prophetnet miguelvictor/python-fromzero-lstmlm,lstmlm,skip,Load problem mingzi151/test-hf-wav2vec2bert,wav2vec2bert,skip,Load problem MIT/ast-finetuned-audioset-10-10-0.4593,audio-spectrogram-transformer,skip,Load problem @@ -348,7 +348,7 @@ SteveZhan/my-resnet50d,resnet_steve,skip,Load problem suno/bark,bark,skip,Load 
problem surajnair/r3m-50,r3m,skip,Load problem susnato/clvp_dev,clvp,skip,Load problem -Tanrei/GPTSAN-japanese,gptsan-japanese,xfail,Unsupported op aten::clip aten::index_put_ prim::TupleConstruct +Tanrei/GPTSAN-japanese,gptsan-japanese,xfail,Unsupported op aten::index_put_ prim::TupleConstruct tau/bart-large-sled-govreport,tau/sled,skip,Load problem taufeeque/best-cb-model,codebook,skip,Load problem Team-PIXEL/pixel-base,pixel,skip,Load problem @@ -357,7 +357,7 @@ thomwolf/vqgan_imagenet_f16_1024,vqgan_model,skip,Load problem thu-ml/zh-clip-vit-roberta-large-patch14,zhclip,skip,Load problem tifa-benchmark/promptcap-coco-vqa,ofa,skip,Load problem tli8hf/robertabase_snli,transformerfornli,skip,Load problem -transfo-xl-wt103,transfo-xl,xfail,Unsupported op aten::clamp_ aten::index_copy_ +transfo-xl-wt103,transfo-xl,xfail,Unsupported op aten::index_copy_ transZ/BART_shared_clean,shared_bart,skip,Load problem transZ/BART_shared_v2,shared_bart_v2,skip,Load problem transZ/misecom,misecom,skip,Load problem diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py index 3a677353c86508..184e725a04f9b9 100644 --- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py +++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py @@ -298,7 +298,10 @@ def teardown_method(self): ("google/tapas-large-finetuned-wtq", "tapas"), ("gpt2", "gpt2"), ("openai/clip-vit-large-patch14", "clip"), - ("RWKV/rwkv-4-169m-pile", "rwkv")]) + ("RWKV/rwkv-4-169m-pile", "rwkv"), + ("microsoft/layoutlmv3-base", "layoutlmv3"), + ("microsoft/xprophetnet-large-wiki100-cased", "xlm-prophetnet"), + ]) @pytest.mark.precommit def test_convert_model_precommit(self, name, type, ie_device): self.run(model_name=name, model_link=type, ie_device=ie_device) From 2415f0c7cffda56dfaedf88d54630badfe449545 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 18 Oct 2023 11:09:27 +0200 Subject: [PATCH 03/39] [core]Migrate BatchToSpace to new API (#20450) * Migrate BatchToSpace to new API * Remove not required suppression macro --- .../include/openvino/op/batch_to_space.hpp | 4 +- src/core/src/op/batch_to_space.cpp | 112 ++++++++---------- 2 files changed, 50 insertions(+), 66 deletions(-) diff --git a/src/core/include/openvino/op/batch_to_space.hpp b/src/core/include/openvino/op/batch_to_space.hpp index 6609e539087628..2dbbf018913fd3 100644 --- a/src/core/include/openvino/op/batch_to_space.hpp +++ b/src/core/include/openvino/op/batch_to_space.hpp @@ -37,9 +37,7 @@ class OPENVINO_API BatchToSpace : public Op { const Output& block_shape, const Output& crops_begin, const Output& crops_end); - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; void validate_and_infer_types() override; diff --git a/src/core/src/op/batch_to_space.cpp b/src/core/src/op/batch_to_space.cpp index da2c2c5fa703a1..0b522b5156b017 100644 --- a/src/core/src/op/batch_to_space.cpp +++ b/src/core/src/op/batch_to_space.cpp @@ -2,33 +2,23 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/batch_to_space.hpp" - -#include -#include -#include -#include -#include -#include -#include +#include "openvino/op/batch_to_space.hpp" +#include "batch_to_space_shape_inference.hpp" #include "itt.hpp" -#include "ngraph/builder/make_constant.hpp" -#include 
"ngraph/node.hpp" -#include "ngraph/opsets/opset3.hpp" -#include "ngraph/shape.hpp" #include "openvino/op/util/precision_sensitive_attribute.hpp" #include "openvino/op/util/slice_plan.hpp" #include "openvino/reference/reshape.hpp" #include "openvino/reference/strided_slice.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace v1 { -ngraph::op::v1::BatchToSpace::BatchToSpace(const ngraph::Output& data, - const ngraph::Output& block_shape, - const ngraph::Output& crops_begin, - const ngraph::Output& crops_end) +BatchToSpace::BatchToSpace(const Output& data, + const Output& block_shape, + const Output& crops_begin, + const Output& crops_end) : Op({data, block_shape, crops_begin, crops_end}) { ov::mark_as_precision_sensitive(input(1)); ov::mark_as_precision_sensitive(input(2)); @@ -36,7 +26,7 @@ ngraph::op::v1::BatchToSpace::BatchToSpace(const ngraph::Output& d constructor_validate_and_infer_types(); } -void op::v1::BatchToSpace::validate_and_infer_types() { +void BatchToSpace::validate_and_infer_types() { OV_OP_SCOPE(v1_BatchToSpace_validate_and_infer_types); const auto& data_et = get_input_element_type(0); @@ -66,30 +56,29 @@ void op::v1::BatchToSpace::validate_and_infer_types() { set_output_type(0, data_et, output_shape); } -std::shared_ptr ngraph::op::v1::BatchToSpace::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr BatchToSpace::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_BatchToSpace_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2), new_args.at(3)); } -bool ngraph::op::v1::BatchToSpace::visit_attributes(ngraph::AttributeVisitor& visitor) { +bool BatchToSpace::visit_attributes(AttributeVisitor& visitor) { OV_OP_SCOPE(v1_BatchToSpace_visit_attributes); return true; } -OPENVINO_SUPPRESS_DEPRECATED_START namespace { -bool batch_to_space_evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) { - auto data = inputs[0]; - const auto elem_size = data->get_element_type().size(); +bool batch_to_space_evaluate(TensorVector& outputs, const TensorVector& inputs) { + const auto& in = inputs[0]; + const auto elem_size = in.get_element_type().size(); - auto data_shape = data->get_shape(); + auto data_shape = in.get_shape(); - auto const block_values_size = shape_size(inputs[1]->get_shape()); + auto const block_values_size = shape_size(inputs[1].get_shape()); - const auto* block_values = inputs[1]->get_data_ptr(); - const auto* crops_begin_values = inputs[2]->get_data_ptr(); - const auto* crops_end_values = inputs[3]->get_data_ptr(); + const auto* block_values = inputs[1].data(); + const auto* crops_begin_values = inputs[2].data(); + const auto* crops_end_values = inputs[3].data(); ov::Shape dispersed_shape(1); dispersed_shape.insert(dispersed_shape.end(), data_shape.begin(), data_shape.end()); @@ -101,7 +90,13 @@ bool batch_to_space_evaluate(const HostTensorVector& outputs, const HostTensorVe return false; } - auto* flat_data = data->get_data_ptr(); + auto* in_first = static_cast(in.data()); + + // Copy input tensor to not overwrite evaluate's inputs tensors passed as const. + // The evaluate algorithm should be improved to avoid additional data copy. 
+ auto flat_in = Tensor(in.get_element_type(), data_shape); + auto* flat_data = static_cast(flat_in.data()); + std::memcpy(flat_data, in_first, flat_in.get_byte_size()); std::vector dispersed_data(shape_size(data_shape) * elem_size); ov::Shape post_transpose_shape(axes_order.size()); @@ -117,15 +112,15 @@ bool batch_to_space_evaluate(const HostTensorVector& outputs, const HostTensorVe dispersed_shape, elem_size); - size_t val = 1; - for (size_t axis_idx = 0; axis_idx <= block_values_size; ++axis_idx) { + for (size_t axis_idx = 0, val = 1; axis_idx <= block_values_size; ++axis_idx) { if ((block_idx + 1) == axis_idx) { axes_order[axis_idx] = 0; } else { axes_order[axis_idx] = val; - val++; + ++val; } } + for (size_t axis_idx = 0; axis_idx < axes_order.size(); ++axis_idx) { post_transpose_shape[axis_idx] = dispersed_shape[axes_order[axis_idx]]; } @@ -148,61 +143,52 @@ bool batch_to_space_evaluate(const HostTensorVector& outputs, const HostTensorVe data_shape = squeezed_shape; } - std::vector upperbounds_values(data_shape.size()); + std::vector upper_bounds_values(data_shape.size()); for (size_t i = 0; i < data_shape.size(); ++i) { - upperbounds_values[i] = data_shape[i] - crops_end_values[i]; + upper_bounds_values[i] = data_shape[i] - crops_end_values[i]; } std::vector begin_mask(data_shape.size(), 0); std::vector end_mask(data_shape.size(), 0); - std::vector begins(shape_size(inputs[2]->get_shape())); - begins.assign(crops_begin_values, crops_begin_values + shape_size(inputs[2]->get_shape())); + std::vector begins(shape_size(inputs[2].get_shape())); + begins.assign(crops_begin_values, crops_begin_values + shape_size(inputs[2].get_shape())); std::vector default_strides(begins.size(), 1); const auto slice_plan = ov::op::util::make_slice_plan(data_shape, begins, - upperbounds_values, + upper_bounds_values, default_strides, begin_mask, end_mask, AxisSet(), AxisSet(), AxisSet()); - ov::reference::strided_slice(flat_data, outputs[0]->get_data_ptr(), data_shape, slice_plan, elem_size); + ov::reference::strided_slice(flat_data, static_cast(outputs[0].data()), data_shape, slice_plan, elem_size); return true; } } // namespace -bool ngraph::op::v1::BatchToSpace::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { +bool BatchToSpace::evaluate(TensorVector& outputs, const TensorVector& inputs) const { OV_OP_SCOPE(v1_BatchToSpace_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(inputs, 4)); - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - - if (outputs[0]->get_partial_shape().is_dynamic()) { - std::vector input_shapes; - input_shapes.reserve(inputs.size()); + OPENVINO_ASSERT(outputs.size() == 1); - for (size_t i = 0; i < inputs.size(); ++i) { - input_shapes.push_back(inputs[i]->get_partial_shape()); - if (input_shapes.back().is_dynamic()) { - return false; - } - } - - const auto output_shape = shape_infer(this, input_shapes, ov::make_tensor_accessor(inputs)).front().to_shape(); - - outputs[0]->set_element_type(inputs[0]->get_element_type()); - outputs[0]->set_shape(output_shape); + std::vector input_shapes; + for (const auto& in : inputs) { + input_shapes.emplace_back(in.get_shape()); } + const auto output_shape = shape_infer(this, input_shapes, ov::make_tensor_accessor(inputs)).front().to_shape(); + outputs[0].set_shape(output_shape); + return batch_to_space_evaluate(outputs, inputs); } -bool ngraph::op::v1::BatchToSpace::has_evaluate() const { +bool BatchToSpace::has_evaluate() 
const { OV_OP_SCOPE(v1_BatchToSpace_has_evaluate); return !get_input_partial_shape(0).is_dynamic() && get_input_shape(0).size() >= 2 && get_input_shape(0).size() <= shape_size(get_input_shape(1)); } +} // namespace v1 +} // namespace op +} // namespace ov From 9fb40b0007a4798c718ca67cdf89cadf89157777 Mon Sep 17 00:00:00 2001 From: Pawel Raasz Date: Wed, 18 Oct 2023 11:16:12 +0200 Subject: [PATCH 04/39] [core]Migrate LogicalNot to new API (#20438) * Migrate LogicalNot to new API * Rename file not.hpp -> logical_not.hpp * Apply code style --- src/core/include/openvino/op/logical_not.hpp | 4 +- .../openvino/reference/logical_not.hpp | 25 +++++ .../include/openvino/reference/not.hpp | 18 --- src/core/src/op/logical_not.cpp | 105 ++++++++---------- 4 files changed, 74 insertions(+), 78 deletions(-) create mode 100644 src/core/reference/include/openvino/reference/logical_not.hpp delete mode 100644 src/core/reference/include/openvino/reference/not.hpp diff --git a/src/core/include/openvino/op/logical_not.hpp b/src/core/include/openvino/op/logical_not.hpp index c5421b8db14a47..052aed0a09ad24 100644 --- a/src/core/include/openvino/op/logical_not.hpp +++ b/src/core/include/openvino/op/logical_not.hpp @@ -24,9 +24,7 @@ class OPENVINO_API LogicalNot : public Op { void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool has_evaluate() const override; }; } // namespace v1 diff --git a/src/core/reference/include/openvino/reference/logical_not.hpp b/src/core/reference/include/openvino/reference/logical_not.hpp new file mode 100644 index 00000000000000..ca31a824b50d5f --- /dev/null +++ b/src/core/reference/include/openvino/reference/logical_not.hpp @@ -0,0 +1,25 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include + +namespace ov { +namespace reference { + +/** + * @brief Reference implementation of LogicalNot operator. + * + * @param arg Pointer to input data. + * @param out Pointer to output data. + * @param count Number of elements in input buffer. 
+ */ +template +void logical_not(const T* arg, T* out, const size_t count) { + std::transform(arg, std::next(arg, count), out, std::logical_not()); +} +} // namespace reference +} // namespace ov diff --git a/src/core/reference/include/openvino/reference/not.hpp b/src/core/reference/include/openvino/reference/not.hpp deleted file mode 100644 index e0444a8eb73a2a..00000000000000 --- a/src/core/reference/include/openvino/reference/not.hpp +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -namespace ov { -namespace reference { -template -void logical_not(const T* arg, T* out, size_t count) { - for (size_t i = 0; i < count; i++) { - out[i] = static_cast(!(arg[i])); - } -} -} // namespace reference -} // namespace ov diff --git a/src/core/src/op/logical_not.cpp b/src/core/src/op/logical_not.cpp index 7ed4971861766a..db9f939463651a 100644 --- a/src/core/src/op/logical_not.cpp +++ b/src/core/src/op/logical_not.cpp @@ -2,22 +2,34 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "openvino/op/logical_not.hpp" + +#include "element_visitor.hpp" #include "itt.hpp" -#include "ngraph/op/not.hpp" -#include "ngraph/op/op.hpp" -#include "ngraph/op/util/elementwise_args.hpp" -#include "ngraph/runtime/host_tensor.hpp" -#include "ngraph/validation_util.hpp" -#include "openvino/reference/not.hpp" +#include "openvino/reference/logical_not.hpp" + +namespace ov { +namespace op { +namespace logical_not { -using namespace ngraph; -using namespace std; +struct Evaluate : element::NoAction { + using element::NoAction::visit; -op::v1::LogicalNot::LogicalNot(const Output& arg) : Op({arg}) { + template > + static result_type visit(const Tensor& in, Tensor& out, const size_t count) { + reference::logical_not(in.data(), out.data(), count); + return true; + } +}; +} // namespace logical_not + +namespace v1 { + +LogicalNot::LogicalNot(const Output& arg) : Op({arg}) { constructor_validate_and_infer_types(); } -void op::v1::LogicalNot::validate_and_infer_types() { +void LogicalNot::validate_and_infer_types() { OV_OP_SCOPE(v1_LogicalNot_validate_and_infer_types); const auto& element_type = get_input_element_type(0); // No boolean element_type validation for backward compatibility @@ -25,64 +37,43 @@ void op::v1::LogicalNot::validate_and_infer_types() { set_output_type(0, element_type, arg_pshape); } -shared_ptr op::v1::LogicalNot::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr LogicalNot::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_LogicalNot_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0)); -} - -OPENVINO_SUPPRESS_DEPRECATED_START -namespace notop { -namespace { -template -inline bool evaluate(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - using T = typename element_type_traits::value_type; - ov::reference::logical_not(arg0->get_data_ptr(), out->get_data_ptr(), count); - return true; + return std::make_shared(new_args.at(0)); } -bool evaluate_not(const HostTensorPtr& arg0, const HostTensorPtr& out, const size_t count) { - bool rc = true; - out->set_unary(arg0); +bool LogicalNot::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v1_LogicalNot_evaluate); + OPENVINO_ASSERT(outputs.size() == 1); + OPENVINO_ASSERT(inputs.size() == 1); - switch (arg0->get_element_type()) { - OPENVINO_TYPE_CASE(evaluate_not, boolean, arg0, out, count); - 
OPENVINO_TYPE_CASE(evaluate_not, i32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, i64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, u32, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, u64, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, f16, arg0, out, count); - OPENVINO_TYPE_CASE(evaluate_not, f32, arg0, out, count); - default: - rc = false; - break; - } - return rc; -} -} // namespace -} // namespace notop + outputs[0].set_shape(inputs[0].get_shape()); -bool op::v1::LogicalNot::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v1_LogicalNot_evaluate); - OPENVINO_SUPPRESS_DEPRECATED_START - OPENVINO_ASSERT(validate_host_tensor_vector(outputs, 1) && validate_host_tensor_vector(inputs, 1)); - OPENVINO_SUPPRESS_DEPRECATED_END - return notop::evaluate_not(inputs[0], outputs[0], inputs[0]->get_element_count()); + using namespace ov::element; + return IfTypeOf::apply( + inputs[0].get_element_type(), + inputs[0], + outputs[0], + shape_size(inputs[0].get_shape())); } -bool op::v1::LogicalNot::has_evaluate() const { +bool LogicalNot::has_evaluate() const { OV_OP_SCOPE(v1_LogicalNot_has_evaluate); switch (get_input_element_type(0)) { - case ngraph::element::boolean: - case ngraph::element::i32: - case ngraph::element::i64: - case ngraph::element::u32: - case ngraph::element::u64: - case ngraph::element::f16: - case ngraph::element::f32: + case element::boolean: + case element::f16: + case element::f32: + case element::i32: + case element::i64: + case element::u32: + case element::u64: return true; default: - break; + return false; } - return false; } + +} // namespace v1 +} // namespace op +} // namespace ov From d82cd839ce2fe3c738b42e33eb61f094833846c8 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Wed, 18 Oct 2023 17:21:53 +0800 Subject: [PATCH 05/39] [PaddleFT]Simplify slice converter by opset8::slice (#20508) --- src/frontends/paddle/src/op/slice_ops.hpp | 68 +++---------------- .../test_models/gen_scripts/generate_slice.py | 2 +- 2 files changed, 12 insertions(+), 58 deletions(-) diff --git a/src/frontends/paddle/src/op/slice_ops.hpp b/src/frontends/paddle/src/op/slice_ops.hpp index dc2a609ba18513..1f8798bc00e91f 100644 --- a/src/frontends/paddle/src/op/slice_ops.hpp +++ b/src/frontends/paddle/src/op/slice_ops.hpp @@ -33,63 +33,16 @@ NamedOutputs slice_op(const NodeContext& node, const bool& stride_input) { Output start_idx_node = idx_node("StartsTensor", "StartsTensorList", "starts", node); Output end_idx_node = idx_node("EndsTensor", "EndsTensorList", "ends", node); Output strides_idx_node; - if (stride_input) - strides_idx_node = idx_node("StridesTensor", "StridesTensorList", "strides", node); - - // The following process is: - // Given: - // data = [ [1, 2, 3, 4], [5, 6, 7, 8], ] // shape is: [2, 4] - // axes = [0] - // starts = [1] - // ends = [2] - // Our process is: - // 1. Get 'axes': [0, 1], 'starts', 'ends' - // 2. Get data shape: [2,4] and dims: 2 - // 3. Create two tensor t1 and t2, shape is the dims from step2: 2. t1: [0, 0], t2: [INT_MAX, INT_MAX] - // 4. Use 'ScatterNDUpdate' to update some elements in t1, the updated indexes are coming from 'axes', the contents - // are coming from 'starts', t1: [1, 0]; apply the similar process to t2 - // 5. Call 'StrideSlice' with t1 and t2 - // Why using ScatterNDUpdate is that 'axes' may be discontinuous. 
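The comment above is the heart of the old converter, and the simplification is easiest to see in plain NumPy: scattering per-axis bounds into full-rank vectors becomes unnecessary once the Slice op accepts the axes directly (sketch with hypothetical sizes, not part of the patch):

    import numpy as np

    rank = 4
    axes, starts, ends = [0, 2], [1, 0], [2, 3]         # discontinuous axes

    # old converter: scatter per-axis bounds into full-rank t1/t2 for StridedSlice
    full_starts = np.zeros(rank, dtype=np.int64)        # t1 = [0, 0, 0, 0]
    full_ends = np.full(rank, np.iinfo(np.int32).max)   # t2 = [INT_MAX, ...]
    full_starts[axes] = starts
    full_ends[axes] = ends

    data = np.arange(2 * 3 * 4 * 5).reshape(2, 3, 4, 5)
    old = data[tuple(slice(s, e) for s, e in zip(full_starts, full_ends))]
    new = data[1:2, :, 0:3, :]                          # Slice with explicit axes [0, 2]
    assert (old == new).all()
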
- - // the shape of input, such as [2, 4] - const auto shape_node = std::make_shared(data, element::Type_t::i32); - // the input dim, such as [2] - const auto rank_node = std::make_shared(shape_node, element::i32); - const auto const_0_node = default_opset::Constant::create(element::i32, {}, {0}); - const auto const_max_node = default_opset::Constant::create(element::i32, {}, {INT_MAX}); - const auto const_1_node = default_opset::Constant::create(element::i32, {}, {1}); - // t1: [0, 0] - const auto start_node = std::make_shared(const_0_node, rank_node); - // t2: [INT_MAX, INT_MAX] - const auto end_node = std::make_shared(const_max_node, rank_node); - const auto strides_node = std::make_shared(const_1_node, rank_node); - const auto axes_node = default_opset::Constant::create(element::i32, {axes.size(), 1}, axes); - // update t1 - const auto fixed_start_node = - std::make_shared(start_node, axes_node, start_idx_node); - // update t2 - const auto fixed_end_node = std::make_shared(end_node, axes_node, end_idx_node); - std::shared_ptr stride_slice_node; if (stride_input) { - const auto fixed_strides_node = - std::make_shared(strides_node, axes_node, strides_idx_node); - - stride_slice_node = std::make_shared(data, - fixed_start_node, - fixed_end_node, - fixed_strides_node, - std::vector{0}, - std::vector{0}); + strides_idx_node = idx_node("StridesTensor", "StridesTensorList", "strides", node); } else { - stride_slice_node = std::make_shared(data, - fixed_start_node, - fixed_end_node, - std::vector{0}, - std::vector{0}); + strides_idx_node = + default_opset::Constant::create(element::i32, start_idx_node.get_shape(), std::vector{1}); } - + const auto axes_node = default_opset::Constant::create(element::i32, {axes.size()}, axes); + const auto slice_node = + std::make_shared(data, start_idx_node, end_idx_node, strides_idx_node, axes_node); const auto decrease_axis = node.get_attribute>("decrease_axis"); - if (decrease_axis.size() > 0) { PartialShape input_shape = data.get_partial_shape(); PADDLE_OP_CHECK(node, @@ -99,18 +52,19 @@ NamedOutputs slice_op(const NodeContext& node, const bool& stride_input) { // according to paddle slice_op, when all axes are decreased, output shape is [1], instead of scalar. 
// Ref: paddle/fluid/operators/slice_op.h auto decreased_node = std::make_shared( - stride_slice_node, + slice_node, std::make_shared(element::i64, Shape{1}, 1), false); return node.default_single_output_mapping({decreased_node}, {"Out"}); } + const auto squeeze_index_node = default_opset::Constant::create(element::i32, {decrease_axis.size()}, decrease_axis); - const auto decreased_node = std::make_shared(stride_slice_node, squeeze_index_node); + const auto decreased_node = std::make_shared(slice_node, squeeze_index_node); return node.default_single_output_mapping({decreased_node}, {"Out"}); + } else { + return node.default_single_output_mapping({slice_node}, {"Out"}); } - - return node.default_single_output_mapping({stride_slice_node}, {"Out"}); } } // namespace } // namespace op diff --git a/src/frontends/paddle/tests/test_models/gen_scripts/generate_slice.py b/src/frontends/paddle/tests/test_models/gen_scripts/generate_slice.py index f89e18d7500c65..f2a6d1a8769295 100644 --- a/src/frontends/paddle/tests/test_models/gen_scripts/generate_slice.py +++ b/src/frontends/paddle/tests/test_models/gen_scripts/generate_slice.py @@ -20,7 +20,7 @@ def slice(name : str, x, axes : list, start : list, end : list): with paddle.static.program_guard(paddle.static.Program(), paddle.static.Program()): node_x = paddle.static.data(name='x', shape=x.shape, dtype = data_type) - out = paddle.fluid.layers.slice(node_x, axes = axes, starts = start, ends = end) + out = paddle.slice(node_x, axes = axes, starts = start, ends = end) cpu = paddle.static.cpu_places(1) exe = paddle.static.Executor(cpu[0]) From 4574fb112c62ee9faec29507ba1dd2dadacde035 Mon Sep 17 00:00:00 2001 From: Tatiana Savina Date: Wed, 18 Oct 2023 11:27:10 +0200 Subject: [PATCH 06/39] change snippet name (#20538) --- .../preprocessing_overview/preprocessing_usecase_save.md | 4 ++-- docs/snippets/ov_preprocessing.cpp | 4 ++-- docs/snippets/ov_preprocessing.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/articles_en/openvino_workflow/openvino_intro/dldt_deployment_optimization_guide/preprocessing_overview/preprocessing_usecase_save.md b/docs/articles_en/openvino_workflow/openvino_intro/dldt_deployment_optimization_guide/preprocessing_overview/preprocessing_usecase_save.md index 71de4a7e5cc82f..2cb7270c14daf0 100644 --- a/docs/articles_en/openvino_workflow/openvino_intro/dldt_deployment_optimization_guide/preprocessing_overview/preprocessing_usecase_save.md +++ b/docs/articles_en/openvino_workflow/openvino_intro/dldt_deployment_optimization_guide/preprocessing_overview/preprocessing_usecase_save.md @@ -68,14 +68,14 @@ in the model preparation script for such a case. .. doxygensnippet:: docs/snippets/ov_preprocessing.py :language: Python - :fragment: ov:preprocess:save + :fragment: ov:preprocess:save_model .. tab-item:: C++ :sync: cpp .. doxygensnippet:: docs/snippets/ov_preprocessing.cpp :language: cpp - :fragment: ov:preprocess:save + :fragment: ov:preprocess:save_model Application Code - Load Model to Target Device diff --git a/docs/snippets/ov_preprocessing.cpp b/docs/snippets/ov_preprocessing.cpp index f559a7a5a1aef4..176953f46691f1 100644 --- a/docs/snippets/ov_preprocessing.cpp +++ b/docs/snippets/ov_preprocessing.cpp @@ -165,7 +165,7 @@ int main() { //! [ov:preprocess:save_headers] void save_example() { - //! [ov:preprocess:save] + //! 
[ov:preprocess:save_model] // ======== Step 0: read original model ========= ov::Core core; std::shared_ptr model = core.read_model("/path/to/some_model.onnx"); @@ -200,7 +200,7 @@ void save_example() { std::string xml = "/path/to/some_model_saved.xml"; std::string bin = "/path/to/some_model_saved.bin"; ov::serialize(model, xml, bin); - //! [ov:preprocess:save] + //! [ov:preprocess:save_model] } diff --git a/docs/snippets/ov_preprocessing.py b/docs/snippets/ov_preprocessing.py index 23cd30548115ad..8a8f4ce212b4f7 100644 --- a/docs/snippets/ov_preprocessing.py +++ b/docs/snippets/ov_preprocessing.py @@ -184,7 +184,7 @@ def custom_abs(output: Output): model_path = get_path_to_model() serialized_model_path = get_path_to_model() -# ! [ov:preprocess:save] +# ! [ov:preprocess:save_model] # ======== Step 0: read original model ========= core = Core() model = core.read_model(model=model_path) @@ -219,7 +219,7 @@ def custom_abs(output: Output): # ======== Step 3: Save the model ================ serialize(model, serialized_model_path) -# ! [ov:preprocess:save] +# ! [ov:preprocess:save_model] path_to_cache_dir = get_temp_dir() From 90ad4c618ded8ec273a11e410c5f57f2cbab60db Mon Sep 17 00:00:00 2001 From: Vladimir Paramuzov Date: Wed, 18 Oct 2023 15:56:35 +0400 Subject: [PATCH 07/39] [GPU] Grouped decompression scale/zp support (#20491) --- .../graph_optimizer/prepare_quantization.cpp | 39 +++ .../src/graph/impls/ocl/fully_connected.cpp | 7 - .../intel_gpu/src/graph/layout_optimizer.cpp | 2 +- .../fully_connected_gpu_bf_tiled.cl | 60 ++++- .../fully_connected_gpu_bfyx_ref.cl | 32 ++- .../fully_connected_kernel_base.cpp | 12 + .../convert_fc_to_compressed.cpp | 85 ++++-- .../transformations/convert_matmul_to_fc.cpp | 2 +- .../dynamic/matmul_weights_decompression.cpp | 241 ++++++++++++------ .../test_cases/fully_connected_gpu_test.cpp | 71 +++--- 10 files changed, 389 insertions(+), 162 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp index d1b125aa8f1df5..f55d99b6a5fa80 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_quantization.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "fully_connected_inst.h" #include "pooling_inst.h" #include "quantize_inst.h" #include "reorder_inst.h" @@ -847,6 +848,42 @@ bool prepare_quantization::optimize_quantize(program &p, quantize_node& quantize return true; } +static void optimize_weights_decompression_parameters(fully_connected_node& fc_node, program& p) { + auto fc_prim = fc_node.get_primitive(); + if (!fc_prim->compressed_weights) + return; + + auto reorder_bfyx_to_fbyx = [&](size_t dep_id) { + auto& dep = fc_node.get_dependency(dep_id); + auto target_layout = dep.get_output_layout(); + target_layout.format = format::fbyx; + auto reorder_prim = std::make_shared(dep.id() + "_reorder", dep.id(), target_layout); + p.add_intermediate(reorder_prim, fc_node, dep_id, true); + fc_node.get_dependency(dep_id).recalc_output_layout(false); + }; + + auto need_reorder = [&](size_t dep_id) { + auto dep_layout = fc_node.get_input_layout(dep_id); + auto dep_pshape = dep_layout.get_partial_shape(); + + auto groups_count = dep_pshape[dep_pshape.size() - 1].get_length(); + + return groups_count > 1; + }; + + auto decompression_scale_idx = !fc_node.bias_term() ? 
2 : 3; + if (need_reorder(decompression_scale_idx)) { + reorder_bfyx_to_fbyx(decompression_scale_idx); + } + + if (!fc_prim->decompression_zero_point.empty()) { + auto decompression_zp_idx = decompression_scale_idx + 1; + if (need_reorder(decompression_zp_idx)) { + reorder_bfyx_to_fbyx(decompression_zp_idx); + } + } +} + void prepare_quantization::run(program& p) { auto itr = p.get_processing_order().begin(); while (itr != p.get_processing_order().end()) { @@ -859,6 +896,8 @@ void prepare_quantization::run(program& p) { remove_fake_reorders(p, node->as()); } else if (node->is_type()) { prepare_asymmetric_quantization(p, node->as()); + } else if (node->is_type()) { + optimize_weights_decompression_parameters(node->as(), p); } } } diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp index 43ce081d2f69ea..19007a481579f6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/fully_connected.cpp @@ -110,20 +110,13 @@ struct fully_connected_impl : typed_primitive_impl_ocl { bool has_scale = !primitive->decompression_scale.empty(); size_t offset = primitive->bias.empty() ? 2 : 3; - const auto& weights_pshape = input1_layout.get_partial_shape(); if (has_scale) { auto scale_layout = input_layouts[offset++]; - if (input1_pshape.size() != 2) { - scale_layout.set_partial_shape(reshape_to_2d(scale_layout.get_partial_shape(), weights_pshape[0], primitive->weights_rank)); - } layouts.push_back(scale_layout); } if (has_zp) { auto zp_layout = input_layouts[offset]; - if (input1_pshape.size() != 2) { - zp_layout.set_partial_shape(reshape_to_2d(zp_layout.get_partial_shape(), weights_pshape[0], primitive->weights_rank)); - } layouts.push_back(zp_layout); } diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index f5f6c1ac16d82a..69b1e12fa3b4ae 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -307,7 +307,7 @@ bool layout_optimizer::can_fuse_reorder(program_node& prev, program_node& next, (fmt_prev == format::b_fs_yx_fsv4 && prev_output_layout.feature() % 32 == 0 && prev_output_layout.spatial(0) == 1 && - prev_output_layout.spatial(1) == 1))) + prev_output_layout.spatial(1) == 1)) && is_input_reorder(prev, next)) return true; if (next.is_type() && fmt_prev == format::b_fs_yx_fsv16 && fmt_next == format::b_fs_yx_fsv4 && is_input_idx(0)) diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl index d4992801a80447..f6dacec4a73c80 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bf_tiled.cl @@ -120,7 +120,7 @@ KERNEL(fc)( uint input_offset = out_b * TILE_IN_B_PITCH + INPUT0_OFFSET; uint weights_offset = out_f * INPUT_ELEMENTS_COUNT; -#if COMPRESSED_WEIGHTS +#if COMPRESSED_WEIGHTS && DECOMPRESSION_SCALE_GROUPS_NUM == 1 #if DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % SIMD == 0 ACCUMULATOR_VEC_TYPE d_scale = BLOCK_READN(ACCUMULATOR_TYPE, TILE_OFM, decompression_scale, out_f); #elif DECOMPRESSION_SCALE_LENGTH > 1 && DECOMPRESSION_SCALE_LENGTH % SIMD != 0 @@ -134,9 +134,11 @@ KERNEL(fc)( ACCUMULATOR_VEC_TYPE d_scale = decompression_scale[0]; #endif - 
#if !DECOMPRESSION_ZP_TERM - ACCUMULATOR_VEC_TYPE d_zp = 0; - #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD == 0 + ACCUMULATOR_TYPE* d_scales = (ACCUMULATOR_TYPE*)(&d_scale); +#endif + +#if COMPRESSED_WEIGHTS && DECOMPRESSION_ZP_TERM && DECOMPRESSION_ZP_GROUPS_NUM == 1 + #if DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD == 0 ACCUMULATOR_VEC_TYPE d_zp = BLOCK_READN(ACCUMULATOR_TYPE, TILE_OFM, decompression_zp, out_f); #elif DECOMPRESSION_ZP_LENGTH > 1 && DECOMPRESSION_ZP_LENGTH % SIMD != 0 ACCUMULATOR_VEC_TYPE d_zp = 0; @@ -148,9 +150,7 @@ KERNEL(fc)( #else ACCUMULATOR_VEC_TYPE d_zp = decompression_zp[0]; #endif - - ACCUMULATOR_TYPE* ds = (ACCUMULATOR_TYPE*)(&d_scale); - ACCUMULATOR_TYPE* dzp = (ACCUMULATOR_TYPE*)(&d_zp); + ACCUMULATOR_TYPE* d_zps = (ACCUMULATOR_TYPE*)(&d_zp); #endif #if REALIGN_FP16_OFFSET @@ -193,7 +193,28 @@ KERNEL(fc)( ACCUMULATOR_TYPE* w = (ACCUMULATOR_TYPE*)(&wei); unroll_for(uint kii = 0; kii < TILE_K; ++kii) { unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { - w[kii * TILE_OFM + fi] = (w[kii * TILE_OFM + fi] - dzp[fi]) * ds[fi]; + const uint w_idx = kii * TILE_OFM + fi; + const uint offset_ofm = out_f + fi*SIMD + sglid; + #if DECOMPRESSION_SCALE_GROUPS_NUM > 1 + const uint scale_offset = (offset_ofm % DECOMPRESSION_SCALE_BATCH_NUM) * DECOMPRESSION_SCALE_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; + ACCUMULATOR_TYPE ds = decompression_scale[scale_offset]; + #else + ACCUMULATOR_TYPE ds = d_scales[fi]; + #endif + + #if DECOMPRESSION_ZP_TERM + #if DECOMPRESSION_ZP_GROUPS_NUM > 1 + const uint zp_offset = (offset_ofm % DECOMPRESSION_ZP_BATCH_NUM) * DECOMPRESSION_ZP_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_ZP_GROUP_SIZE) * DECOMPRESSION_ZP_FEATURE_PITCH; + ACCUMULATOR_TYPE dzp = decompression_zp[zp_offset]; + #else + ACCUMULATOR_TYPE dzp = d_zps[fi]; + #endif + #else + ACCUMULATOR_TYPE dzp = ACCUMULATOR_VAL_ZERO; + #endif + w[w_idx] = (w[w_idx] - dzp) * ds; } } #endif @@ -230,7 +251,28 @@ KERNEL(fc)( ACCUMULATOR_TYPE* w = (ACCUMULATOR_TYPE*)(&wei); unroll_for(uint kii = 0; kii < TILE_K; ++kii) { unroll_for(uint fi = 0; fi < TILE_OFM; ++fi) { - w[kii * TILE_OFM + fi] = (w[kii * TILE_OFM + fi] - dzp[fi]) * ds[fi]; + const uint w_idx = kii * TILE_OFM + fi; + uint offset_ofm = out_f + fi*SIMD + get_sub_group_local_id(); + #if DECOMPRESSION_SCALE_GROUPS_NUM > 1 + const uint scale_offset = (offset_ofm % DECOMPRESSION_SCALE_BATCH_NUM) * DECOMPRESSION_SCALE_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_SCALE_GROUP_SIZE)*DECOMPRESSION_SCALE_FEATURE_PITCH; + ACCUMULATOR_TYPE ds = decompression_scale[scale_offset]; + #else + ACCUMULATOR_TYPE ds = d_scales[fi]; + #endif + + #if DECOMPRESSION_ZP_TERM + #if DECOMPRESSION_ZP_GROUPS_NUM > 1 + const uint zp_offset = (offset_ofm % DECOMPRESSION_ZP_BATCH_NUM) * DECOMPRESSION_ZP_BATCH_PITCH + + ((kii + ki*TILE_K + ni*TILE_IFM*SIMD) / DECOMPRESSION_ZP_GROUP_SIZE) * DECOMPRESSION_ZP_FEATURE_PITCH; + ACCUMULATOR_TYPE dzp = decompression_zp[zp_offset]; + #else + ACCUMULATOR_TYPE dzp = d_zps[fi]; + #endif + #else + ACCUMULATOR_TYPE dzp = ACCUMULATOR_VAL_ZERO; + #endif + w[w_idx] = (w[w_idx] - dzp) * ds; } } #endif diff --git a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl index 72e8d6d7d3d855..6374e65c4f5fcc 100644 --- 
a/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl +++ b/src/plugins/intel_gpu/src/kernel_selector/cl_kernels/fully_connected_gpu_bfyx_ref.cl @@ -36,18 +36,24 @@ KERNEL(fc)( for (uint x = 0; x < INPUT0_SIZE_X; ++x) { const uint input0_idx = INPUT0_GET_INDEX(b, ofm, y, x); - const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); #if COMPRESSED_WEIGHTS - ACCUMULATOR_TYPE filter_compressed = TO_ACCUMULATOR_TYPE(weights[filter_idx]); #if DECOMPRESSION_ZP_TERM - ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[DECOMPRESSION_ZP_GET_INDEX_SAFE(0, oym, 0, 0)]); + const uint zp_offset = DECOMPRESSION_ZP_GET_INDEX_SAFE(oym, y / DECOMPRESSION_ZP_GROUP_SIZE, 0, 0); + ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); #else ACCUMULATOR_TYPE zp = ACCUMULATOR_VAL_ZERO; #endif - DECOMPRESSION_SCALE_TYPE scale = decompression_scale[DECOMPRESSION_SCALE_GET_INDEX_SAFE(0, oym, 0, 0)]; - ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - TO_ACCUMULATOR_TYPE(zp)) * scale; + const uint decomp_offset = DECOMPRESSION_SCALE_GET_INDEX_SAFE(oym, y / DECOMPRESSION_SCALE_GROUP_SIZE, 0, 0); + DECOMPRESSION_SCALE_TYPE scale = decompression_scale[decomp_offset]; + #endif + + #if COMPRESSED_WEIGHTS_INT8 + const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); + ACCUMULATOR_TYPE filter_compressed = TO_ACCUMULATOR_TYPE(weights[filter_idx]); + ACCUMULATOR_TYPE filter_val = (filter_compressed - zp) * scale; dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(filter_val); #else + const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, oym, y, 0, 0); dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); #endif } @@ -67,19 +73,25 @@ KERNEL(fc)( for (uint x = 0; x < INPUT0_SIZE_X; ++x) { const uint input0_idx = INPUT0_GET_INDEX(b, ifm, y, x); - const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); #if COMPRESSED_WEIGHTS - FILTER_TYPE filter_compressed = weights[filter_idx]; #if DECOMPRESSION_ZP_TERM - ACCUMULATOR_TYPE zp = decompression_zp[DECOMPRESSION_ZP_GET_INDEX_SAFE(0, ofm, 0, 0)]; + const uint zp_offset = DECOMPRESSION_ZP_GET_INDEX_SAFE(ofm, ifm / DECOMPRESSION_ZP_GROUP_SIZE, 0, 0); + ACCUMULATOR_TYPE zp = TO_ACCUMULATOR_TYPE(decompression_zp[zp_offset]); #else ACCUMULATOR_TYPE zp = ACCUMULATOR_VAL_ZERO; #endif + const uint decomp_offset = DECOMPRESSION_SCALE_GET_INDEX_SAFE(ofm, ifm / DECOMPRESSION_SCALE_GROUP_SIZE, 0, 0); + DECOMPRESSION_SCALE_TYPE scale = decompression_scale[decomp_offset]; + #endif - DECOMPRESSION_SCALE_TYPE scale = decompression_scale[DECOMPRESSION_SCALE_GET_INDEX_SAFE(0, ofm, 0, 0)]; - ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - TO_ACCUMULATOR_TYPE(zp)) * scale; + + #if COMPRESSED_WEIGHTS_INT8 + const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); + FILTER_TYPE filter_compressed = weights[filter_idx]; + ACCUMULATOR_TYPE filter_val = (TO_ACCUMULATOR_TYPE(filter_compressed) - zp) * scale; dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(filter_val); #else + const uint filter_idx = GET_FILTER_INDEX(FILTER, 0, ofm, ifm, y, x); dotProd += (ACCUMULATOR_TYPE)(input[input0_idx]) * (ACCUMULATOR_TYPE)(weights[filter_idx]); #endif } diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp index 
3e9eb35cdaaff0..a75d35469837f7 100644 --- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp +++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_base.cpp @@ -24,11 +24,23 @@ JitConstants FullyConnectedKernelBase::GetJitConstants(const fully_connected_par if (params.compressed) { jit.AddConstants({MakeJitConstant("COMPRESSED_WEIGHTS", 1)}); + if (params.weights.GetDType() == WeightsType::INT8 || params.weights.GetDType() == WeightsType::UINT8) { + jit.AddConstants({MakeJitConstant("COMPRESSED_WEIGHTS_INT8", 1)}); + } + + const size_t scale_groups_num = params.decompression_scale.Feature().v; + const size_t scale_group_size = params.weights.IFM().v / params.decompression_scale.Feature().v; jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_TERM", 1)}); jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE", params.decompression_scale)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_GROUPS_NUM", scale_groups_num)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_SCALE_GROUP_SIZE", scale_group_size)}); if (params.has_decompression_zp) { + const size_t zp_groups_num = params.decompression_zero_point.Feature().v; + const size_t zp_group_size = params.weights.IFM().v / params.decompression_zero_point.Feature().v; jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_TERM", 1)}); jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP", params.decompression_zero_point)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_GROUPS_NUM", zp_groups_num)}); + jit.AddConstants({MakeJitConstant("DECOMPRESSION_ZP_GROUP_SIZE", zp_group_size)}); } } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp index a1c4d60b81977c..0ff0e1fd0bf258 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_fc_to_compressed.cpp @@ -3,16 +3,19 @@ // #include "convert_fc_to_compressed.hpp" +#include #include "intel_gpu/op/fully_connected.hpp" #include "intel_gpu/op/fully_connected_compressed.hpp" +#include "openvino/op/constant.hpp" #include "openvino/op/subtract.hpp" #include "openvino/op/matmul.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/reshape.hpp" #include "openvino/core/rt_info.hpp" +#include "openvino/pass/pattern/op/pattern.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "openvino/pass/pattern/op/or.hpp" #include "transformations/utils/utils.hpp" @@ -23,7 +26,19 @@ namespace intel_gpu { ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed() { using namespace ov::pass::pattern; - auto weights_m = wrap_type(consumers_count(1)); + auto compressed_constant = [](const ov::Output& output) { + return (output.get_element_type() == ov::element::u8 || + output.get_element_type() == ov::element::i8) && + output.get_target_inputs().size() == 1; + }; + + auto reshape_3d_to_2d = [](const ov::Output& output) { + auto in_ps = output.get_node()->get_input_partial_shape(0); + auto out_ps = output.get_node()->get_output_partial_shape(0); + return in_ps.rank().is_static() && out_ps.rank().is_static() && in_ps.size() == 3 && out_ps.size() == 2; + }; + + auto weights_m = wrap_type(compressed_constant); auto convert_m = wrap_type({weights_m}); auto sub_const_m = wrap_type(consumers_count(1)); @@ -34,11 
+49,15 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon auto mul_no_sub_m = wrap_type({convert_m, mul_const_m}); auto mul_m = std::make_shared(OutputVector{mul_with_sub_m, mul_no_sub_m}); + auto reshape_const_m = wrap_type(); + auto reshape_m = wrap_type({mul_m, reshape_const_m}, reshape_3d_to_2d); + + auto transpose_input = std::make_shared(OutputVector{reshape_m, mul_m}); auto transpose_const_m = wrap_type(); - auto transpose_m = wrap_type({mul_m, transpose_const_m}); - auto weights_input_m = std::make_shared(ov::OutputVector{mul_m, transpose_m}); + auto transpose_m = wrap_type({transpose_input, transpose_const_m}); auto data_m = any_input(); + auto weights_input_m = std::make_shared(ov::OutputVector{reshape_m, transpose_m, mul_m}); auto fully_connected_m = wrap_type({data_m, weights_input_m}); ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) { @@ -52,53 +71,73 @@ ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyCon return false; } + bool has_transpose = pattern_map.count(transpose_m); + auto scale_shape = pattern_map.at(mul_const_m).get_shape(); + bool grouped = std::count_if(scale_shape.begin(), scale_shape.end(), [](size_t d) { return d > 1; }) > 1; + + auto reshape_const_to_2d = [has_transpose, grouped](std::shared_ptr node) { + auto constant = std::dynamic_pointer_cast(node); + OPENVINO_ASSERT(constant != nullptr); + ov::Shape current_shape = constant->get_shape(); + if (current_shape.size() == 2) + return constant; + OPENVINO_ASSERT(current_shape.size() == 3); + + auto new_shape = (has_transpose || !grouped) ? ov::Shape{current_shape[0] * current_shape[1], current_shape[2]} + : ov::Shape{current_shape[0], current_shape[1] * current_shape[2]}; + + return std::make_shared(*constant, new_shape); + }; + const auto& fc_input_a = fc->get_input_node_shared_ptr(0); - const auto& scale = pattern_map.at(mul_const_m).get_node_shared_ptr(); + const auto& scale = reshape_const_to_2d(pattern_map.at(mul_const_m).get_node_shared_ptr()); std::shared_ptr optional_zero_point = nullptr; - ov::NodeVector nodes_to_copy_info{pattern_map.at(fully_connected_m).get_node_shared_ptr(), - pattern_map.at(convert_m).get_node_shared_ptr()}; - if (pattern_map.count(mul_no_sub_m)) { - nodes_to_copy_info.push_back(pattern_map.at(mul_no_sub_m).get_node_shared_ptr()); - } - if (pattern_map.count(mul_with_sub_m)) { - nodes_to_copy_info.push_back(pattern_map.at(mul_with_sub_m).get_node_shared_ptr()); - } - const bool with_zero_point = pattern_map.count(subtract_m) > 0; if (with_zero_point) { - optional_zero_point = pattern_map.at(sub_const_m).get_node_shared_ptr(); - nodes_to_copy_info.push_back(subtract_m); + optional_zero_point = reshape_const_to_2d(pattern_map.at(sub_const_m).get_node_shared_ptr()); } - std::shared_ptr fc_input_b = pattern_map.at(weights_m).get_node_shared_ptr(); - if (pattern_map.count(transpose_m)) { + std::shared_ptr fc_input_b = reshape_const_to_2d(pattern_map.at(weights_m).get_node_shared_ptr()); + std::shared_ptr fc_input_scale = scale; + std::shared_ptr fc_input_zp = optional_zero_point; + if (has_transpose) { const auto& transpose = pattern_map.at(transpose_m).get_node_shared_ptr(); - const auto& transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr(); + std::shared_ptr transpose_const = pattern_map.at(transpose_const_m).get_node_shared_ptr(); + if (ov::shape_size(transpose_const->get_shape()) != fc_input_b->get_output_partial_shape(0).size()) { + std::vector 
new_order(fc_input_b->get_output_partial_shape(0).size()); + std::iota(new_order.begin(), new_order.end(), 0); + std::swap(new_order[new_order.size() - 1], new_order[new_order.size() - 2]); + transpose_const = std::make_shared(ov::element::i32, ov::Shape{new_order.size()}, new_order); + } + fc_input_b = transpose->clone_with_new_inputs({ fc_input_b->output(0), transpose_const }); + fc_input_scale = transpose->clone_with_new_inputs({ scale->output(0), transpose_const }); + if (with_zero_point) + fc_input_zp = transpose->clone_with_new_inputs({ optional_zero_point->output(0), transpose_const }); } std::shared_ptr new_fc = nullptr; if (with_zero_point) { new_fc = std::make_shared(fc_input_a, fc_input_b, - scale, - optional_zero_point, + fc_input_scale, + fc_input_zp, fc->get_output_type()); } else { new_fc = std::make_shared(fc_input_a, fc_input_b, - scale, + fc_input_scale, fc->get_output_type()); } new_fc->set_friendly_name(fc->get_friendly_name()); - ov::copy_runtime_info(nodes_to_copy_info, new_fc); + ov::copy_runtime_info(m.get_matched_nodes(), new_fc); ov::replace_node(fc, new_fc); return true; }; - auto m = std::make_shared(fully_connected_m); + auto m = std::make_shared(fully_connected_m, "ConvertFullyConnectedToFullyConnectedCompressed"); this->register_matcher(m, callback); } diff --git a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp index a30c88e7d1492d..2caf3cd4d69850 100644 --- a/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp +++ b/src/plugins/intel_gpu/src/plugin/transformations/convert_matmul_to_fc.cpp @@ -160,7 +160,7 @@ ConvertMatMulToFullyConnected::ConvertMatMulToFullyConnected() { return true; }; - auto m = std::make_shared(matmul_m); + auto m = std::make_shared(matmul_m, "ConvertMatMulToFullyConnected"); this->register_matcher(m, callback); } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp index 313015da3406ba..75bdb9f0ec71a7 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/matmul_weights_decompression.cpp @@ -2,19 +2,21 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ov_models/builders.hpp" +#include "common_test_utils/ov_tensor_utils.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/matmul.hpp" #include "shared_test_classes/base/layer_test_utils.hpp" #include "shared_test_classes/base/ov_subgraph.hpp" #include "transformations/rt_info/decompression.hpp" -using namespace ngraph; +using namespace ov; using namespace ov::test; namespace SubgraphTestsDefinitions { /* - * Subtract_const(U8) + * Subtract_const(U8/NF4/U4) * / - * Weights(U8) Convert(F32) + * Weights(U8/NF4/U4) Convert(F32) * | / * Convert(F32) Reshape(optional) * \ / Multiply_const(F32) @@ -29,7 +31,20 @@ namespace SubgraphTestsDefinitions { * | * Bias */ -using MatmulWeightsDecompressionParams = std::tuple, // input shapes + +struct ShapeParams { + ShapeParams() = default; + ShapeParams(InputShape data_shape, ov::Shape weights_shape, int weights_group_size = -1) + : data_shape(std::move(data_shape)), + weights_shape(std::move(weights_shape)), + weights_group_size(weights_group_size) {} + + InputShape data_shape; + ov::Shape weights_shape; + // Decompression group size. 
If the value is equal to -1, ordinary decompression is used + int weights_group_size; +}; +using MatmulWeightsDecompressionParams = std::tuple, // class MatmulWeightsDecompression : public testing::WithParamInterface, public SubgraphBaseTest { public: static std::string get_test_case_name(testing::TestParamInfo obj) { - std::vector inputShapes; + ShapeParams shape_params; ov::test::ElementType weights_precision; ov::test::ElementType activations_precision; bool transpose; @@ -48,7 +63,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface additional_config; - std::tie(inputShapes, + std::tie(shape_params, weights_precision, activations_precision, transpose, @@ -57,20 +72,9 @@ class MatmulWeightsDecompression : public testing::WithParamInterface init_subgraph(std::vector& inputShapes, - const ov::element::Type data_precision, - const ov::element::Type weights_precision, - const bool transpose_weights, - const bool add_subtract, - const bool reshape_on_decompression) { - ov::ParameterVector params{std::make_shared(data_precision, inputShapes[0])}; + std::shared_ptr init_subgraph(const ov::PartialShape& data_shape, + const ov::Shape& weights_shape, + const int group_size, + const ov::element::Type data_precision, + const ov::element::Type weights_precision, + const bool transpose_weights, + const bool add_subtract, + const bool reshape_on_decompression) { + ov::ParameterVector params{std::make_shared(data_precision, data_shape)}; + const auto weights_subgraph = init_compressed_weights_subgraph(weights_shape, + group_size, + data_precision, + weights_precision, + transpose_weights, + add_subtract, + reshape_on_decompression); + + auto mat_mul = std::make_shared(params[0], weights_subgraph); + return std::make_shared(NodeVector{mat_mul}, params, "MatmulWeightsDecompression"); + } + + std::shared_ptr init_compressed_weights_subgraph(const ov::Shape& weights_shape, + const int group_size, + const ov::element::Type data_precision, + const ov::element::Type weights_precision, + const bool transpose_weights, + const bool add_subtract, + const bool reshape_on_decompression_constant) { auto transpose_if_necessary = [&](const ov::Shape& shape) { - if (!transpose_weights) - return shape; - auto transposed_shape = shape; - std::swap(*transposed_shape.rbegin(), *(transposed_shape.rbegin() + 1)); - return transposed_shape; + auto result_shape = shape; + if (transpose_weights) + std::swap(*result_shape.rbegin(), *(result_shape.rbegin() + 1)); + return result_shape; }; - auto weights_shape = transpose_if_necessary(inputShapes[1].to_shape()); - auto weights = ngraph::builder::makeConstant(weights_precision, weights_shape, {}, true); + const bool group_decompression = group_size != -1; + // Weights has shape [I, O], where + // I - input channels + // O - output channels + // In case of group decompression, input channels dimension is split into 2: I -> [N, G], where + // N - number of groups + // G - group size + auto transformed_weights_shape = transpose_if_necessary(weights_shape); + if (group_decompression) { + OPENVINO_ASSERT(weights_shape[0] % group_size == 0, + "Weights output channels count (", + weights_shape[0], + ") must be divisible by decompression group size (", + group_size, + ")."); + auto in_channel_idx = transpose_weights ? 
transformed_weights_shape.size() - 1 : transformed_weights_shape.size() - 2; + transformed_weights_shape[in_channel_idx] = weights_shape[0] / group_size; + transformed_weights_shape.insert(transformed_weights_shape.begin() + in_channel_idx + 1, group_size); + } + auto weights_tensor = ov::test::utils::create_and_fill_tensor(weights_precision, transformed_weights_shape); + auto weights = std::make_shared(weights_tensor); weights->set_friendly_name("Compressed_weights"); auto weights_convert = std::make_shared(weights, data_precision); std::shared_ptr mul_parent = weights_convert; - auto output_channels = transpose_weights ? *(weights_shape.rbegin() + 1) : *weights_shape.rbegin(); - auto scaleshift_target_shape = transpose_if_necessary(ov::Shape{1, output_channels}); - auto scaleshift_const_shape = reshape_on_decompression ? ov::Shape{output_channels} : scaleshift_target_shape; + auto output_channels = *weights_shape.rbegin(); + + // Decompression constants shape: + // Ordinary decompression: [O, 1] + // Group decompression: [O, N, 1] + ov::Shape scaleshift_target_shape{output_channels}; + scaleshift_target_shape.insert(scaleshift_target_shape.begin(), group_decompression ? weights_shape[0] / group_size : 1); + scaleshift_target_shape = transpose_if_necessary(scaleshift_target_shape); + if (group_decompression) { + auto in_channel_idx = transpose_weights ? scaleshift_target_shape.size() - 1 : scaleshift_target_shape.size() - 2; + scaleshift_target_shape.insert(scaleshift_target_shape.begin() + in_channel_idx + 1, 1); + } + + auto scaleshift_const_shape = scaleshift_target_shape; + if (reshape_on_decompression_constant) + scaleshift_const_shape.erase(std::remove(scaleshift_const_shape.begin(), scaleshift_const_shape.end(), 1), scaleshift_const_shape.end()); if (add_subtract) { - auto shift_const = ngraph::builder::makeConstant(weights_precision, scaleshift_const_shape, {}, true); + auto shift_tensor = ov::test::utils::create_and_fill_tensor(weights_precision, scaleshift_const_shape); + auto shift_const = std::make_shared(shift_tensor); std::shared_ptr shift_convert = std::make_shared(shift_const, data_precision); - if (reshape_on_decompression) { + if (reshape_on_decompression_constant) { auto shift_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); auto shift_reshape = std::make_shared(shift_convert, shift_reshape_const, false); shift_convert = shift_reshape; @@ -122,32 +179,36 @@ class MatmulWeightsDecompression : public testing::WithParamInterface(weights_convert, shift_convert); } - std::shared_ptr scale_const = ngraph::builder::makeConstant(data_precision, scaleshift_const_shape, {}, true); - if (reshape_on_decompression) { + auto scale_tensor = ov::test::utils::create_and_fill_tensor(data_precision, scaleshift_const_shape, 1, -0.5, 10000); + std::shared_ptr scale_const = std::make_shared(scale_tensor); + if (reshape_on_decompression_constant) { auto scale_reshape_const = ov::opset10::Constant::create(ov::element::i32, {scaleshift_target_shape.size()}, scaleshift_target_shape); auto scale_reshape = std::make_shared(scale_const, scale_reshape_const, false); scale_const = scale_reshape; } - auto multiply = std::make_shared(mul_parent, scale_const); + std::shared_ptr last_node = std::make_shared(mul_parent, scale_const); - std::shared_ptr matmul_weights = multiply; + if (group_decompression) { + auto reshape_target_shape = transpose_weights ? 
std::vector{-1, static_cast(weights_shape[0])} + : std::vector{static_cast(weights_shape[0]), -1}; + auto target_shape_node = ov::opset10::Constant::create(ov::element::i32, {reshape_target_shape.size()}, reshape_target_shape); + last_node = std::make_shared(last_node, target_shape_node, false); + } if (transpose_weights) { - const size_t rank = matmul_weights->get_output_partial_shape(0).size(); + const size_t rank = last_node->get_output_partial_shape(0).size(); std::vector order(rank); std::iota(order.begin(), order.end(), 0); std::swap(*order.rbegin(), *(order.rbegin() + 1)); auto transpose_constant = ov::opset10::Constant::create(ov::element::i32, {rank}, order); - auto transpose = std::make_shared(matmul_weights, transpose_constant); - matmul_weights = transpose; + last_node = std::make_shared(last_node, transpose_constant); } - auto matMul = builder::makeMatMul(params[0], matmul_weights); - return std::make_shared(NodeVector{matMul}, params, "MatmulWeightsDecompression"); + return last_node; } void SetUp() override { targetDevice = ov::test::utils::DEVICE_GPU; - std::vector inputShapes; + ShapeParams shape_params; ov::test::ElementType weights_precision; ov::test::ElementType activations_precision; bool transpose_weights; @@ -155,7 +216,7 @@ class MatmulWeightsDecompression : public testing::WithParamInterface additional_config; - std::tie(inputShapes, + std::tie(shape_params, weights_precision, activations_precision, transpose_weights, @@ -164,14 +225,47 @@ class MatmulWeightsDecompression : public testing::WithParamInterface 200) so fp16 representation & math error is larger than default threshold + if (weights_input_channels > 2048) { + abs_threshold = 4.0f; + } else { + abs_threshold = 1.0f; + } + } + } + + void generate_inputs(const std::vector& target_input_static_shapes) override { + inputs.clear(); + const auto& model_inputs = function->inputs(); + for (size_t i = 0; i < model_inputs.size(); ++i) { + const auto& model_input = model_inputs[i]; + ov::Tensor tensor = ov::test::utils::create_and_fill_tensor(model_input.get_element_type(), + target_input_static_shapes[i], + 2, + -1, + 10000); + inputs.insert({model_input.get_node_shared_ptr(), tensor}); + } } - void checkResults() { + void check_results() { const auto& test_param = GetParam(); ov::test::ElementType weights_precision = std::get<1>(test_param); for (const auto& n : compiledModel.get_runtime_model()->get_ordered_ops()) { @@ -185,24 +279,20 @@ class MatmulWeightsDecompression : public testing::WithParamInterface activations_precisions = {ov::element::f32, ov::element::f16}; const std::vector weights_precisions = {ov::element::u8}; -const std::vector> input_shapes_basic = { - {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {{}, {{16, 32}}}}, - {{{}, {{10, 40, 496}}}, {{}, {{1, 496, 240}}}}, - {{{}, {{1, 4, 48}}}, {{}, {{48, 256}}}}, - {{{}, {{11, 339, 377}}}, {{}, {{377, 335}}}}, - {{{}, {{1, 4, 32}}}, {{}, {{32, 256}}}}, - {{{}, {{1, 4, 512}}}, {{}, {{512, 256}}}}, - {{{}, {{1, 16, 32}}}, {{}, {{32, 64}}}}, - {{{}, {{2, 4, 32}}}, {{}, {{32, 65}}}}, - {{{}, {{3, 12, 768}}}, {{}, {{768, 1024}}}}, - {{{}, {{11, 339, 577}}}, {{}, {{577, 335}}}}, +const std::vector input_shapes_basic = { + {{{-1, -1, -1}, {{1, 4, 16}, {10, 16, 16}}}, {16, 32}}, + {{{}, {{1, 4, 16}}}, {16, 32}, 2ul}, + {{{}, {{1, 4, 16}}}, {1, 16, 32}}, + {{{}, {{10, 40, 496}}}, {1, 496, 240}}, + {{{}, {{1, 4, 48}}}, {48, 256}}, + {{{}, {{11, 339, 377}}}, {377, 335}} }; INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, @@ -216,15 +306,16 @@ 
INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_basic, ::testing::Values(std::map())), MatmulWeightsDecompression::get_test_case_name); -const std::vector> input_shapes_corner_cases_basic = { - {{{-1, -1, -1}, {{1, 4, 16}}}, {{}, {{1, 16, 32}}}}, - {{{}, {{1, 4, 16}}}, {{}, {{1, 16, 32}}}}, - {{{-1, -1, -1}, {{1, 4, 16}}}, {{}, {{16, 32}}}}, - {{{-1, -1, -1, -1}, {{1, 1, 4, 16}}}, {{}, {{1, 1, 16, 32}}}}, - {{{}, {{1, 1, 4, 16}}}, {{}, {{1, 1, 16, 32}}}}, +const std::vector input_shapes_corner_cases_basic = { + {{{-1, -1, -1}, {{1, 4, 16}}}, {1, 16, 32}}, + {{{-1, -1, -1}, {{1, 4, 16}}}, {16, 32}}, + {{{-1, -1, 16}, {{1, 4, 16}}}, {16, 32}, 4}, }; -const std::vector> input_shapes_corner_cases_big = { - {{{-1, -1, -1}, {{10, 40, 480}, {11, 40, 480}}}, {{}, {{1, 480, 256}}}}, +const std::vector input_shapes_corner_cases_big = { + {{{-1, -1, -1}, {{10, 40, 480}, {11, 40, 480}}}, {1, 480, 256}}, + {{{-1, -1, -1}, {{1, 1, 4096}}}, {4096, 4096}, 128}, + {{{-1, -1, -1}, {{1, 1, 4096}}}, {4096, 4096}}, + {{{-1, 4096}, {{1, 4096}}}, {4096, 4096}, 128}, }; const std::vector transpose_weights = {true, false}; @@ -242,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_basic, ::testing::Values(std::map{})), MatmulWeightsDecompression::get_test_case_name); -INSTANTIATE_TEST_SUITE_P(smoke_MatMulCompressedWeights_corner_cases_big, +INSTANTIATE_TEST_SUITE_P(MatMulCompressedWeights_corner_cases_big, MatmulWeightsDecompression, ::testing::Combine(::testing::ValuesIn(input_shapes_corner_cases_big), ::testing::ValuesIn(weights_precisions), diff --git a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp index 71301447bb28b9..dc23440c48af67 100644 --- a/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/test_cases/fully_connected_gpu_test.cpp @@ -663,21 +663,22 @@ TEST(fully_connected_gpu, compressed_scale_zp_bias) { auto& engine = get_test_engine(); auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx }); - auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f32, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx }); auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); - auto scale_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); - auto zp_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx }); + auto zp_mem = engine.allocate_memory({ {8, 1}, data_types::f32, format::bfyx }); set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, 0.5f, -2.0f, -0.5f, -1.0f }); - set_values(weights_mem, { 1.5f, 1.0f, 0.5f, -1.0f, - 0.0f, 0.5f, 0.5f, -0.5f, - -2.0f, -0.5f, 1.0f, 1.5f, - -2.0f, -0.5f, 1.0f, 1.5f, - 2.0f, 0.5f, -1.0f, -1.5f, - 2.0f, 0.5f, -1.0f, -1.5f, - -1.5f, -1.0f, -0.5f, 1.0f, - 0.0f, -0.5f, 0.5f, 0.5f }); + set_values(weights_mem, { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 0, + 15, 14, 13, 12, + 11, 10, 9, 8, + 7, 6, 5, 4, + 3, 2, 1, 0}); + set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, 2.0f }); set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 2.0f }); @@ -709,8 +710,7 @@ TEST(fully_connected_gpu, compressed_scale_zp_bias) { ov::PartialShape expected_shape{1, 2, 8}; ASSERT_EQ(expected_shape, 
output_mem->get_layout().get_partial_shape()); - std::vector expected_result = {-4.0f, -23.0f, 11.0f, 0.0f, -2.0f, -3.5f, -30.0f, -10.5f, - 6.0f, 19.0f, -5.0f, -8.0f, 12.0f, -8.5f, 44.0f, 14.5f}; + std::vector expected_result = {13.f, 58.f, -51.f, -108.f, 18.5f, -18.f, 1.f, -4.f, -11.f, -62.f, 57.f, 100.f, -8.5f, 6.f, 13.f, 8.f, }; for (size_t i = 0; i < expected_result.size(); i++) { ASSERT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; @@ -721,20 +721,20 @@ TEST(fully_connected_gpu, compressed_scale_bias) { auto& engine = get_test_engine(); auto input_mem = engine.allocate_memory({ {1, 2, 4}, data_types::f32, format::bfyx }); - auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f32, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx }); auto bias_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); auto scale_mem = engine.allocate_memory({ {1, 1, 8}, data_types::f32, format::bfyx }); set_values(input_mem, { -0.5f, 2.0f, 0.5f, 1.0f, 0.5f, -2.0f, -0.5f, -1.0f }); - set_values(weights_mem, { 1.5f, 1.0f, 0.5f, -1.0f, - 0.0f, 0.5f, 0.5f, -0.5f, - -2.0f, -0.5f, 1.0f, 1.5f, - -2.0f, -0.5f, 1.0f, 1.5f, - 2.0f, 0.5f, -1.0f, -1.5f, - 2.0f, 0.5f, -1.0f, -1.5f, - -1.5f, -1.0f, -0.5f, 1.0f, - 0.0f, -0.5f, 0.5f, 0.5f }); + set_values(weights_mem, { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 0, + 15, 14, 13, 12, + 11, 10, 9, 8, + 7, 6, 5, 4, + 3, 2, 1, 0}); set_values(bias_mem, { 1.0f, -2.0f, 3.0f, -4.0f, 5.0f, -6.0f, 7.0f, -8.0f }); set_values(scale_mem, { 2.0f, 4.0f, -2.0f, -4.0f, 0.5f, -0.5f, 2.0f, 1.0f }); @@ -764,8 +764,7 @@ TEST(fully_connected_gpu, compressed_scale_bias) { ov::PartialShape expected_shape{1, 2, 8}; ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape()); - std::vector expected_result = {2.0f, 1.0f, -1.0f, -12.0f, 4.0f, -5.0f, 6.0f, -8.25f, - 0.0f, -5.0f, 7.0f, 4.0f, 6.0f, -7.0f, 8.0f, -7.75f}; + std::vector expected_result = {19.f, 40.f, 69.f, 54.f, 83.f, 48.f, 37.f, -2.f, -17.f, -44.f, -63.f, -62.f, -73.f, -60.f, -23.f, -14.f }; for (size_t i = 0; i < expected_result.size(); i++) { ASSERT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; @@ -776,19 +775,19 @@ TEST(fully_connected_gpu, compressed_scale_fp16) { auto& engine = get_test_engine(); auto input_mem = engine.allocate_memory({ { 2, 4}, data_types::f16, format::bfyx }); - auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::f16, format::bfyx }); - auto scale_mem = engine.allocate_memory({ {1, 8}, data_types::f16, format::bfyx }); + auto weights_mem = engine.allocate_memory({ {8, 4}, data_types::u8, format::bfyx }); + auto scale_mem = engine.allocate_memory({ {8, 1}, data_types::f16, format::bfyx }); set_values(input_mem, { ov::float16(-0.5f), ov::float16(2.0f), ov::float16(0.5f), ov::float16(1.0f), ov::float16(0.5f), ov::float16(-2.0f), ov::float16(-0.5f), ov::float16(-1.0f) }); - set_values(weights_mem, {ov::float16( 1.5f), ov::float16( 1.0f), ov::float16( 0.5f), ov::float16(-1.0f), - ov::float16( 0.0f), ov::float16( 0.5f), ov::float16( 0.5f), ov::float16(-0.5f), - ov::float16(-2.0f), ov::float16(-0.5f), ov::float16( 1.0f), ov::float16( 1.5f), - ov::float16(-2.0f), ov::float16(-0.5f), ov::float16( 1.0f), ov::float16( 1.5f), - ov::float16( 2.0f), ov::float16( 0.5f), ov::float16(-1.0f), ov::float16(-1.5f), - ov::float16( 2.0f), ov::float16( 0.5f), ov::float16(-1.0f), ov::float16(-1.5f), - ov::float16(-1.5f), ov::float16(-1.0f), ov::float16(-0.5f), ov::float16( 1.0f), - ov::float16( 0.0f), 
ov::float16(-0.5f), ov::float16(0.5f), ov::float16( 0.5f) }); + set_values(weights_mem, { 1, 2, 3, 4, + 5, 6, 7, 8, + 9, 10, 11, 12, + 13, 14, 15, 0, + 15, 14, 13, 12, + 11, 10, 9, 8, + 7, 6, 5, 4, + 3, 2, 1, 0}); set_values(scale_mem, {ov::float16(2.0f), ov::float16(4.0f), ov::float16(-2.0f), ov::float16(-4.0f), ov::float16(0.5f), ov::float16(-0.5f), ov::float16(2.0f), ov::float16(2.0f)}); @@ -817,8 +816,8 @@ TEST(fully_connected_gpu, compressed_scale_fp16) { ASSERT_EQ(expected_shape, output_mem->get_layout().get_partial_shape()); std::vector expected_result = { - ov::float16(1.0f), ov::float16( 3.0f), ov::float16(-4.0f), ov::float16(-8.0f), ov::float16(-1.0f), ov::float16( 1.0f), ov::float16(-1.0f), ov::float16(-0.5f), - ov::float16(-1.0f), ov::float16(-3.0f), ov::float16( 4.0f), ov::float16( 8.0f), ov::float16( 1.0f), ov::float16(-1.0f), ov::float16( 1.0f), ov::float16( 0.5f)}; + ov::float16(18), ov::float16(84), ov::float16(-66), ov::float16(-116), ov::float16(19.5), ov::float16(-13.5), ov::float16(30), ov::float16(6), + ov::float16(-18), ov::float16(-84), ov::float16(66), ov::float16(116), ov::float16(-19.5), ov::float16(13.5), ov::float16(-30), ov::float16(-6) }; for (size_t i = 0; i < expected_result.size(); i++) { ASSERT_FLOAT_EQ(expected_result[i], output_ptr[i]) << "i = " << i; From 6e97b91a774a993b8a1995991336c74aa5198565 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Wed, 18 Oct 2023 16:12:15 +0400 Subject: [PATCH 08/39] [GPU] UsmHostTensor implementation (#20518) Co-authored-by: Vladimir Paramuzov --- .../intel_gpu/plugin/remote_allocators.hpp | 43 ---- .../intel_gpu/plugin/remote_tensor.hpp | 8 +- .../intel_gpu/plugin/sync_infer_request.hpp | 4 +- .../intel_gpu/plugin/usm_host_tensor.hpp | 42 ++++ .../src/plugin/remote_allocators.cpp | 37 --- .../intel_gpu/src/plugin/remote_context.cpp | 5 +- .../intel_gpu/src/plugin/remote_tensor.cpp | 90 ++++++-- .../src/plugin/sync_infer_request.cpp | 97 ++++---- .../intel_gpu/src/plugin/usm_host_tensor.cpp | 50 ++++ .../tests/common/subgraphs_builders.hpp | 50 ++++ .../ov_infer_request/iteration_chaining.cpp | 23 ++ .../subgraph_tests/dynamic/kv_cache.cpp | 217 ++++++++++++++++++ 12 files changed, 516 insertions(+), 150 deletions(-) delete mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp create mode 100644 src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp delete mode 100644 src/plugins/intel_gpu/src/plugin/remote_allocators.cpp create mode 100644 src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp create mode 100644 src/plugins/intel_gpu/tests/common/subgraphs_builders.hpp create mode 100644 src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp create mode 100644 src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp deleted file mode 100644 index 877c2c707f1791..00000000000000 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_allocators.hpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (C) 2022 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include "openvino/runtime/so_ptr.hpp" - -#include - -namespace ov { -namespace intel_gpu { - -class RemoteTensorImpl; -class RemoteContextImpl; - -class USMHostAllocator final { -private: - ov::SoPtr _usm_host_tensor = { nullptr, nullptr }; - std::shared_ptr _context = 
nullptr; - -public: - using Ptr = std::shared_ptr; - - explicit USMHostAllocator(std::shared_ptr context) : _context(context) { } - - /** - * @brief Allocates memory - * @param size The size in bytes to allocate - * @return Handle to the allocated resource - */ - void* allocate(const size_t bytes, const size_t alignment = alignof(max_align_t)) noexcept; - /** - * @brief Releases handle and all associated memory resources which invalidates the handle. - * @return false if handle cannot be released, otherwise - true. - */ - bool deallocate(void* handle, const size_t bytes, size_t alignment = alignof(max_align_t)) noexcept; - - bool is_equal(const USMHostAllocator& other) const; -}; - -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp index 74a07bbcbf38bf..f7f72cc77a16a3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/remote_tensor.hpp @@ -15,6 +15,7 @@ #endif #include "openvino/runtime/iremote_tensor.hpp" +#include "intel_gpu/runtime/memory_caps.hpp" #include "intel_gpu/runtime/memory.hpp" #include "intel_gpu/runtime/engine.hpp" #include "intel_gpu/plugin/common_utils.hpp" @@ -56,6 +57,8 @@ class RemoteTensorImpl : public ov::IRemoteTensor { cldnn::memory::ptr get_memory() const; cldnn::memory::ptr get_original_memory() const; + void set_memory(cldnn::memory::ptr memory, size_t actual_size); + std::shared_ptr get_context() const; private: @@ -76,8 +79,11 @@ class RemoteTensorImpl : public ov::IRemoteTensor { size_t m_hash = 0; bool supports_caching() const; + void update_hash(); void update_strides(); - void init_properties(); + void update_properties(); + + static TensorType allocation_type_to_tensor_type(cldnn::allocation_type t); }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp index 1fd6d035dd48af..3050846e2c2354 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/sync_infer_request.hpp @@ -90,9 +90,7 @@ class SyncInferRequest : public ov::ISyncInferRequest { bool need_lockable_mem) const; std::shared_ptr reinterpret_device_tensor(std::shared_ptr tensor, const ov::Shape new_shape) const; std::shared_ptr create_host_tensor(const ov::PartialShape& port_shape, const ov::element::Type& port_element_type) const; - std::shared_ptr create_device_tensor(const ov::Shape& pshape, ov::element::Type element_type, - bool need_lockable_memory = false, void* mem_ptr = nullptr) const; - std::shared_ptr create_shared_device_tensor(const ov::Shape& pshape, ov::element::Type element_type, void* usm_host_mem) const; + std::shared_ptr create_device_tensor(const ov::PartialShape& pshape, ov::element::Type element_type, bool need_lockable_memory = false) const; void allocate_inputs(); void allocate_outputs(); diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp new file mode 100644 index 00000000000000..d410fa046651e5 --- /dev/null +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/usm_host_tensor.hpp @@ -0,0 +1,42 @@ +// Copyright (C) 2022 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/runtime/itensor.hpp" + +#include + 
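+// USMHostTensor exposes a USM host allocation through the plain ov::ITensor
+// interface by wrapping a RemoteTensorImpl created with usm_host memory, so the
+// same buffer is directly readable on the CPU and still usable by the GPU plugin.
+// Illustrative use (mirroring RemoteContextImpl::create_host_tensor()):
+//   auto tensor = std::make_shared<USMHostTensor>(context, ov::element::f32, ov::Shape{2, 4});
+//   auto* ptr = tensor->data(ov::element::f32);  // CPU-visible USM host pointer
+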
+namespace ov { +namespace intel_gpu { + +class RemoteContextImpl; +class RemoteTensorImpl; + +class USMHostTensor : public ov::ITensor { +public: + USMHostTensor(std::shared_ptr context, const element::Type element_type, const Shape& shape); + explicit USMHostTensor(std::shared_ptr tensor); + + ~USMHostTensor() override = default; + + void* data(const element::Type& element_type) const override; + const element::Type& get_element_type() const override; + + const Shape& get_shape() const override; + + const Strides& get_strides() const override; + + void set_shape(ov::Shape new_shape) override; + + void set_memory(std::shared_ptr tensor); + + std::shared_ptr get_impl() const; + +private: + std::shared_ptr m_impl; +}; + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp b/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp deleted file mode 100644 index fe9ff746f04283..00000000000000 --- a/src/plugins/intel_gpu/src/plugin/remote_allocators.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Copyright (C) 2018-2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "intel_gpu/plugin/remote_allocators.hpp" -#include "intel_gpu/plugin/remote_tensor.hpp" -#include "intel_gpu/plugin/remote_context.hpp" -#include - -namespace ov { -namespace intel_gpu { - -void* USMHostAllocator::allocate(const size_t bytes, const size_t /* alignment */) noexcept { - try { - ov::AnyMap params = { ov::intel_gpu::shared_mem_type(ov::intel_gpu::SharedMemType::USM_HOST_BUFFER) }; - _usm_host_tensor = _context->create_tensor(ov::element::u8, {bytes}, params); - if (auto casted = std::dynamic_pointer_cast(_usm_host_tensor._ptr)) { - return casted->get_original_memory()->get_internal_params().mem; - } - return nullptr; - } catch (std::exception&) { - return nullptr; - } -} - -bool USMHostAllocator::deallocate(void* /* handle */, const size_t /* bytes */, size_t /* alignment */) noexcept { - try { - _usm_host_tensor = {nullptr, nullptr}; - } catch (std::exception&) { } - return true; -} - -bool USMHostAllocator::is_equal(const USMHostAllocator& other) const { - return other._usm_host_tensor != nullptr && _usm_host_tensor != nullptr && other._usm_host_tensor._ptr == _usm_host_tensor._ptr; -} -} // namespace intel_gpu -} // namespace ov diff --git a/src/plugins/intel_gpu/src/plugin/remote_context.cpp b/src/plugins/intel_gpu/src/plugin/remote_context.cpp index 1b932226881db3..e4aefa00bb0f0f 100644 --- a/src/plugins/intel_gpu/src/plugin/remote_context.cpp +++ b/src/plugins/intel_gpu/src/plugin/remote_context.cpp @@ -6,7 +6,7 @@ #include "openvino/runtime/make_tensor.hpp" #include "intel_gpu/plugin/remote_context.hpp" #include "intel_gpu/plugin/remote_tensor.hpp" -#include "intel_gpu/plugin/remote_allocators.hpp" +#include "intel_gpu/plugin/usm_host_tensor.hpp" #include "intel_gpu/runtime/itt.hpp" #include "intel_gpu/runtime/device_query.hpp" #include @@ -111,8 +111,7 @@ std::shared_ptr RemoteContextImpl::get_this_shared_ptr() { ov::SoPtr RemoteContextImpl::create_host_tensor(const ov::element::Type type, const ov::Shape& shape) { if (m_engine->use_unified_shared_memory()) { - USMHostAllocator allocator(get_this_shared_ptr()); - return { ov::make_tensor(type, shape, allocator), nullptr }; + return { std::make_shared(get_this_shared_ptr(), type, shape), nullptr }; } else { return { ov::make_tensor(type, shape), nullptr }; } diff --git a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp index 
a7c68cd8f81107..cd1011ea153bfe 100644
--- a/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp
+++ b/src/plugins/intel_gpu/src/plugin/remote_tensor.cpp
@@ -2,17 +2,29 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
+#include "intel_gpu/plugin/common_utils.hpp"
 #include "intel_gpu/plugin/remote_context.hpp"
 #include "intel_gpu/plugin/remote_tensor.hpp"
-#include "intel_gpu/plugin/remote_allocators.hpp"
 #include "intel_gpu/plugin/plugin.hpp"
 #include "intel_gpu/runtime/itt.hpp"
+#include "intel_gpu/runtime/memory_caps.hpp"
 
 #include <memory>
 
 namespace ov {
 namespace intel_gpu {
 
+TensorType RemoteTensorImpl::allocation_type_to_tensor_type(cldnn::allocation_type t) {
+    switch (t) {
+    case cldnn::allocation_type::cl_mem: return TensorType::BT_BUF_INTERNAL;
+    case cldnn::allocation_type::usm_host: return TensorType::BT_USM_HOST_INTERNAL;
+    case cldnn::allocation_type::usm_device: return TensorType::BT_USM_DEVICE_INTERNAL;
+    default: return TensorType::BT_EMPTY;
+    }
+
+    return TensorType::BT_EMPTY;
+}
+
 RemoteTensorImpl::RemoteTensorImpl(RemoteContextImpl::Ptr context,
                                    const ov::Shape& shape,
                                    const ov::element::Type& element_type,
@@ -28,20 +40,8 @@ RemoteTensorImpl::RemoteTensorImpl(RemoteContextImpl::Ptr context,
     , m_mem(mem)
     , m_surf(surf)
     , m_plane(plane) {
-    if (supports_caching()) {
-        m_hash = cldnn::hash_combine(0, m_mem);
-        m_hash = cldnn::hash_combine(m_hash, m_surf);
-        m_hash = cldnn::hash_combine(m_hash, plane);
-        m_hash = cldnn::hash_combine(m_hash, m_shape.size());
-        m_hash = cldnn::hash_combine(m_hash, element_type.hash());
-        for (const auto& d : m_shape) {
-            m_hash = cldnn::hash_combine(m_hash, d);
-        }
-    }
-
-    update_strides();
+    update_hash();
     allocate();
-    init_properties();
 }
 
 RemoteTensorImpl::~RemoteTensorImpl() {
@@ -82,12 +82,15 @@ const AnyMap& RemoteTensorImpl::get_properties() const {
     m_shape = shape;
 
     if (ov::shape_size(shape) > m_memory_object->count()) {
-        OPENVINO_ASSERT(!is_shared(), "Cannot call setShape for Tensor created on top of preallocated memory if shape was increased.");
+        GPU_DEBUG_TRACE_DETAIL << "Remote realloc" << std::endl;
+        OPENVINO_ASSERT(!is_shared(), "Cannot call set_shape for Tensor created on top of preallocated memory if shape was increased.");
         if (!deallocate()) {
-            OPENVINO_THROW("Cannot deallocate tensor while an attempt to enlarge tensor area in setShape.");
+            OPENVINO_THROW("Cannot deallocate tensor while an attempt to enlarge tensor area in set_shape.");
         }
 
         allocate();
+    } else {
+        update_strides();
     }
 }
 
@@ -108,23 +111,39 @@ void RemoteTensorImpl::allocate() {
 
     if (enable_caching) {
         m_memory_object = context->try_get_cached_memory(m_hash);
-        if (m_memory_object)
+        if (m_memory_object) {
+            update_properties();
+            update_strides();
             return;
+        }
     }
 
     auto& engine = context->get_engine();
 
+    // Currently, clDeviceMemAllocINTEL returns a memory address allocated for another input blob if the current blob is empty
+    // W/A for this issue:
+    // Allocate with non-empty shape and then reinterpret with the original shape
+    auto shape_copy = m_shape;
+    for (auto &i : shape_copy) {
+        if (i == 0)
+            i = 1;
+    }
+
+    m_layout.set_partial_shape(shape_copy);
+
+    const bool reset = false;
+
    switch (m_mem_type) {
    case TensorType::BT_BUF_INTERNAL: {
-        m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem);
+        m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::cl_mem, reset);
        break;
    }
    case TensorType::BT_USM_HOST_INTERNAL: {
-        m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host);
+        m_memory_object =
engine.allocate_memory(m_layout, cldnn::allocation_type::usm_host, reset); break; } case TensorType::BT_USM_DEVICE_INTERNAL: { - m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_device); + m_memory_object = engine.allocate_memory(m_layout, cldnn::allocation_type::usm_device, reset); break; } case TensorType::BT_BUF_SHARED: { @@ -161,6 +180,9 @@ void RemoteTensorImpl::allocate() { m_memory_object.reset(); } + update_properties(); + update_strides(); + if (enable_caching) context->add_to_cache(m_hash, m_memory_object); } @@ -181,6 +203,19 @@ bool RemoteTensorImpl::supports_caching() const { return is_shared(); } +void RemoteTensorImpl::update_hash() { + if (supports_caching()) { + m_hash = cldnn::hash_combine(0, m_mem); + m_hash = cldnn::hash_combine(m_hash, m_surf); + m_hash = cldnn::hash_combine(m_hash, m_plane); + m_hash = cldnn::hash_combine(m_hash, m_shape.size()); + m_hash = cldnn::hash_combine(m_hash, m_element_type.hash()); + for (const auto& d : m_shape) { + m_hash = cldnn::hash_combine(m_hash, d); + } + } +} + bool RemoteTensorImpl::is_surface() const noexcept { return m_mem_type == TensorType::BT_SURF_SHARED || m_mem_type == TensorType::BT_IMG_SHARED || @@ -196,11 +231,24 @@ cldnn::memory::ptr RemoteTensorImpl::get_original_memory() const { return m_memory_object; } +void RemoteTensorImpl::set_memory(cldnn::memory::ptr memory, size_t actual_size) { + auto engine = m_memory_object->get_engine(); + m_layout = memory->get_layout(); + m_shape = m_layout.get_shape(); + + auto actual_layout = m_layout; + actual_layout.set_partial_shape({ov::Dimension(actual_size)}); + m_memory_object = engine->reinterpret_buffer(*memory, actual_layout); + + update_properties(); + update_strides(); +} + std::shared_ptr RemoteTensorImpl::get_context() const { return m_context; } -void RemoteTensorImpl::init_properties() { +void RemoteTensorImpl::update_properties() { OPENVINO_ASSERT(is_allocated(), "[GPU] Can't initialize RemoteTensorImpl parameters as memory was not allocated"); auto params = m_memory_object->get_internal_params(); diff --git a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp index 61ac1424c7649e..9c097d222fdc1b 100644 --- a/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp +++ b/src/plugins/intel_gpu/src/plugin/sync_infer_request.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "intel_gpu/plugin/usm_host_tensor.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/core/preprocess/input_tensor_info.hpp" #include "openvino/core/parallel.hpp" @@ -10,7 +11,6 @@ #include "intel_gpu/plugin/sync_infer_request.hpp" #include "intel_gpu/plugin/remote_context.hpp" -#include "intel_gpu/plugin/remote_allocators.hpp" #include "intel_gpu/plugin/remote_tensor.hpp" #include "intel_gpu/plugin/compiled_model.hpp" #include "intel_gpu/plugin/variable_state.hpp" @@ -41,6 +41,15 @@ inline bool can_use_usm_host(const cldnn::engine& engine) { return can_use_usm; } +inline ov::Shape get_tensor_shape(const ov::PartialShape& pshape) { + ov::Shape res(pshape.size()); + for (size_t i = 0; i < pshape.size(); i++) { + res[i] = pshape[i].is_dynamic() ? 
0 : pshape[i].get_length(); + } + + return res; +} + inline std::string get_port_name(const ov::Output& port, const bool is_legacy_api) { std::string name; // TODO: Should use tensor name as the port name, but many legacy tests still use legacy name @@ -72,7 +81,7 @@ void convert_and_copy(const void* src_ptr, ov::element::Type src_et, void* dst_p return; if (src_et == dst_et) { - std::memcpy(dst_ptr, src_ptr, size); + std::memcpy(dst_ptr, src_ptr, size * src_et.size()); return; } @@ -425,6 +434,7 @@ void SyncInferRequest::wait() { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait::reinterpret_memory"); OPENVINO_ASSERT(!output_memory->get_layout().data_padding, "[GPU] Unexpected padding in output buffer"); output_memory = m_graph->get_engine().reinterpret_buffer(*output_memory, output_layout); + GPU_DEBUG_TRACE_DETAIL << name << " model output: " << output_memory->buffer_ptr() << std::endl; } OPENVINO_ASSERT(m_user_outputs.count(name) > 0, "[GPU] Output ", name, " is not found in output tensors map"); @@ -433,6 +443,12 @@ void SyncInferRequest::wait() { auto remote_ptr = std::dynamic_pointer_cast(output_tensor); bool is_remote = remote_ptr != nullptr; + if (is_remote) { + GPU_DEBUG_TRACE_DETAIL << name << " handle output tensor (remote): " << remote_ptr->get_original_memory()->buffer_ptr() << std::endl; + } else { + GPU_DEBUG_TRACE_DETAIL << name << " handle output tensor (host): " << output_tensor->data() << std::endl; + } + bool need_output_update = output_layout.bytes_count() == 0 || (output_memory && output_tensor->get_byte_size() != output_memory->size()); if (need_output_update) { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::wait::update_output"); @@ -444,6 +460,19 @@ void SyncInferRequest::wait() { OPENVINO_ASSERT(ov::shape_size(port.get_shape()) == ov::shape_size(mem_shape), "[GPU] Unexpected elements count for output tensor"); mem_shape = port.get_shape(); } + if (port.get_partial_shape().is_dynamic()) { + bool need_reallocate = true; + auto usm_host_tensor = std::dynamic_pointer_cast(output_tensor); + if (usm_host_tensor && output_memory) + need_reallocate = usm_host_tensor->get_impl()->get_original_memory()->size() < output_memory->size(); + + if (need_reallocate) { + auto& shape_predictor = m_graph->get_network()->get_shape_predictor(); + auto actual_memory_shape = predict_shape(name, mem_shape, output_tensor->get_element_type(), shape_predictor); + output_tensor->set_shape(actual_memory_shape); + } + } + output_tensor->set_shape(mem_shape); } @@ -453,6 +482,8 @@ void SyncInferRequest::wait() { auto dst_ptr = static_cast(output_tensor->data()); bool same_mem = same_host_mem(output_memory, dst_ptr); if (!same_mem && output_memory->size()) { + GPU_DEBUG_TRACE_DETAIL << name << " copy from: " << output_memory->buffer_ptr() << " to " + << (!is_remote ? 
output_tensor->data() : remote_ptr->get_original_memory()->buffer_ptr()) << std::endl; if (auto ev = copy_output_data(output_memory, *output_tensor)) { copy_events.push_back(ev); } @@ -492,22 +523,13 @@ void SyncInferRequest::setup_stream_graph() { std::shared_ptr SyncInferRequest::create_host_tensor(const ov::PartialShape& port_shape, const ov::element::Type& port_element_type) const { OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::create_host_tensor"); - // Disable USM usage as USMHostAllocator may fail for attempt to allocate 0 bytes - // If we add WA for such case to avoid driver call, then deallocate method will return false and Blob::setShape call will throw an exception - bool use_usm = m_graph->get_engine().use_unified_shared_memory() && !port_shape.is_dynamic(); - - auto shape = port_shape.is_static() ? port_shape.to_shape() : ov::Shape(port_shape.size(), 0); - auto usm_allocator = USMHostAllocator(m_context); - return use_usm ? ov::make_tensor(port_element_type, shape, usm_allocator) - : ov::make_tensor(port_element_type, shape); + return m_context->create_host_tensor(port_element_type, get_tensor_shape(port_shape))._ptr; } -std::shared_ptr SyncInferRequest::create_device_tensor(const ov::Shape& shape, ov::element::Type element_type, - bool need_lockable_memory, void* mem_ptr) const { +std::shared_ptr SyncInferRequest::create_device_tensor(const ov::PartialShape& port_shape, ov::element::Type element_type, + bool need_lockable_memory) const { TensorType tensor_type = TensorType::BT_EMPTY; - if (mem_ptr) { - tensor_type = TensorType::BT_USM_SHARED; - } else if (m_graph->get_engine().use_unified_shared_memory()) { + if (m_graph->get_engine().use_unified_shared_memory()) { tensor_type = need_lockable_memory ? TensorType::BT_USM_HOST_INTERNAL : TensorType::BT_USM_DEVICE_INTERNAL; } else { tensor_type = TensorType::BT_BUF_INTERNAL; @@ -517,24 +539,10 @@ std::shared_ptr SyncInferRequest::create_device_tensor(const ov::Sh if (!can_use_usm_host(m_graph->get_engine()) && need_lockable_memory) tensor_type = TensorType::BT_BUF_INTERNAL; - // Currently, clDeviceMemAllocINTEL returns memory address allocated to other input blob if the current blob is empty - // W/A for this issue: - // Allocate with non-empty shape and then reinterprete with original shape - auto shape_copy = shape; - for (auto &i : shape_copy) { - if (i == 0) - i = 1; - } - return std::make_shared(m_context, - shape_copy, + get_tensor_shape(port_shape), element_type, - tensor_type, - mem_ptr); -} - -std::shared_ptr SyncInferRequest::create_shared_device_tensor(const ov::Shape& shape, ov::element::Type element_type, void* usm_host_mem) const { - return create_device_tensor(shape, element_type, false, usm_host_mem); + tensor_type); } TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrapper& user_tensor_wrapper, @@ -546,17 +554,12 @@ TensorWrapper SyncInferRequest::create_or_share_device_tensor(const TensorWrappe auto tensor_shape = user_tensor->get_shape(); bool is_dynamic = port_pshape.is_dynamic(); OPENVINO_ASSERT(std::dynamic_pointer_cast(user_tensor) == nullptr, "[GPU] Unexpected remote tensor"); - auto input_ptr = user_tensor->data(); - const auto alloc_type = m_graph->get_engine().detect_usm_allocation_type(input_ptr); - const auto is_usm_host = alloc_type == cldnn::allocation_type::usm_host; - bool can_share = is_usm_host && - !is_convert_required(user_tensor->get_element_type(), element_type) && + auto usm_host_tensor = std::dynamic_pointer_cast(user_tensor); + bool 
can_share = usm_host_tensor != nullptr && !is_convert_required(user_tensor->get_element_type(), element_type) && can_use_usm_host(m_graph->get_engine()); if (can_share) { - // For USM case we create host blob using custom USM host allocator - // and then create shared device blob on top of this buffer - return { create_shared_device_tensor(tensor_shape, element_type, input_ptr), user_tensor_wrapper.owner }; + return { usm_host_tensor->get_impl(), user_tensor_wrapper.owner }; } auto actual_memory_shape = tensor_shape; @@ -689,13 +692,17 @@ std::vector SyncInferRequest::prepare_batched_input(const std std::vector SyncInferRequest::prepare_input(const std::string& name, const ov::Output& port, const TensorWrapper& user_tensor_wrapper) { - OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, "SyncInferRequest::prepare_input"); + OV_ITT_SCOPED_TASK(itt::domains::intel_gpu_plugin, openvino::itt::handle("SyncInferRequest::prepare_input: " + name)); auto pshape = port.get_partial_shape(); auto is_dynamic = pshape.is_dynamic(); auto user_tensor = user_tensor_wrapper.ptr; auto element_type = user_tensor->get_element_type(); + auto remote_ptr = std::dynamic_pointer_cast(user_tensor); + auto usm_host_ptr = std::dynamic_pointer_cast(user_tensor); bool is_remote = remote_ptr != nullptr; + bool is_usm_host_tensor = usm_host_ptr != nullptr; + GPU_DEBUG_TRACE_DETAIL << "Prepare input for " << name << " ( is_remote ? " << is_remote << ")" << std::endl; GPU_DEBUG_TRACE_DETAIL << " port shape : " << pshape.to_string() << std::endl; GPU_DEBUG_TRACE_DETAIL << " user_tensor shape: " << user_tensor->get_shape().to_string() << std::endl; @@ -713,12 +720,16 @@ std::vector SyncInferRequest::prepare_input(const std::string user_tensor->get_shape(), ") are incompatible"); + auto device_tensor_et = convert_to_supported_device_type(element_type); + bool convert_needed = is_convert_required(element_type, device_tensor_et); + if (is_remote) { m_plugin_inputs[name] = user_tensor_wrapper; + } else if (is_usm_host_tensor && !convert_needed) { + m_plugin_inputs[name] = {usm_host_ptr->get_impl(), user_tensor_wrapper.owner}; + is_remote = true; } - auto device_tensor_et = convert_to_supported_device_type(element_type); - bool convert_needed = is_convert_required(element_type, device_tensor_et); bool update_device_tensor = m_plugin_inputs.count(name) == 0 || (m_plugin_inputs[name].owner == TensorOwner::USER && !is_remote); if (update_device_tensor) { @@ -780,6 +791,7 @@ std::vector SyncInferRequest::prepare_input(const std::string } } + GPU_DEBUG_TRACE_DETAIL << name << " prepare input: " << memory->buffer_ptr() << std::endl; const cldnn::primitive_id internal_name = "parameter:" + name; network->set_input_data(internal_name, memory); @@ -839,6 +851,7 @@ std::vector SyncInferRequest::prepare_output(const std::strin auto output_tensor = std::dynamic_pointer_cast(m_plugin_outputs.at(name).ptr); auto output_memory = output_tensor->get_memory(); + GPU_DEBUG_TRACE_DETAIL << name << " prepare output: " << output_memory->buffer_ptr() << std::endl; return network->set_output_memory(internal_name, output_memory); } diff --git a/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp new file mode 100644 index 00000000000000..bcb0877b521f20 --- /dev/null +++ b/src/plugins/intel_gpu/src/plugin/usm_host_tensor.cpp @@ -0,0 +1,50 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "intel_gpu/plugin/usm_host_tensor.hpp" +#include 
"intel_gpu/plugin/remote_tensor.hpp" +#include "intel_gpu/plugin/remote_context.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include + +namespace ov { +namespace intel_gpu { + +USMHostTensor::USMHostTensor(std::shared_ptr context, const element::Type element_type, const Shape& shape) + : m_impl(std::make_shared(context, shape, element_type, TensorType::BT_USM_HOST_INTERNAL)) {} + +USMHostTensor::USMHostTensor(std::shared_ptr tensor) + : m_impl(tensor) {} + +void* USMHostTensor::data(const element::Type& element_type) const { + return m_impl->get_original_memory()->buffer_ptr(); +} + +const element::Type& USMHostTensor::get_element_type() const { + return m_impl->get_element_type(); +} + +const Shape& USMHostTensor::get_shape() const { + return m_impl->get_shape(); +} + +const Strides& USMHostTensor::get_strides() const { + return m_impl->get_strides(); +} + +void USMHostTensor::set_shape(ov::Shape new_shape) { + m_impl->set_shape(new_shape); +} + +void USMHostTensor::set_memory(std::shared_ptr tensor) { + OPENVINO_ASSERT(tensor->get_original_memory()->get_allocation_type() == cldnn::allocation_type::usm_host, "[GPU] Unexpected allocation type"); + m_impl = tensor; +} + +std::shared_ptr USMHostTensor::get_impl() const { + return m_impl; +} + +} // namespace intel_gpu +} // namespace ov diff --git a/src/plugins/intel_gpu/tests/common/subgraphs_builders.hpp b/src/plugins/intel_gpu/tests/common/subgraphs_builders.hpp new file mode 100644 index 00000000000000..dea703cf7104b2 --- /dev/null +++ b/src/plugins/intel_gpu/tests/common/subgraphs_builders.hpp @@ -0,0 +1,50 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include +#include "openvino/core/dimension.hpp" +#include "openvino/core/model.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/op/result.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/convert.hpp" +#include "openvino/op/concat.hpp" + +namespace tests { + +inline std::shared_ptr make_llm_kv_cache_pattern(ov::Dimension batch = ov::Dimension::dynamic(), + ov::Dimension n_heads = ov::Dimension::dynamic(), + ov::Dimension n_features = ov::Dimension::dynamic(), + ov::element::Type_t element_type = ov::element::f32) { + ov::PartialShape kv_cache_size = {batch, n_heads, -1, n_features}; + ov::PartialShape new_token_size = {batch, -1, n_heads, n_features}; + ov::PartialShape matmul_in_size = {batch, n_heads, -1, -1}; + + auto in_kv_prev = std::make_shared(element_type, kv_cache_size); + in_kv_prev->set_friendly_name("past_key_values"); + auto in_new_token = std::make_shared(element_type, new_token_size); + in_new_token->set_friendly_name("new_token_input"); + auto in_matmul = std::make_shared(element_type, matmul_in_size); + in_matmul->set_friendly_name("in_matmul"); + + auto transpose_const = ov::op::v0::Constant::create(ov::element::i32, {new_token_size.size()}, {0, 2, 1, 3}); + auto transpose = std::make_shared(in_new_token, transpose_const); + auto concat = std::make_shared(ov::OutputVector{in_kv_prev, transpose}, 2); + auto convert = std::make_shared(concat, element_type); + auto kv_present = std::make_shared(convert); + kv_present->set_friendly_name("present_key_values"); + auto matmul = std::make_shared(in_matmul, concat, false, false); + auto matmul_out = std::make_shared(matmul); + matmul_out->set_friendly_name("matmul_out"); + + ov::ParameterVector params{in_kv_prev, in_new_token, in_matmul}; + ov::ResultVector 
results{kv_present, matmul_out}; + return std::make_shared(results, params, "LLM-KV-Cache"); +} + +} // namespace tests diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp new file mode 100644 index 00000000000000..4bcef9a7bedbe0 --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/behavior/ov_infer_request/iteration_chaining.cpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// +#include +#include "behavior/ov_infer_request/iteration_chaining.hpp" +#include "common_test_utils/test_constants.hpp" +#include "openvino/runtime/properties.hpp" + +using namespace ov::test::behavior; + +namespace { + +const std::vector configs = { + { ov::hint::inference_precision(ov::element::f32) } +}; + +INSTANTIATE_TEST_SUITE_P(smoke_BehaviorTests, OVIterationChaining, + ::testing::Combine( + ::testing::Values(ov::test::utils::DEVICE_GPU), + ::testing::ValuesIn(configs)), + OVIterationChaining::getTestCaseName); + +} // namespace diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp new file mode 100644 index 00000000000000..a32e97d8e8e0fc --- /dev/null +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/kv_cache.cpp @@ -0,0 +1,217 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "common_test_utils/ov_tensor_utils.hpp" +#include "openvino/core/node_vector.hpp" +#include "openvino/core/partial_shape.hpp" +#include "openvino/core/preprocess/pre_post_process.hpp" +#include "openvino/op/concat.hpp" +#include "openvino/op/matmul.hpp" +#include "openvino/op/parameter.hpp" +#include "openvino/op/transpose.hpp" +#include "shared_test_classes/base/layer_test_utils.hpp" +#include "shared_test_classes/base/ov_subgraph.hpp" +#include "shared_test_classes/base/utils/compare_results.hpp" +#include "transformations/rt_info/decompression.hpp" +#include "subgraphs_builders.hpp" + +using namespace ov::test; + +namespace SubgraphTestsDefinitions { + +using KVCacheTestParams = std::tuple, // input shapes + ov::element::Type, // in/out precision + std::map>; // additional config + +class KVCacheTest : public testing::WithParamInterface, public SubgraphBaseTest { +public: + static std::string get_test_case_name(testing::TestParamInfo obj) { + std::vector input_shapes; + ov::element::Type element_type; + std::map additional_config; + + std::tie(input_shapes, element_type, additional_config) = obj.param; + + std::ostringstream result; + for (const auto& shape : input_shapes) { + result << ov::test::utils::partialShape2str({shape.first}) << "_"; + } + result << "TS="; + for (const auto& shape : input_shapes) { + result << "("; + if (!shape.second.empty()) { + auto itr = shape.second.begin(); + do { + result << ov::test::utils::vec2str(*itr); + } while (++itr != shape.second.end() && result << "_"); + } + result << ")_"; + } + result << "precision=" << element_type << "_"; + result << "config=("; + for (const auto& configEntry : additional_config) { + result << configEntry.first << ", " << configEntry.second << ":"; + } + result << ")"; + + return result.str(); + } + +protected: + void SetUp() override { + targetDevice = ov::test::utils::DEVICE_GPU; + + std::vector input_shapes; + 
ov::element::Type element_type; + std::map additional_config; + + std::tie(input_shapes, element_type, additional_config) = GetParam(); + + configuration.insert(additional_config.begin(), additional_config.end()); + init_input_shapes(input_shapes); + + inType = outType = element_type; + + function = tests::make_llm_kv_cache_pattern(inputDynamicShapes[0][0], inputDynamicShapes[0][1], inputDynamicShapes[0][3], element_type); + } +}; + +TEST_P(KVCacheTest, CompareWithRefs) { + SKIP_IF_CURRENT_TEST_IS_DISABLED() + run(); +} + +namespace { + +const std::vector precisions = {ov::element::f32, ov::element::f16}; + +const std::vector> input_shapes_basic = { + { + {{-1, 32, -1, 80}, { {1, 32, 0, 80}, {1, 32, 20, 80} }}, + {{-1, -1, 32, 80}, { {1, 20, 32, 80}, {1, 1, 32, 80} }}, + {{-1, 32, -1, -1}, { {1, 32, 1, 20}, {1, 32, 1, 21} }} + }, +}; + +INSTANTIATE_TEST_SUITE_P(smoke_GPU_Dynamic, + KVCacheTest, + ::testing::Combine(::testing::ValuesIn(input_shapes_basic), + ::testing::ValuesIn(precisions), + ::testing::Values(std::map())), + KVCacheTest::get_test_case_name); +} // namespace + +TEST(KVCacheTest, smoke_multipleIterations) { +#if defined(ANDROID) + GTEST_SKIP(); +#endif + auto core = ov::Core(); + + const size_t batch = 1; + const size_t n_heads = 32; + const size_t n_features = 80; + const size_t context_size = 20; + size_t cache_size = 0; + + ov::element::Type element_type = ov::element::f16; + + auto model = tests::make_llm_kv_cache_pattern(batch, n_heads, n_features, element_type); + auto compiled_model = core.compile_model(model, ov::test::utils::DEVICE_GPU, ov::hint::inference_precision(ov::element::f16)); + + auto input0 = model->get_parameters().at(0); + auto input1 = model->get_parameters().at(1); + auto input2 = model->get_parameters().at(2); + auto output0 = model->get_results().at(0); + auto output1 = model->get_results().at(1); + + auto infer_request = compiled_model.create_infer_request(); + auto input0_tensor_remote_io = infer_request.get_tensor(input0); + auto input1_tensor_remote_io = infer_request.get_tensor(input1); + auto input2_tensor_remote_io = infer_request.get_tensor(input2); + auto output0_tensor_remote_io = infer_request.get_tensor(output0); + auto output1_tensor_remote_io = infer_request.get_tensor(output1); + + auto compare_tensors = [&model](const std::vector expected, const std::vector& actual) { + ASSERT_EQ(expected.size(), actual.size()); + ASSERT_EQ(expected.size(), model->get_results().size()); + auto compareMap = ov::test::utils::getCompareMap(); + const auto& results = model->get_results(); + for (size_t j = 0; j < results.size(); j++) { + const auto result = results[j]; + for (size_t i = 0; i < result->get_input_size(); ++i) { + std::shared_ptr inputNode = result->get_input_node_shared_ptr(i); + if (std::dynamic_pointer_cast(inputNode)) { + std::shared_ptr nextNodePtr = inputNode->get_input_node_shared_ptr(0); + if (!ngraph::is_type(nextNodePtr)) { + inputNode = nextNodePtr; + } + } + auto it = compareMap.find(inputNode->get_type_info()); + ASSERT_NE(it, compareMap.end()); + it->second(inputNode, i, expected[j], actual[j], 1e-4f, 1e-4f); + } + } + }; + + { + const ov::Shape kv_cache_size_initial = {batch, n_heads, cache_size, n_features}; + const ov::Shape new_token_size_initial = {batch, context_size, n_heads, n_features}; + const ov::Shape matmul_in_size_initial = {batch, n_heads, context_size, context_size}; + + auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size_initial); + auto matmul_data = 
ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_initial); + + auto kv_cache_input = infer_request.get_tensor(input0); + kv_cache_input.set_shape(kv_cache_size_initial); + + auto ref_model = model->clone(); + ngraph::helpers::resize_function(ref_model, {kv_cache_input.get_shape(), new_token_data.get_shape(), matmul_data.get_shape()}); + auto results = ngraph::helpers::interpretFunction(ref_model, {{input0, kv_cache_input}, {input1, new_token_data}, {input2, matmul_data}}); + + infer_request.set_tensor(input0, kv_cache_input); + infer_request.set_tensor(input1, new_token_data); + infer_request.set_tensor(input2, matmul_data); + + infer_request.infer(); + + compare_tensors(results, {infer_request.get_tensor(output0), infer_request.get_tensor(output1)}); + + cache_size += context_size; + } + + const size_t input_tokens = 1; + const size_t niters = 10; + const ov::Shape new_token_size = {batch, input_tokens, n_heads, n_features}; + size_t context_length = cache_size + input_tokens; + for (size_t i = 0; i < niters; i++, context_length += input_tokens) { + ov::Shape matmul_in_size_loop = {batch, n_heads, input_tokens, context_length}; + auto new_token_data = ov::test::utils::create_and_fill_tensor(element_type, new_token_size); + auto matmul_data = ov::test::utils::create_and_fill_tensor(element_type, matmul_in_size_loop); + + auto kv_cache_input = infer_request.get_tensor(output0); + auto kv_shape = kv_cache_input.get_shape(); + + auto ref_model = model->clone(); + ngraph::helpers::resize_function(ref_model, {kv_shape, new_token_data.get_shape(), matmul_data.get_shape()}); + auto results = ngraph::helpers::interpretFunction(ref_model, {{input0, kv_cache_input}, {input1, new_token_data}, {input2, matmul_data}}); + + auto new_token_input = infer_request.get_tensor(input1); + new_token_input.set_shape(new_token_data.get_shape()); + auto matmul_input = infer_request.get_tensor(input2); + matmul_input.set_shape(matmul_data.get_shape()); + + new_token_data.copy_to(new_token_input); + matmul_data.copy_to(matmul_input); + + infer_request.set_tensor(input0, kv_cache_input); + infer_request.set_tensor(input1, new_token_input); + infer_request.set_tensor(input2, matmul_input); + + infer_request.infer(); + + compare_tensors(results, {infer_request.get_tensor(output0), infer_request.get_tensor(output1)}); + } +} + +} // namespace SubgraphTestsDefinitions From 6c396fac9dacd8c62e41f77e288f5b6327730630 Mon Sep 17 00:00:00 2001 From: Sergey Shlyapnikov Date: Wed, 18 Oct 2023 16:28:27 +0400 Subject: [PATCH 09/39] [GPU] Deferred events deallocation and tuning for FC bf_tiled kernel (#20519) --- src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp | 2 ++ src/plugins/intel_gpu/src/graph/network.cpp | 8 +++++++- .../fully_connected/fully_connected_kernel_bf_tiled.cpp | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp index 1474543428a7b7..ab5d6b5e0af140 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/network.hpp @@ -283,6 +283,8 @@ struct network { size_t _weights_cache_capacity = 1; std::unordered_map _events; + // This map is used to temporarily hold events that will be deallocated later + std::unordered_map _old_events; output_chains_map _output_chains; std::unique_ptr _shape_predictor; diff --git a/src/plugins/intel_gpu/src/graph/network.cpp 
b/src/plugins/intel_gpu/src/graph/network.cpp
index 51b988076d18f3..240db96d5b4988 100644
--- a/src/plugins/intel_gpu/src/graph/network.cpp
+++ b/src/plugins/intel_gpu/src/graph/network.cpp
@@ -758,7 +758,10 @@ void network::reset_execution(bool wait) {
             get_stream().wait_for_events(events);
         }
     }
-    _events.clear();
+
+    // Move events to a temporary map to deallocate them at the end of the network::execute() call for better overlapping with
+    // kernels execution, since deallocation may take significant time for a high number of events
+    _old_events = std::move(_events);
 }
 
 event::ptr network::set_input_data(const primitive_id& id, memory::ptr data) {
@@ -1457,6 +1460,9 @@ void network::execute_impl(const std::vector<event::ptr>& events) {
     // In scenarios with a big number of very small networks it can provide performance drop.
     get_stream().flush();
 
+    // Deallocate events from the previous iteration
+    _old_events.clear();
+
     GPU_DEBUG_IF(debug_config->dump_runtime_memory_pool > 0) {
         get_memory_pool().dump(get_id());
     }
diff --git a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
index 6b0407f6580cad..c272124627db23 100644
--- a/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
+++ b/src/plugins/intel_gpu/src/kernel_selector/kernels/fully_connected/fully_connected_kernel_bf_tiled.cpp
@@ -241,6 +241,9 @@ FullyConnected_bf_tiled::GetAutoTuneParams(const fully_connected_params& params,
                   .Case(tune_params(8, std::min(max_tile_ofm, 2u), 1, 1, 1, 1, EXE_MODE_AGE_BASED));
     }
 
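+    // Dedicated tuning entry for compressed (decompressed-on-the-fly) weights with batch == 1,
+    // tried before the generic cases below; presumably targets the memory-bound single-batch path.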
The sum of split_lengths must match data.shape[axis] VariadicSplit(const Output& data, const Output& axis, const Output& split_lengths); - bool visit_attributes(AttributeVisitor& visitor) override; - void validate_and_infer_types() override; std::shared_ptr clone_with_new_inputs(const OutputVector& new_args) const override; size_t get_default_output_index() const override { return no_default_index(); } - OPENVINO_SUPPRESS_DEPRECATED_START - bool evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const override; - OPENVINO_SUPPRESS_DEPRECATED_END + bool evaluate(TensorVector& outputs, const TensorVector& inputs) const override; bool evaluate_lower(TensorVector& outputs) const override; bool evaluate_upper(TensorVector& outputs) const override; bool has_evaluate() const override; bool evaluate_label(TensorLabelVector& output_labels) const override; - -private: - bool evaluate_variadic_split(const HostTensorVector& outputs, const HostTensorVector& inputs) const; - bool has_axis_and_splits_bound_set() const; }; } // namespace v1 } // namespace op diff --git a/src/core/src/op/variadic_split.cpp b/src/core/src/op/variadic_split.cpp index ab94af3fd8345f..38b309325fe16a 100644 --- a/src/core/src/op/variadic_split.cpp +++ b/src/core/src/op/variadic_split.cpp @@ -2,33 +2,69 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "ngraph/op/variadic_split.hpp" +#include "openvino/op/variadic_split.hpp" #include #include "bound_evaluate.hpp" #include "compare.hpp" #include "itt.hpp" -#include "ngraph/validation_util.hpp" +#include "openvino/core/validation_util.hpp" #include "openvino/reference/slice.hpp" #include "variadic_split_shape_inference.hpp" -using namespace std; -using namespace ngraph; +namespace ov { +namespace op { +namespace variadic_split { +namespace { -op::v1::VariadicSplit::VariadicSplit(const Output& data, - const Output& axis, - const Output& split_lengths) - : Op({data, axis, split_lengths}) { - constructor_validate_and_infer_types(); +bool has_axis_and_splits_bound_set(const Node* const node) { + return have_node_inputs_bounds_set(node, 1, 2); } -bool ngraph::op::v1::VariadicSplit::visit_attributes(AttributeVisitor& visitor) { - OV_OP_SCOPE(v1_VariadicSplit_visit_attributes); +bool evaluate(TensorVector& outputs, const TensorVector& inputs) { + const auto& data_tensor = inputs[0]; + const auto& axis_tensor = inputs[1]; + const auto axis = + ov::util::normalize(get_tensor_data_as(axis_tensor).front(), data_tensor.get_shape().size()); + + ov::Coordinate upper_bounds(data_tensor.get_shape()); + ov::Coordinate lower_bounds(upper_bounds.size()); + upper_bounds[axis] = 0; + + const Strides default_strides(upper_bounds.size(), 1); + constexpr auto is_zero_dim = ov::cmp::Equal(0); + + for (auto& output : outputs) { + const auto& out_shape = output.get_shape(); + upper_bounds[axis] += out_shape[axis]; + + if (std::none_of(out_shape.cbegin(), out_shape.cend(), is_zero_dim)) { + reference::slice(static_cast(data_tensor.data()), + static_cast(output.data()), + data_tensor.get_shape(), + lower_bounds, + upper_bounds, + default_strides, + out_shape, + data_tensor.get_element_type().size()); + } + + lower_bounds[axis] = upper_bounds[axis]; + } + return true; } +} // namespace +} // namespace variadic_split -void ngraph::op::v1::VariadicSplit::validate_and_infer_types() { +namespace v1 { +VariadicSplit::VariadicSplit(const Output& data, const Output& axis, const Output& split_lengths) + : Op({data, axis, split_lengths}) { + constructor_validate_and_infer_types(); +} + +void 
VariadicSplit::validate_and_infer_types() { OV_OP_SCOPE(v1_VariadicSplit_validate_and_infer_types); for (size_t i = 0; i < get_input_size(); ++i) { set_input_is_relevant_to_value(i); @@ -45,107 +81,52 @@ void ngraph::op::v1::VariadicSplit::validate_and_infer_types() { } } -shared_ptr op::v1::VariadicSplit::clone_with_new_inputs(const OutputVector& new_args) const { +std::shared_ptr VariadicSplit::clone_with_new_inputs(const OutputVector& new_args) const { OV_OP_SCOPE(v1_VariadicSplit_clone_with_new_inputs); check_new_args_count(this, new_args); - return make_shared(new_args.at(0), new_args.at(1), new_args.at(2)); + return std::make_shared(new_args.at(0), new_args.at(1), new_args.at(2)); } -OPENVINO_SUPPRESS_DEPRECATED_START -namespace variadic_split { -namespace { -inline bool evaluate(const HostTensorPtr& in, - const HostTensorPtr& out, - const Coordinate& lower_bounds, - const Coordinate& upper_bounds) { - const auto& output_shape = out->get_shape(); - const auto has_nonzero_dims = std::none_of(output_shape.begin(), output_shape.end(), ov::cmp::Equal(0)); - - if (has_nonzero_dims) { - ov::reference::slice(in->get_data_ptr(), - out->get_data_ptr(), - in->get_shape(), - lower_bounds, - upper_bounds, - Strides(lower_bounds.size(), 1), - out->get_shape(), - in->get_element_type().size()); - return true; - } - return false; -} -} // namespace -} // namespace variadic_split - -bool op::v1::VariadicSplit::evaluate_variadic_split(const HostTensorVector& inputs, - const HostTensorVector& outputs) const { - const auto& data_tensor = inputs[0]; - const auto& axis_tensor = inputs[1]; - const auto& split_lengths_tensor = inputs[2]; - OPENVINO_ASSERT(axis_tensor->get_element_type().is_integral_number(), - "axis element type is not integral data type"); - OPENVINO_ASSERT(split_lengths_tensor->get_element_type().is_integral_number(), - "split_lengths element type is not integral data type"); - - OPENVINO_SUPPRESS_DEPRECATED_START - int64_t axis = host_tensor_2_vector(axis_tensor)[0]; - axis = ngraph::normalize_axis(this, axis, data_tensor->get_partial_shape().rank()); - OPENVINO_SUPPRESS_DEPRECATED_END +bool VariadicSplit::evaluate(TensorVector& outputs, const TensorVector& inputs) const { + OV_OP_SCOPE(v1_VariadicSplit_evaluate); - std::vector input_shapes = {data_tensor->get_partial_shape(), - axis_tensor->get_partial_shape(), - split_lengths_tensor->get_partial_shape()}; - auto output_shapes = shape_infer(this, input_shapes, make_tensor_accessor(inputs)); + if (inputs[1].get_element_type().is_integral_number() && inputs[2].get_element_type().is_integral_number()) { + const auto output_shapes = + shape_infer(this, ov::util::get_tensors_partial_shapes(inputs), make_tensor_accessor(inputs)); + OPENVINO_ASSERT(outputs.size() == output_shapes.size()); - const auto data_shape = data_tensor->get_shape(); - std::vector lower_bounds(data_shape.size(), 0); - std::vector upper_bounds = data_shape; - upper_bounds[axis] = 0; + auto out_partial_shape = output_shapes.cbegin(); + for (auto& output : outputs) { + output.set_shape(out_partial_shape->to_shape()); + ++out_partial_shape; + } - size_t split_pos = 0; - for (const auto& output : outputs) { - ov::Shape output_shape = output_shapes[split_pos++].get_shape(); - upper_bounds[axis] += output_shape[axis]; - output->set_shape(output_shape); - variadic_split::evaluate(data_tensor, output, lower_bounds, upper_bounds); - lower_bounds.at(axis) = upper_bounds.at(axis); + return variadic_split::evaluate(outputs, inputs); + } else { + return false; } - - return true; -} 
-bool op::v1::VariadicSplit::evaluate(const HostTensorVector& outputs, const HostTensorVector& inputs) const { - OV_OP_SCOPE(v1_VariadicSplit_evaluate); - return evaluate_variadic_split(inputs, outputs); } -bool op::v1::VariadicSplit::has_evaluate() const { +bool VariadicSplit::has_evaluate() const { OV_OP_SCOPE(v1_VariadicSplit_has_evaluate); return get_input_element_type(1).is_integral_number() && get_input_element_type(2).is_integral_number(); } -bool op::v1::VariadicSplit::has_axis_and_splits_bound_set() const { - for (size_t i = 1; i < get_input_size(); ++i) { - if (!get_input_tensor(i).has_and_set_bound()) { - return false; - } - } - return true; -} - -bool op::v1::VariadicSplit::evaluate_lower(ov::TensorVector& output_values) const { +bool VariadicSplit::evaluate_lower(TensorVector& output_values) const { OV_OP_SCOPE(v1_Split_evaluate_lower); - - return has_axis_and_splits_bound_set() && default_lower_bound_evaluator(this, output_values); + return variadic_split::has_axis_and_splits_bound_set(this) && default_lower_bound_evaluator(this, output_values); } -bool op::v1::VariadicSplit::evaluate_upper(ov::TensorVector& output_values) const { +bool VariadicSplit::evaluate_upper(TensorVector& output_values) const { OV_OP_SCOPE(v1_Split_evaluate_upper); - - return has_axis_and_splits_bound_set() && default_upper_bound_evaluator(this, output_values); + return variadic_split::has_axis_and_splits_bound_set(this) && default_upper_bound_evaluator(this, output_values); } -bool op::v1::VariadicSplit::evaluate_label(TensorLabelVector& output_labels) const { +bool VariadicSplit::evaluate_label(TensorLabelVector& output_labels) const { OPENVINO_SUPPRESS_DEPRECATED_START - return has_axis_and_splits_bound_set() && default_label_evaluator(this, output_labels); + return variadic_split::has_axis_and_splits_bound_set(this) && default_label_evaluator(this, output_labels); OPENVINO_SUPPRESS_DEPRECATED_END } +} // namespace v1 +} // namespace op +} // namespace ov From 491454103ea2f29b242587c6084c19868a879a82 Mon Sep 17 00:00:00 2001 From: Anastasia Kuporosova Date: Wed, 18 Oct 2023 17:23:52 +0300 Subject: [PATCH 11/39] [Docs] fix failure of python snippet (#20516) * [Docs] fix failure of python snippet * trigger python snippets * fix ga * Update .github/workflows/linux.yml Co-authored-by: Mikhail Ryzhov * Update .github/workflows/linux.yml Co-authored-by: Mikhail Ryzhov * return back fix --------- Co-authored-by: Chen Peter Co-authored-by: Mikhail Ryzhov --- .github/workflows/linux.yml | 24 ++++++++++++++---------- docs/snippets/ov_auto.py | 3 +-- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml index 59e021e24eb153..733dfed4c09d14 100644 --- a/.github/workflows/linux.yml +++ b/.github/workflows/linux.yml @@ -5,17 +5,21 @@ on: - cron: '0 0 * * 3,6' workflow_dispatch: pull_request: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' + paths: + - '**' + - '!**/docs/**' + - '!docs/**' + - 'docs/snippets/**' + - '!**/**.md' + - '!**.md' push: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' + paths: + - '**' + - '!docs/**' + - '!**/docs/**' + - 'docs/snippets/**' + - '!**/**.md' + - '!**.md' branches: - master - 'releases/**' diff --git a/docs/snippets/ov_auto.py b/docs/snippets/ov_auto.py index 8c2721a355d1b3..47d8d877ecda24 100644 --- a/docs/snippets/ov_auto.py +++ b/docs/snippets/ov_auto.py @@ -8,7 +8,6 @@ import openvino.properties.device as device import openvino.properties.hint as hints 
import openvino.properties.streams as streams -import openvino.properties.enable_profiling as enable_profiling #! [py_ov_property_import_header] import openvino.properties.log as log @@ -167,7 +166,7 @@ def part5(): cpu_config = { hints.performance_mode: hints.PerformanceMode.LATENCY, streams.num: 8, - enable_profiling: True + properties.enable_profiling: True } compiled_model = core.compile_model( model=model, From cf9791e3e03983f08fe21d9812d4746516a56d5a Mon Sep 17 00:00:00 2001 From: Mikhail Ryzhov Date: Wed, 18 Oct 2023 18:43:46 +0200 Subject: [PATCH 12/39] [GHA] Temporary disable win workflows (#20568) * moved win workflow to nightly * reset redundant files * removed redundant files --- .github/workflows/windows.yml | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 6aed320376c21e..6ce891e6767698 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -1,6 +1,9 @@ name: Windows (VS 2022, Python 3.11) on: - workflow_dispatch: + schedule: + # at 00:00 on workdays + - cron: '0 0 * * 1,2,3,4,5' +# workflow_dispatch: # pull_request: # paths-ignore: # - '**/docs/**' @@ -9,16 +12,16 @@ on: # - '**.md' # - '**/layer_tests_summary/**' # - '**/conformance/**' - push: - paths-ignore: - - '**/docs/**' - - 'docs/**' - - '**/**.md' - - '**.md' - - '**/layer_tests_summary/**' - - '**/conformance/**' - branches: - - master +# push: +# paths-ignore: +# - '**/docs/**' +# - 'docs/**' +# - '**/**.md' +# - '**.md' +# - '**/layer_tests_summary/**' +# - '**/conformance/**' +# branches: +# - master concurrency: # github.ref is not unique in post-commit @@ -336,7 +339,7 @@ jobs: shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - + :: requires 'unit_tests' from 'tools/mo' set PYTHONPATH=${{ env.OPENVINO_REPO }}\tools\mo;${{ env.LAYER_TESTS_INSTALL_DIR }};%PYTHONPATH% python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/onnx_tests -m "not launch_only_if_manually_specified and precommit" --junitxml=${INSTALL_TEST_DIR}/TEST-onnx.xml @@ -404,7 +407,7 @@ jobs: shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - + :: TODO: remove setupvars.bat from here; currently, it's used for 'test_utils' installed in '/python/openvino' call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/mo_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_mo_convert.xml env: @@ -415,7 +418,7 @@ jobs: shell: cmd run: | python3 -m pip install -r ${{ env.LAYER_TESTS_INSTALL_DIR }}/requirements.txt - + :: TODO: remove setupvars.sh from here; currently, it's used for 'test_utils' installed in '/python/openvino' call "${{ env.INSTALL_DIR }}\\setupvars.bat" && python3 -m pytest ${{ env.LAYER_TESTS_INSTALL_DIR }}/ovc_python_api_tests --junitxml=${{ env.INSTALL_TEST_DIR }}/TEST-test_ovc_convert.xml env: From 46935e0a0050ece3e29790ae13afb5a1e2b56c32 Mon Sep 17 00:00:00 2001 From: Sergey Lyalin Date: Wed, 18 Oct 2023 21:06:17 +0400 Subject: [PATCH 13/39] Partial GPTQ int4 models conversion support + Swap nibbles in u4/i4 element type (#20371) * Reference implementation for u4 constant compression from pytorch model based on bitwise ops pattern * Fixed order of 4-bit halfs in byte * Switched PyTorch FE to dev mode: in case if model cannot be fully converted, give partially converted model with PTFrameworkNode's with a printed warning (normally would raise an 
exception in case). * Moved u4 compression to utils_quantize. Implemented not-interleaved version of u4 compression * Removed debug output * Added aten::matmul to the list of exceptions in may_produce_alias as a workaround for gptq models * Added patching for gptq models applied automatically in convert_model * WA for an inssue with u4 with earlier convert to fp16 * U4 blocked repacking for gptq patched model layout * Deleted obsolete u4 re-packing based on aten::cat. Fixed the resulting u4 constant shape. Removed debug output. * Revert "Switched PyTorch FE to dev mode: in case if model cannot be fully converted, give partially converted model with PTFrameworkNode's with a printed warning (normally would raise an exception in case)." This reverts commit 0ef1455e70001f0519a15249950825d10f1aa1cc. * Update src/frontends/pytorch/src/op/cat.cpp * Check mask and shift values in u4 pattern. deque -> OutputVector for u4_compression_stack * Convert to a given floating type instead of half in gptq patching. Better structured code. * Code style fix * Removed deque include * Code style fixes * Trailing space removed * Fixed patched_forward and ts_decoder after unvalidated commits. * Swap nibbles in u4/i4 * Better exception handling around jit.trace and gptq.patch_model * Update src/bindings/python/src/openvino/frontend/pytorch/gptq.py Co-authored-by: Alexander Kozlov * Update src/bindings/python/src/openvino/frontend/pytorch/gptq.py Co-authored-by: Alexander Kozlov * Code style * Revers int4 byte order * Fixed core tests * Fixed unguarded dynamic_cast result Co-authored-by: Evgenya Nugmanova * Fixed transformation tests * Update src/bindings/python/src/openvino/frontend/pytorch/gptq.py Co-authored-by: Maxim Vafin * Prevent patching of non-gptq models * Removed extra calling of quantized weights decompression patterns * Better detection of supported AutoGPTQ models + more diagnostics * Accurate diagnostics in case when aten::stack has multiple axes --------- Co-authored-by: Alexander Kozlov Co-authored-by: Ilya Churaev Co-authored-by: Evgenya Nugmanova Co-authored-by: Maxim Vafin --- .../openvino/frontend/pytorch/fx_decoder.py | 2 +- .../src/openvino/frontend/pytorch/gptq.py | 140 ++++++++++++++++++ .../openvino/frontend/pytorch/ts_decoder.py | 26 +++- .../tests/utils/convert_precision.cpp | 4 +- src/core/include/openvino/op/constant.hpp | 32 +--- .../include/openvino/reference/convert.hpp | 12 +- src/core/tests/constant.cpp | 20 +-- src/core/tests/int4.cpp | 4 +- src/core/tests/uint4.cpp | 4 +- src/frontends/pytorch/src/frontend.cpp | 2 + .../aten_stack_list_construct_replacer.cpp | 32 ++-- .../src/transforms/u4_block_repack.cpp | 98 ++++++++++++ .../src/transforms/u4_block_repack.hpp | 24 +++ src/frontends/pytorch/src/utils_quantize.cpp | 49 ++++++ src/frontends/pytorch/src/utils_quantize.hpp | 6 + .../tests/functional/op_reference/convert.cpp | 88 +++++------ .../functional/op_reference/convert_like.cpp | 106 ++++++------- tools/mo/openvino/tools/mo/ops/Cast.py | 20 ++- tools/mo/unit_tests/mo/ops/cast_test.py | 83 ++--------- 19 files changed, 516 insertions(+), 236 deletions(-) create mode 100644 src/bindings/python/src/openvino/frontend/pytorch/gptq.py create mode 100644 src/frontends/pytorch/src/transforms/u4_block_repack.cpp create mode 100644 src/frontends/pytorch/src/transforms/u4_block_repack.hpp diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index 45a662e4e45fd1..a79892b3e4d6f5 100644 --- 
a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -370,7 +370,7 @@ def inlined_inputs(self, index): return result def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d"]: + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::matmul"]: # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that return False try: diff --git a/src/bindings/python/src/openvino/frontend/pytorch/gptq.py b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py new file mode 100644 index 00000000000000..b4bd06552b2a1e --- /dev/null +++ b/src/bindings/python/src/openvino/frontend/pytorch/gptq.py @@ -0,0 +1,140 @@ + +# Copyright (C) 2018-2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# flake8: noqa +# mypy: ignore-errors + +import torch +from functools import partial + +# Wraps a single tensor in a module to prevent it from being frozen by jit.freeze. +# Whether it is preserved from freezing depends on the tensor dtype. Refer to the decoder code to learn which types will be preserved. +class KeepWeight(torch.nn.Module): + + def __init__(self, weight): + super().__init__() + self.weight = torch.nn.Parameter(weight, requires_grad=False) + + def forward(self): + return self.weight + + +# Produces a pattern that can be captured later and represented as a single u4 constant node +def decompression_pattern(weights): + mask = torch.tensor(15, dtype=torch.uint8).to(weights.device) + return torch.stack((torch.bitwise_and(weights, mask), torch.bitwise_right_shift(weights, 4)), dim=-1) + + +def patched_forward(self, *args, **kwargs): + if hasattr(self, '_hf_hook'): + args, kwargs = self._hf_hook.pre_forward(self, *args, **kwargs) + + x = args[0] + dtype = x.dtype + outshape = x.shape[:-1] + (self.width,) + x = x.view(-1, x.shape[-1]) + groups = self.qzeros.shape[0] + height = self.qweight.shape[0] + + unpacked_weights = decompression_pattern( + self._openvino_u4_compression_submodule_qweights()).contiguous().view(height, -1, 8) + unpacked_weights = torch.transpose( + unpacked_weights, 1, 2).contiguous().view(-1, self.group_size, self.width) + unpacked_zp = decompression_pattern( + self._openvino_u4_compression_submodule_qzeros()).contiguous().view(groups, 1, -1) + + unpacked_zp = unpacked_zp.to(dtype) + 1 + + unpacked_weights = (unpacked_weights.to(dtype) - unpacked_zp) * self.scales + unpacked_weights = unpacked_weights.view(-1, self.width) + + out = x @ unpacked_weights + + out = out.view(outshape) + if self.bias is not None: + out.add_(self.bias) + + if hasattr(self, '_hf_hook'): + out = self._hf_hook.post_forward(self, out) + return out + + +# All the following AutoGPTQ's quant types are supposed to have the same weights packing schema +supported_quant_types = ['triton', 'exllama', 'cuda', 'exllamav2', 'cuda-old'] + + +def patch_model(model): + for name, m in model.named_modules(): + if hasattr(m, '_openvino_patch_orig_forward'): + # already patched, skipping + continue + # TODO: Check module type + is_quantized = getattr(m, 'is_quantized', None) + if is_quantized is not None: + m.is_quantized = False + m.float() # enables tracing on CPU, applied for all modules + if hasattr(m, 'QUANT_TYPE'): + if m.QUANT_TYPE not in supported_quant_types: + raise ValueError( + f'Unsupported QUANT_TYPE == {m.QUANT_TYPE} was discovered for an AutoGPTQ model; only the following
types are supported: {supported_quant_types}') + if m.bits != 4: + raise ValueError( + f'Unsupported bits == {m.bits} was discovered in module {name} of an AutoGPTQ model; only bits == 4 is supported.') + + int4_in_int32 = 8 + groups = m.qzeros.shape[0] + m.width = m.qweight.shape[1] + assert m.group_size == m.qweight.shape[0] * int4_in_int32 // groups + + m._openvino_patch_orig_forward = m.forward + m.forward = partial(patched_forward, m) + + # Keep original field properties so the model can be restored to its original state + m._openvino_patch_orig_qweights_type = m.qweight.dtype + m._openvino_patch_orig_qzeros_type = m.qzeros.dtype + m._openvino_patch_orig_scale_shape = m.scales.shape + + m.qweight = m.qweight.view(dtype=torch.uint8) + m.qzeros = m.qzeros.view(dtype=torch.uint8) + + # TODO: Redundant tensor copy? Try to remove m.qweight and m.qzeros after keeping modified values as submodules + m.add_module( + '_openvino_u4_compression_submodule_qweights', KeepWeight(m.qweight)) + m.add_module('_openvino_u4_compression_submodule_qzeros', + KeepWeight(m.qzeros)) + + m.scales = m.scales.view(-1, 1, m.width) + +
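+# Reverses patch_model(): restores the original forward methods, the qweight/qzeros dtypes and the scales shape, and removes the helper submodules.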
+def unpatch_model(model): + for _, m in model.named_modules(): + if hasattr(m, '_openvino_patch_orig_forward'): + try: + m.forward = m._openvino_patch_orig_forward + del m._openvino_patch_orig_forward + + m.qweight = m.qweight.view( + dtype=m._openvino_patch_orig_qweights_type) + del m._openvino_patch_orig_qweights_type + + m.qzeros = m.qzeros.view( + dtype=m._openvino_patch_orig_qzeros_type) + del m._openvino_patch_orig_qzeros_type + + m.scales = m.scales.view(m._openvino_patch_orig_scale_shape) + del m._openvino_patch_orig_scale_shape + + del m._openvino_u4_compression_submodule_qweights + del m._openvino_u4_compression_submodule_qzeros + except Exception as error: + print('[ WARNING ] Exception raised during GPTQ model unpatching. Depending on the exact issue it may lead to a broken original model') + print(error) + + +def detect_gptq_model_raw(model): + return model and getattr(model, 'config', None) and getattr(model.config, 'quantization_config', None) and model.config.quantization_config.quant_method == 'gptq' + + +def detect_gptq_model(model): + return detect_gptq_model_raw(model) or getattr(model, 'model', None) and detect_gptq_model_raw(model.model) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py index b6caf22cfc7b68..11d5991e700c42 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py @@ -9,6 +9,7 @@ from openvino.runtime import op, PartialShape, Type as OVType, OVAny from openvino.frontend.pytorch.utils import ivalue_to_constant, get_value_from_getattr, pt_to_ov_type_map, prepare_example_inputs_and_model, convert_quantized_tensor from openvino.runtime import opset11 as ops +from openvino.frontend.pytorch import gptq import typing import torch @@ -84,8 +85,27 @@ def _get_scripted_model(self, pt_module, example_inputs=None, skip_freeze=False) if example_inputs is None: scripted = torch.jit.script(pt_module) else: - input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model(example_inputs, input_params, pt_module) - scripted = torch.jit.trace(pt_module, **input_parameters, strict=False) + input_parameters, input_signature, pt_module, self._input_is_list = prepare_example_inputs_and_model( + example_inputs, input_params, pt_module) + gptq_patched = False + + if gptq.detect_gptq_model(pt_module): + try: + gptq.patch_model(pt_module) + gptq_patched = True + except Exception as error: + print('[ WARNING ] Failed patching of AutoGPTQ model.
Error message:\n', error) + print('[ WARNING ] Tracing of the model will likely be unsuccessful or incorrect') + gptq.unpatch_model(pt_module) + gptq_patched = False + + try: + scripted = torch.jit.trace( + pt_module, **input_parameters, strict=False) + finally: + if gptq_patched: + gptq.unpatch_model(pt_module) + if not skip_freeze: for n in scripted.inlined_graph.nodes(): # TODO: switch off freezing for all traced models @@ -341,7 +361,7 @@ def input_is_none(self, index: int) -> bool: return False def may_produce_alias(self, in_index: int, out_index: int) -> bool: - if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d"]: + if self.get_op_type() in ["aten::conv1d", "aten::conv2d", "aten::conv3d", "aten::matmul"]: # AliasDB::may_contain_alias sometimes return True for tensors produced by convnd, we have to workaround that return False try: diff --git a/src/common/transformations/tests/utils/convert_precision.cpp b/src/common/transformations/tests/utils/convert_precision.cpp index 6c0da965f9bfad..1dac080461d16b 100644 --- a/src/common/transformations/tests/utils/convert_precision.cpp +++ b/src/common/transformations/tests/utils/convert_precision.cpp @@ -1188,7 +1188,7 @@ void constant_convert_test(element::Type type_from, } ASSERT_TRUE(actual.size() >= expected.size()); for (size_t i = 0; i < expected.size(); i++) { - ASSERT_EQ(expected[i], actual[i]); + EXPECT_EQ(expected[i], actual[i]) << "Elements with index " << i << " are not equal."; } } @@ -1378,7 +1378,7 @@ TEST(TransformationTests, ConvertPrecision_ConstantConversion_U1ToU4) { constant_convert_test(element::u1, element::u4, std::vector{171}, - {1, 0, 1, 0, 1, 0, 1, 1}); + {0, 1, 0, 1, 0, 1, 1, 1}); } TEST(TransformationTests, ConvertPrecision_keep_precission_sensitive_fp32_with_exp) { diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp index 14ee7b3313490e..e122d36a8223d1 100644 --- a/src/core/include/openvino/op/constant.hpp +++ b/src/core/include/openvino/op/constant.hpp @@ -426,7 +426,7 @@ class OPENVINO_API Constant : public Op { typename StorageDataType = fundamental_type_for, typename std::enable_if::type = true> StorageDataType get_element_value(size_t index) const { - return (get_data_ptr()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F; + return (get_data_ptr()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F; } template , typename std::enable_if::type = true> StorageDataType get_element_value(size_t index) const { - const uint8_t i4data = (get_data_ptr()[index / 2] >> (index % 2 ? 0 : 4)) & 0x0F; + const uint8_t i4data = (get_data_ptr()[index / 2] >> (index % 2 ? 4 : 0)) & 0x0F; const bool is_negative_number = (i4data >> 3) & 0x01; const int8_t data = is_negative_number ? i4data | 0xF0 : i4data; return data; @@ -530,7 +530,7 @@ class OPENVINO_API Constant : public Op { const auto round_element_no = element_number % 2 ? element_number + 1 : element_number; output.reserve(round_element_no); // adds 1 more elements here? std::for_each(source_begin, source_end, [&](IN_T c) { - for (const auto i : {4, 0}) { + for (const auto i : {0, 4}) { const uint8_t data = (c >> i) & 0x0F; output.push_back(data); } @@ -548,7 +548,7 @@ class OPENVINO_API Constant : public Op { const auto round_element_no = element_number % 2 ? element_number + 1 : element_number; output.reserve(round_element_no); // adds 1 more elements here?
std::for_each(source_begin, source_end, [&](IN_T c) { - for (const auto i : {4, 0}) { + for (const auto i : {0, 4}) { const uint8_t i4data = (c >> i) & 0x0F; const bool is_negative_number = (i4data >> 3) & 0x01; const int8_t data = is_negative_number ? i4data | 0xF0 : i4data; @@ -663,27 +663,9 @@ class OPENVINO_API Constant : public Op { template , - typename std::enable_if::type = true> - void write_buffer(const std::vector& source) { - auto p = get_data_ptr_nc(); - size_t i = 0; - for (; i < source.size() / 2; i++) { - const auto v1 = value_in_range(source[i * 2]) & 0x0F; - const auto v2 = value_in_range(source[i * 2 + 1]) & 0x0F; - const auto v = (v1 << 4) | v2; - p[i] = static_cast(v); - } - if (source.size() % 2) { - const auto v1 = value_in_range(source[i * 2]) & 0x0F; - const auto v = v1 << 4; - p[i] = static_cast(v); - } - } - - template , - typename std::enable_if::value, bool>::type = true> + typename std::enable_if::value), + bool>::type = true> void write_buffer(const std::vector& source) { auto p = get_data_ptr_nc(); size_t i = 0; diff --git a/src/core/reference/include/openvino/reference/convert.hpp b/src/core/reference/include/openvino/reference/convert.hpp index e943e548a8fa4e..bd36b50b03301d 100644 --- a/src/core/reference/include/openvino/reference/convert.hpp +++ b/src/core/reference/include/openvino/reference/convert.hpp @@ -14,7 +14,7 @@ namespace reference { namespace detail { inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) { const size_t byte_idx = idx / 8; - const uint8_t bit_idx = 7 - (idx % 8); + const uint8_t bit_idx = 7 - (idx % 8); // Reversed order of bits if (val) { buf[byte_idx] |= (1 << bit_idx); } else { @@ -24,33 +24,33 @@ inline void set_u1(uint8_t* buf, size_t idx, uint8_t val) { inline uint8_t get_u1(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 8; - const uint8_t bit_idx = 7 - (idx % 8); + const uint8_t bit_idx = 7 - (idx % 8); // Reversed order of bits return (buf[byte_idx] & (1 << bit_idx)) ? 
1 : 0; } inline void set_u4(uint8_t* buf, size_t idx, uint8_t val) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); buf[byte_idx] &= ~(0xF << bit_shift); // half byte zeroed buf[byte_idx] |= ((val & 0xF) << bit_shift); // set 1's } inline uint8_t get_u4(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); return (buf[byte_idx] >> bit_shift) & 0xF; } inline void set_i4(uint8_t* buf, size_t idx, int8_t val) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); buf[byte_idx] &= ~(0xF << bit_shift); // half byte zeroed buf[byte_idx] |= ((val & 0xF) << bit_shift); // set 1's } inline int8_t get_i4(const uint8_t* buf, size_t idx) { const size_t byte_idx = idx / 2; - const uint8_t bit_shift = 4 * (++idx % 2); + const uint8_t bit_shift = 4 * (idx % 2); uint8_t val = (buf[byte_idx] >> bit_shift) & 0xF; if (val & 0x08) { // negative number val |= 0xF0; diff --git a/src/core/tests/constant.cpp b/src/core/tests/constant.cpp index 45ad60d153627a..0feefb84bed8a5 100644 --- a/src/core/tests/constant.cpp +++ b/src/core/tests/constant.cpp @@ -266,8 +266,8 @@ TEST(constant, int4_string) { EXPECT_EQ(v[2], -1); const auto p = c.get_data_ptr(); - EXPECT_EQ(0x10, p[0]); - EXPECT_EQ(0xF0, p[1] & 0xF0); + EXPECT_EQ(0x01, p[0]); + EXPECT_EQ(0x0F, p[1] & 0x0F); EXPECT_EQ(input, c.get_value_strings()); @@ -318,8 +318,8 @@ TEST(constant, int4_vector_negative_number) { EXPECT_EQ(v[2], int8_t(-1)); const auto p = c.get_data_ptr(); - EXPECT_EQ(0xFE, p[0]); - EXPECT_EQ(0xF0, p[1] & 0xF0); + EXPECT_EQ(0xEF, p[0]); + EXPECT_EQ(0x0F, p[1] & 0x0F); } TEST(constant, int4_vector_positive_number) { @@ -332,8 +332,8 @@ TEST(constant, int4_vector_positive_number) { EXPECT_EQ(v[2], int8_t(5)); const auto p = c.get_data_ptr(); - EXPECT_EQ(0x12, p[0]); - EXPECT_EQ(0x50, p[1] & 0xF0); + EXPECT_EQ(0x21, p[0]); + EXPECT_EQ(0x05, p[1] & 0x0F); } TEST(constant, int4_vector_broadcast_negative_number) { @@ -795,8 +795,8 @@ TEST(constant, uint4_string) { EXPECT_EQ(v[3], 0); const auto p = c.get_data_ptr(); - EXPECT_EQ(p[0], 0x10); - EXPECT_EQ(p[1], 0x10); + EXPECT_EQ(p[0], 0x01); + EXPECT_EQ(p[1], 0x01); EXPECT_EQ(input, c.get_value_strings()); @@ -831,8 +831,8 @@ TEST(constant, uint4_vector) { EXPECT_EQ(v[3], 0); const auto p = c.get_data_ptr(); - EXPECT_EQ(p[0], 0x10); - EXPECT_EQ(p[1], 0x10); + EXPECT_EQ(p[0], 0x01); + EXPECT_EQ(p[1], 0x01); } TEST(constant, uint4_vector_broadcast) { diff --git a/src/core/tests/int4.cpp b/src/core/tests/int4.cpp index 2edb82dda0183c..d9a20fbf3649b2 100644 --- a/src/core/tests/int4.cpp +++ b/src/core/tests/int4.cpp @@ -15,9 +15,9 @@ TEST(int4, convert_i4_to_string) { vector values{171, 16}; auto constant = make_shared(element::i4, Shape{3}, &values[0]); - vector ref{"-6", "-5", "1"}; + vector ref{"-5", "-6", "0"}; for (size_t i = 0; i < 3; ++i) { - ASSERT_EQ(constant->convert_value_to_string(i), ref[i]); + EXPECT_EQ(constant->convert_value_to_string(i), ref[i]); } } diff --git a/src/core/tests/uint4.cpp b/src/core/tests/uint4.cpp index 5c3b0a5e06af20..8285fdb3cd5e1c 100644 --- a/src/core/tests/uint4.cpp +++ b/src/core/tests/uint4.cpp @@ -13,9 +13,9 @@ TEST(uint4, convert_u4_to_string) { vector values{171, 16}; auto constant = make_shared(element::u4, Shape{3}, &values[0]); - vector ref{"10", "11", "1"}; + vector ref{"11", "10", "0"}; for (size_t i = 0; i < 3; ++i) 
{ - ASSERT_EQ(constant->convert_value_to_string(i), ref[i]); + EXPECT_EQ(constant->convert_value_to_string(i), ref[i]); } } diff --git a/src/frontends/pytorch/src/frontend.cpp b/src/frontends/pytorch/src/frontend.cpp index 0910aa3e057e72..14c1094b3d098f 100644 --- a/src/frontends/pytorch/src/frontend.cpp +++ b/src/frontends/pytorch/src/frontend.cpp @@ -41,6 +41,7 @@ #include "transforms/softmax_reshape_elimination.hpp" #include "transforms/string_equality_replacer.hpp" #include "transforms/tuple_unpack_replacer.hpp" +#include "transforms/u4_block_repack.hpp" #include "translate_session.hpp" namespace ov { @@ -200,6 +201,7 @@ void FrontEnd::normalize(const std::shared_ptr& model) const { manager.register_pass(); manager.register_pass(); manager.register_pass(); + manager.register_pass(); manager.register_pass(); manager.register_pass(); manager.register_pass(); diff --git a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp index f8de5275b69ae8..67ea5f4f9e1ff9 100644 --- a/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp +++ b/src/frontends/pytorch/src/transforms/aten_stack_list_construct_replacer.cpp @@ -12,6 +12,7 @@ #include "openvino/pass/pattern/matcher.hpp" #include "openvino/pass/pattern/op/wrap_type.hpp" #include "utils.hpp" +#include "utils_quantize.hpp" namespace ov { namespace frontend { @@ -38,22 +39,31 @@ AtenStackListConstructReplacer::AtenStackListConstructReplacer() { auto axis_node = pattern_map.at(axis).get_node_shared_ptr(); auto axis_const = std::dynamic_pointer_cast(axis_node); auto axis = axis_const->cast_vector(); + if (axis.size() != 1) { + add_exception_to_fw_node(stack, "aten::stack has multiple axes, only one is supported."); + return false; + } // Check if ListConstruct is an input if (auto list_construct_node = cast_fw_node(input_node, "prim::ListConstruct")) { const auto& list_inputs = list_construct_node->input_values(); - OutputVector node_vector; - auto zero = v0::Constant::create(element::i32, Shape{}, {0}); - // Iterate over values in ListConstruct - for (const auto& list_input : list_inputs) { - auto node = concat_list_construct(list_input); - auto unsqueezed_node = std::make_shared(node, axis_const); - node_vector.push_back(unsqueezed_node); + std::shared_ptr node; + if (auto compression = u4_compression_stack(list_inputs, axis[0])) { + node = compression; + } else { + OutputVector node_vector; + auto zero = v0::Constant::create(element::i32, Shape{}, {0}); + // Iterate over values in ListConstruct + for (const auto& list_input : list_inputs) { + auto node = concat_list_construct(list_input); + auto unsqueezed_node = std::make_shared(node, axis_const); + node_vector.push_back(unsqueezed_node); + } + // Concat vectors on provided axis + node = std::make_shared(node_vector, axis[0]); } - // Concat vectors on provided axis - auto concat = std::make_shared(node_vector, axis[0]); - copy_runtime_info_and_name(stack, {concat}, {input_node}); - replace_node(stack, concat); + copy_runtime_info_and_name(stack, {node}, {input_node}); + replace_node(stack, node); return true; } add_exception_to_fw_node(stack, "Unsupported case of aten::stack."); diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.cpp b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp new file mode 100644 index 00000000000000..e08ebd728b050e --- /dev/null +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.cpp @@ -0,0 +1,98 @@ +// Copyright (C) 
2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "u4_block_repack.hpp" + +#include "openvino/core/rt_info.hpp" +#include "openvino/op/constant.hpp" +#include "openvino/op/reshape.hpp" +#include "openvino/op/transpose.hpp" +#include "openvino/pass/pattern/matcher.hpp" +#include "openvino/pass/pattern/op/wrap_type.hpp" +#include "utils.hpp" +#include "utils_quantize.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +using namespace ov::op; +using namespace ov::pass::pattern; + +U4BlockRepack::U4BlockRepack() { + const auto m_constant = ov::pass::pattern::wrap_type(); + const auto m_reshape1 = ov::pass::pattern::wrap_type({m_constant, any_input()}); + const auto m_transpose = ov::pass::pattern::wrap_type({m_reshape1, any_input()}); + const auto m_reshape2 = ov::pass::pattern::wrap_type({m_transpose, any_input()}); + + auto pack_byte = [](uint8_t lo, uint8_t hi) { + return (hi << 4) | (lo & 0x0F); + }; // swap halves because Convert op assumes this layout + + auto get_u4 = [](const uint8_t* src, size_t idx) { + const size_t byte_idx = idx / 2; + const uint8_t bit_shift = 4 * (idx % 2); + return (src[byte_idx] >> bit_shift) & 0xF; + }; + + register_matcher( + std::make_shared(m_reshape2, "ov::frontend::pytorch::pass::U4BlockRepack"), + [=](ov::pass::pattern::Matcher& m) { + auto& pattern_to_output = m.get_pattern_value_map(); + auto constant = + std::dynamic_pointer_cast(pattern_to_output[m_constant].get_node_shared_ptr()); + if (!constant) + return false; + auto reshape1 = pattern_to_output[m_reshape1].get_node_shared_ptr(); + auto transpose = pattern_to_output[m_transpose].get_node_shared_ptr(); + auto reshape2 = pattern_to_output[m_reshape2].get_node_shared_ptr(); + + if (constant->get_element_type() != element::u4) + return false; + + // FIXME: Check reshape/transpose/reshape target shapes and axes permutation; now they are supposed to be + // always in expected form + + auto source_shape = reshape1->get_output_shape(0); + + if (source_shape.size() != 3) + return false; + + auto destination_shape = reshape2->get_output_shape(0); + + size_t n_blocks = source_shape[0]; + size_t block_height = source_shape[1]; + size_t lane_size = source_shape[2]; // size in u4 units + size_t block_size = block_height * lane_size / 2; // size in bytes + + auto src = constant->get_data_ptr(); + + auto new_const = std::make_shared(element::u4, destination_shape); + auto dst = const_cast( // const_cast? + reinterpret_cast(new_const->get_data_ptr())); // TODO: How to better access u4 data?
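+ + // The loops below transpose each [block_height, lane_size] block of u4 values into [lane_size, block_height] nibble order, packing every vertical pair of nibbles into one output byte (low nibble first, as pack_byte above produces).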
+ + for (size_t iblock = 0; iblock < n_blocks; ++iblock) { + auto src_block = src + iblock * block_size; + auto dst_block = dst + iblock * block_size; + for (size_t i = 0; i < lane_size; ++i) { + for (size_t j = 0; j < block_height / 2; ++j) { // /2 because each iteration packs two u4 values into one byte + uint8_t lo = get_u4(src_block, 2 * j * lane_size + i); + uint8_t hi = get_u4(src_block, (2 * j + 1) * lane_size + i); + dst_block[i * block_height / 2 + j] = pack_byte(lo, hi); + } + } + } + + copy_runtime_info(NodeVector{constant, reshape1, transpose, reshape2}, new_const); + replace_node(reshape2, new_const); + + return true; + }); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/transforms/u4_block_repack.hpp b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp new file mode 100644 index 00000000000000..aa6e00f70e564c --- /dev/null +++ b/src/frontends/pytorch/src/transforms/u4_block_repack.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/pass/graph_rewrite.hpp" +#include "openvino/pass/pass.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace pass { + +class U4BlockRepack : public ov::pass::MatcherPass { +public: + OPENVINO_RTTI("ov::frontend::pytorch::pass::U4BlockRepack"); + U4BlockRepack(); +}; + +} // namespace pass +} // namespace pytorch +} // namespace frontend +} // namespace ov diff --git a/src/frontends/pytorch/src/utils_quantize.cpp b/src/frontends/pytorch/src/utils_quantize.cpp index 5af546f3d5be5d..1346fd76971fcc 100644 --- a/src/frontends/pytorch/src/utils_quantize.cpp +++ b/src/frontends/pytorch/src/utils_quantize.cpp @@ -6,6 +6,7 @@ #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/broadcast.hpp" +#include "openvino/op/constant.hpp" #include "openvino/op/convert.hpp" #include "openvino/op/convert_like.hpp" #include "openvino/op/fake_quantize.hpp" @@ -13,6 +14,7 @@ #include "openvino/op/reshape.hpp" #include "openvino/op/scatter_elements_update.hpp" #include "openvino/op/subtract.hpp" +#include "transformations/utils/utils.hpp" namespace ov { namespace frontend { @@ -168,6 +170,53 @@ std::shared_ptr cast_quantized_fw_node(std::shared_ptr no return quant_node; } +std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64_t axis) { + // Part 1: Detect pattern + + if (list_elems.size() != 2) + return nullptr; + auto bitwise_and = cast_fw_node(list_elems[0].get_node_shared_ptr(), "aten::bitwise_and"); + if (!bitwise_and) + return nullptr; + auto bitwise_shift = cast_fw_node(list_elems[1].get_node_shared_ptr(), "aten::bitwise_right_shift"); + if (!bitwise_shift) + return nullptr; + + auto weights_u8 = std::dynamic_pointer_cast(bitwise_and->get_input_node_shared_ptr(0)); + if (weights_u8 != std::dynamic_pointer_cast(bitwise_shift->get_input_node_shared_ptr(0))) + return nullptr; + + if (weights_u8->get_output_element_type(0) != element::u8) + return nullptr; + + if (axis != -1 && static_cast(axis) != weights_u8->get_shape().size() - 1) + return nullptr; + + if (!ov::op::util::has_constant_value(bitwise_and->get_input_node_shared_ptr(1), 0x0F)) + return nullptr; + + if (!ov::op::util::has_constant_value(bitwise_shift->get_input_node_shared_ptr(1), 4)) + return nullptr; + + // Pattern detected, weights_u8 is the target u8 packed constant with weights + + // Part 2: Form u4 constant by repacking the original weights_u8 + // Repacking transforms
half of lanes to interleaved representation. + + auto u8_shape = weights_u8->get_shape(); + size_t full_size = shape_size(u8_shape); + auto src = weights_u8->get_data_ptr(); + + auto u4_shape = u8_shape; + u4_shape.push_back(2); + auto new_const = std::make_shared(element::u4, u4_shape); + auto dst = const_cast(reinterpret_cast(new_const->get_data_ptr())); + + std::copy(src, src + full_size, dst); // TODO: Avoid copying, reuse the same constant + copy_runtime_info_and_name(weights_u8, {new_const}, {weights_u8, bitwise_and, bitwise_shift}); + return new_const; +} + } // namespace pytorch } // namespace frontend } // namespace ov diff --git a/src/frontends/pytorch/src/utils_quantize.hpp b/src/frontends/pytorch/src/utils_quantize.hpp index 69917e7b8bce3e..e02bce880d2480 100644 --- a/src/frontends/pytorch/src/utils_quantize.hpp +++ b/src/frontends/pytorch/src/utils_quantize.hpp @@ -166,6 +166,12 @@ OutputVector quantizable_op(const NodeContext& context) { } } // namespace op +/** + * Captures aten::stack([aten::bitwise_and(Constant(u8)), aten::bitwise_right_shift(Constant(u8))], dim=-1). + * This pattern is transformed to a single Constant with element_type=u4. + */ +std::shared_ptr u4_compression_stack(const OutputVector& list_elems, int64_t axis); + } // namespace pytorch } // namespace frontend } // namespace ov diff --git a/src/plugins/template/tests/functional/op_reference/convert.cpp b/src/plugins/template/tests/functional/op_reference/convert.cpp index 0f3e47148790be..b6195744c9c6f3 100644 --- a/src/plugins/template/tests/functional/op_reference/convert.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert.cpp @@ -103,7 +103,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, - std::vector{0xFB, 0x0A}, + std::vector{0xBF, 0xA0}, std::vector{15.0f, 11.0f, 0.0f, 10.0f}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -134,7 +134,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::i4, ov::element::f32, - std::vector{0xFE, 0xF2}, + std::vector{0xEF, 0x2F}, std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -245,7 +245,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -261,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -269,7 +269,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -277,7 +277,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -285,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -301,7 +301,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -309,7 +309,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), 
ConvertParams(ConversionTypes::CONVERT, @@ -317,7 +317,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -325,7 +325,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -333,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -341,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -349,7 +349,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), // destination i8 @@ -364,7 +364,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -395,7 +395,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -452,7 +452,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -483,7 +483,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -540,7 +540,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -571,7 +571,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -628,7 +628,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -659,7 +659,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -718,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::u4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -758,7 +758,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::i4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -825,7 +825,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -841,7 +841,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::u4, 
std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -849,7 +849,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -857,7 +857,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -865,7 +865,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -881,7 +881,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -889,7 +889,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -897,7 +897,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -905,7 +905,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -913,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -921,7 +921,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT, @@ -929,7 +929,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), @@ -945,7 +945,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -976,7 +976,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1034,7 +1034,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1065,7 +1065,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1123,7 +1123,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1154,7 +1154,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1211,7 +1211,7 @@ 
INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, @@ -1242,7 +1242,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT, diff --git a/src/plugins/template/tests/functional/op_reference/convert_like.cpp b/src/plugins/template/tests/functional/op_reference/convert_like.cpp index b46fe98af030c2..4ddf3dda276b92 100644 --- a/src/plugins/template/tests/functional/op_reference/convert_like.cpp +++ b/src/plugins/template/tests/functional/op_reference/convert_like.cpp @@ -6,6 +6,8 @@ #include +#include + #include "conversion.hpp" using namespace ov; @@ -101,7 +103,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::u4, ov::element::f32, - std::vector{0xFB, 0x0A}, + std::vector{0xBF, 0xA0}, std::vector{15.0f, 11.0f, 0.0f, 10.0f}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -132,7 +134,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{2, 2}, ov::element::i4, ov::element::f32, - std::vector{0xFE, 0xF2}, + std::vector{0xEF, 0x2F}, std::vector{-1.0f, -2.0f, -1.0f, 2.0f}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -243,7 +245,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::i4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -259,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -267,7 +269,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -275,7 +277,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -283,7 +285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::i4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -299,7 +301,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -307,7 +309,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -315,7 +317,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -323,7 +325,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -331,7 +333,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -339,7 +341,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, 
ov::element::i4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -347,7 +349,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::i4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), // destination i8 @@ -362,7 +364,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -393,7 +395,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -436,7 +438,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i8, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i16 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -450,7 +452,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -481,7 +483,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -524,7 +526,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i16, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i32 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -538,7 +540,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -569,7 +571,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -612,7 +614,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i32, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination i64 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -626,7 +628,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -657,7 +659,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::i64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -700,7 +702,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::i64, - std::vector{-1, -2, 2, 3}, + std::vector{-1, -2, 2.2, 3.8}, std::vector{-1, -2, 2, 3}), // destination u1 @@ -716,7 +718,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::u4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, std::vector{0x90}, 8, 8), @@ -756,7 +758,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{8}, ov::element::i4, ov::element::u1, - std::vector{0x10, 0x01, 0x00, 0x00}, + std::vector{0x01, 0x10, 0x00, 0x00}, 
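// Note on the updated expected bytes in these u4/i4 conversion tests (a
// summary of the convention this patch series introduces): u4/i4 elements
// are now packed low-nibble-first, i.e. element[0] occupies the low half of
// each byte. So {1, 2, 0, 3} packs to 0x21, 0x30 (1 | 2 << 4, then
// 0 | 3 << 4) instead of the previous high-nibble-first 0x12, 0x03.
// Single-bit u1 data keeps its big-endian bit order, which is why the u1
// expected byte 0x90 just below is unchanged in value.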
std::vector{0x90}, 8, 8), @@ -823,7 +825,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u1, ov::element::u4, std::vector{0xA0}, - std::vector{0x10, 0x10}, + std::vector{0x01, 0x01}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -839,7 +841,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u8, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -847,7 +849,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u16, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -855,7 +857,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u32, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -863,7 +865,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::u64, ov::element::u4, std::vector{1, 2, 0, 3}, - std::vector{0x12, 0x03}, + std::vector{0x21, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -879,7 +881,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i8, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -887,7 +889,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i16, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -895,7 +897,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -903,7 +905,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::i64, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -911,7 +913,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -919,7 +921,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::bf16, ov::element::u4, std::vector{-1, -2, 0, 3}, - std::vector{0xFE, 0x03}, + std::vector{0xEF, 0x30}, 4, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -927,7 +929,7 @@ INSTANTIATE_TEST_SUITE_P( ov::element::f32, ov::element::u4, std::vector{-1, -2, 2, 3}, - std::vector{0xFE, 0x23}, + std::vector{0xEF, 0x32}, 4, 4), @@ -943,7 +945,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -974,7 +976,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u8, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1017,7 +1019,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u8, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u16 @@ -1032,7 +1034,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u16, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1063,7 +1065,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u16, - std::vector{0x21, 
0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1106,7 +1108,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u16, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u32 @@ -1121,7 +1123,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1152,7 +1154,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u32, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1195,7 +1197,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u32, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3}), // destination u64 ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1209,7 +1211,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::u4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1240,7 +1242,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::i4, ov::element::u64, - std::vector{0x21, 0x43}, + std::vector{0x12, 0x34}, std::vector{2, 1, 4, 3}, 4), ConvertParams(ConversionTypes::CONVERT_LIKE, @@ -1283,7 +1285,7 @@ INSTANTIATE_TEST_SUITE_P( ov::PartialShape{4}, ov::element::f32, ov::element::u64, - std::vector{1, 2, 2, 3}, + std::vector{1, 2, 2.2, 3.8}, std::vector{1, 2, 2, 3})), ReferenceConversionLayerTest::getTestCaseName); } // namespace diff --git a/tools/mo/openvino/tools/mo/ops/Cast.py b/tools/mo/openvino/tools/mo/ops/Cast.py index 77beb07c74122e..24409912429f07 100644 --- a/tools/mo/openvino/tools/mo/ops/Cast.py +++ b/tools/mo/openvino/tools/mo/ops/Cast.py @@ -36,12 +36,14 @@ def backend_attrs(self): @staticmethod def type_infer(node: Node): - assert node.has_valid('dst_type'), 'Destination type of "Cast" operation should be extracted earlier' + assert node.has_valid( + 'dst_type'), 'Destination type of "Cast" operation should be extracted earlier' node.out_port(0).set_data_type(node.dst_type) @staticmethod def helper_value_propagation(node_name, value, dst_type): - new_blob, finite_match_count, zero_match_count = convert_blob(value, dst_type) + new_blob, finite_match_count, zero_match_count = convert_blob( + value, dst_type) if finite_match_count: log.error("{} elements of {} were clipped to infinity while converting an input blob for node '{}' to {}." @@ -63,6 +65,10 @@ def custom_type_casting_and_packing(node: Node, value, dst_type): we would pad them to 6 element with the last element as zero and we would pack them into 3 uint8 values """ assert dst_type in [packed_U4, packed_I4] + # TODO: Remove this comment when it's clear that we can fix it easily + # raise Exception("Packing of u4/i4 data is no longer supported in mo because it is now incompatible with the new " + # "order of the halfs of a byte that was introduced in OpenVINO runtime recently. 
Use ovc " + # "command line tool or openvino.convert_model python function instead.") minimum_regular_dtype = np.uint8 if dst_type == packed_U4 else np.int8 # initial casing from the source type to the numpy-friendly type which could absorb all the values of dst_type @@ -83,10 +89,12 @@ def custom_type_casting_and_packing(node: Node, value, dst_type): padded = np.concatenate((flattened, np.zeros([pad], dtype=minimum_regular_dtype))) assert np.prod(padded.shape) % num_values_fitting_into_uint8 == 0 - bit_order_little = (padded[:, None] & (1 << np.arange(num_bits)) > 0).astype(np.uint8) - bit_order_big = np.flip(bit_order_little, axis=1) - bit_order_big_flattened = bit_order_big.flatten() - packed = np.packbits(bit_order_big_flattened) + bit_order_little = (padded[:, None] & ( + 1 << np.arange(num_bits)) > 0).astype(np.uint8) + bit_order_big_flattened = bit_order_little.flatten() + # u1 still has reversed bit order: + packed = np.packbits(bit_order_big_flattened, + bitorder='little' if num_bits > 1 else 'big') node.out_node(0)['force_shape'] = data_shape.copy() node.out_node(0)['force_type'] = np_data_type_to_precision(dst_type) diff --git a/tools/mo/unit_tests/mo/ops/cast_test.py b/tools/mo/unit_tests/mo/ops/cast_test.py index 985a7276514235..73a468e9fb80fa 100644 --- a/tools/mo/unit_tests/mo/ops/cast_test.py +++ b/tools/mo/unit_tests/mo/ops/cast_test.py @@ -21,81 +21,20 @@ class TestCastTest(): """ Example of checking: - 7 == 0111, padded to 0111 0000, results in 112 - 7 == 0111, 8 == 1000 packed to 0111 1000, results in 120 + 7 == 0111, padded to 00000111, results in 7 + 7 == 0111, 8 == 1000 packed to 10000111, results in 7+16 - -8 == 1000, padded to 1000 0000, results in 128 + -8 == 1000, padded to 00001000, results in 8 """ - @pytest.mark.parametrize("value, expected, custom_dtype",[ - ([0], [0], packed_U4), - ([1], [16], packed_U4), - ([2], [32], packed_U4), - ([3], [48], packed_U4), - ([4], [64], packed_U4), - ([5], [80], packed_U4), - ([6], [96], packed_U4), - ([7], [112], packed_U4), - ([8], [128], packed_U4), - ([9], [144], packed_U4), - ([10], [160], packed_U4), - ([11], [176], packed_U4), - ([12], [192], packed_U4), - ([13], [208], packed_U4), - ([14], [224], packed_U4), - ([15], [240], packed_U4), - - ([0, 15], [15], packed_U4), - ([1, 14], [30], packed_U4), - ([2, 13], [45], packed_U4), - ([3, 12], [60], packed_U4), - ([4, 11], [75], packed_U4), - ([5, 10], [90], packed_U4), - ([6, 9], [105], packed_U4), - ([7, 8], [120], packed_U4), - ([8, 7], [135], packed_U4), - ([9, 6], [150], packed_U4), - ([10, 5], [165], packed_U4), - ([11, 4], [180], packed_U4), - ([12, 3], [195], packed_U4), - ([13, 2], [210], packed_U4), - ([14, 1], [225], packed_U4), - ([15, 0], [240], packed_U4), - - ([-8], [128], packed_I4), - ([-7], [144], packed_I4), - ([-6], [160], packed_I4), - ([-5], [176], packed_I4), - ([-4], [192], packed_I4), - ([-3], [208], packed_I4), - ([-2], [224], packed_I4), - ([-1], [240], packed_I4), - ([0], [0], packed_I4), - ([1], [16], packed_I4), - ([2], [32], packed_I4), - ([3], [48], packed_I4), - ([4], [64], packed_I4), - ([5], [80], packed_I4), - ([6], [96], packed_I4), - ([7], [112], packed_I4), - - ([-8, 7], [135], packed_I4), - ([-7, 6], [150], packed_I4), - ([-6, 5], [165], packed_I4), - ([-5, 4], [180], packed_I4), - ([-4, 3], [195], packed_I4), - ([-3, 2], [210], packed_I4), - ([-2, 1], [225], packed_I4), - ([-1, 0], [240], packed_I4), - ([0, -1], [15], packed_I4), - ([1, -2], [30], packed_I4), - ([2, -3], [45], packed_I4), - ([3, -4], [60], packed_I4), - ([4, -5], 
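# Worked example of the new packing (a sketch of the logic in the
# custom_type_casting_and_packing() hunk above): values are padded, exploded
# into a little-endian bit matrix via `padded[:, None] & (1 << np.arange(num_bits)) > 0`,
# flattened, and packed with np.packbits(..., bitorder='little'), so the
# first value lands in the low nibble. For the docstring case [7, 8]:
# 7 + (8 << 4) == 0b10000111 == 135; for i4, -8 maps to its
# two's-complement nibble 16 - 8 == 8.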
[75], packed_I4), - ([5, -6], [90], packed_I4), - ([6, -7], [105], packed_I4), - ([7, -8], [120], packed_I4), - ]) + @pytest.mark.parametrize("value, expected, custom_dtype", + [([i], [i], packed_U4) for i in range(16)] + + [([i, 15-i], [i + (15-i)*16], packed_U4) for i in range(16)] + + [([-i], [16-i], packed_I4) for i in range(1, 8+1)] + + [([i], [i], packed_I4) for i in range(8)] + + [([-i-1, i], [16-i-1 + 16*i], packed_I4) for i in range(8)] + + [([i, -i-1], [i + 16*(16-i-1)], packed_I4) for i in range(8)] + ) def test_custom_value_propagation(self, value, expected, custom_dtype): graph = build_graph(nodes(value, custom_dtype), [ *connect('value', 'convert'), *connect('convert', 'output'), From dc9ec1ee9d6127b49aec9d157ff12981b252e422 Mon Sep 17 00:00:00 2001 From: Katarzyna Mitrus Date: Thu, 19 Oct 2023 02:53:38 +0200 Subject: [PATCH 14/39] Fix MO multinomial shape array (#20548) --- tools/mo/openvino/tools/mo/ops/multinomial.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/mo/openvino/tools/mo/ops/multinomial.py b/tools/mo/openvino/tools/mo/ops/multinomial.py index 42f4b0d3eedbb9..233a9d4565f4c1 100644 --- a/tools/mo/openvino/tools/mo/ops/multinomial.py +++ b/tools/mo/openvino/tools/mo/ops/multinomial.py @@ -3,7 +3,7 @@ import numpy as np -from openvino.tools.mo.front.common.partial_infer.utils import dynamic_dimension +from openvino.tools.mo.front.common.partial_infer.utils import dynamic_dimension_value, shape_array from openvino.tools.mo.front.extractor import bool_to_str from openvino.tools.mo.graph.graph import Graph, Node @@ -63,7 +63,7 @@ def infer(node: Node): num_samples = node.in_port(1).data.get_value() if num_samples is not None: - output_shape.append(num_samples) + output_shape.append(np.array(num_samples).item()) else: - output_shape.append(dynamic_dimension) - node.out_port(0).data.set_shape(output_shape) + output_shape.append(dynamic_dimension_value) + node.out_port(0).data.set_shape(shape_array(output_shape)) From 5fb6785f9ed8932e8de58600cd35421a2818d487 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 19 Oct 2023 11:18:42 +0400 Subject: [PATCH 15/39] Enabled FrontendLibCloseTest FE test suite (#20592) --- src/frontends/onnx/tests/skip_tests_config.cpp | 6 ++++-- src/frontends/paddle/tests/skip_tests_config.cpp | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/frontends/onnx/tests/skip_tests_config.cpp b/src/frontends/onnx/tests/skip_tests_config.cpp index 234cb99dfe9257..99d6bc297a1110 100644 --- a/src/frontends/onnx/tests/skip_tests_config.cpp +++ b/src/frontends/onnx/tests/skip_tests_config.cpp @@ -9,9 +9,11 @@ std::vector disabledTestPatterns() { return { -#ifndef BUILD_SHARED_LIBS +#ifdef OPENVINO_STATIC_LIBRARY // Disable tests for static libraries - ".*FrontendLibCloseTest.*" + ".*FrontendLibCloseTest.*", #endif + // CVS-123201 + ".*testUnloadLibBeforeDeletingDependentObject.*", }; } diff --git a/src/frontends/paddle/tests/skip_tests_config.cpp b/src/frontends/paddle/tests/skip_tests_config.cpp index 234cb99dfe9257..144e9d001ae276 100644 --- a/src/frontends/paddle/tests/skip_tests_config.cpp +++ b/src/frontends/paddle/tests/skip_tests_config.cpp @@ -9,7 +9,7 @@ std::vector disabledTestPatterns() { return { -#ifndef BUILD_SHARED_LIBS +#ifdef OPENVINO_STATIC_LIBRARY // Disable tests for static libraries ".*FrontendLibCloseTest.*" #endif From 137c8e896f1ab36c88378242c022e911890c6c31 Mon Sep 17 00:00:00 2001 From: Irina Efode Date: Thu, 19 Oct 2023 11:22:44 +0400 Subject: [PATCH 16/39] [OP 
CONFORMANCE] Fix op version operations in conformance report (#20502) * [OP CONFORMANCE] Fix op version operations in conformance report * Code style + SD test --- .../subgraphs_dumper/tests/cache/op_cache.cpp | 8 +- .../functional_test_utils/summary/op_info.hpp | 110 +++++++++++++----- .../summary/op_summary.hpp | 2 +- .../src/summary/op_info.cpp | 49 +++----- .../src/summary/op_summary.cpp | 13 ++- 5 files changed, 109 insertions(+), 73 deletions(-) diff --git a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp index 67a57298da3d95..0ec25023f3801c 100644 --- a/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp +++ b/src/tests/functional/plugin/conformance/subgraphs_dumper/tests/cache/op_cache.cpp @@ -132,7 +132,7 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { ASSERT_EQ(meta.get_model_info().begin()->second.model_priority, 3); // check input_info ASSERT_EQ(meta.get_input_info().size(), 1); - ASSERT_EQ(meta.get_input_info().begin()->first, "Convert-1_0"); + ASSERT_EQ(meta.get_input_info().begin()->first, "Convert-0_0"); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.max, DEFAULT_MAX_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.min, DEFAULT_MIN_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.is_const, false); @@ -149,7 +149,7 @@ TEST_F(OpCacheUnitTest, update_cache_by_model) { ASSERT_EQ(meta.get_model_info().begin()->second.model_priority, 1); // check input_info ASSERT_EQ(meta.get_input_info().size(), 1); - ASSERT_EQ(meta.get_input_info().begin()->first, "ShapeOf-1_0"); + ASSERT_EQ(meta.get_input_info().begin()->first, "ShapeOf-0_0"); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.max, DEFAULT_MAX_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.ranges.min, DEFAULT_MIN_VALUE); ASSERT_EQ(meta.get_input_info().begin()->second.is_const, false); @@ -162,7 +162,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { ASSERT_TRUE(this->serialize_op({convert_node, test_meta})); ASSERT_TRUE(ov::util::directory_exists(test_artifacts_dir)); auto serialized_model_path = ov::util::path_join({test_artifacts_dir, - "operation", "static", "Convert-1", "f16", "Convert-1_0.xml"}); + "operation", "static", "Convert-0", "f16", "Convert-0_0.xml"}); ASSERT_TRUE(ov::util::file_exists(serialized_model_path)); auto core = ov::Core(); auto serialized_model = core.read_model(serialized_model_path); @@ -171,7 +171,7 @@ TEST_F(OpCacheUnitTest, serialize_op) { } TEST_F(OpCacheUnitTest, get_rel_serilization_dir) { - auto ref_path = ov::util::path_join({"operation", "static", "Convert-1", "f16"}); + auto ref_path = ov::util::path_join({"operation", "static", "Convert-0", "f16"}); auto original_path = this->get_rel_serilization_dir(convert_node); ASSERT_EQ(ref_path, original_path); } diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp index df4377d5cf9ad4..ef76694caf9691 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_info.hpp @@ -4,45 +4,95 @@ #pragma once +#include "openvino/op/ops.hpp" #include "openvino/openvino.hpp" namespace ov { namespace test { namespace functional { +// {{ type_info, real_version }} +const std::map 
not_aligned_op_version = { + // opset 1 + {ov::op::v0::Abs::get_type_info_static(), 0}, + {ov::op::v0::Acos::get_type_info_static(), 0}, + {ov::op::v0::Asin::get_type_info_static(), 0}, + {ov::op::v0::Atan::get_type_info_static(), 0}, + {ov::op::v0::BatchNormInference::get_type_info_static(), 0}, + {ov::op::v0::CTCGreedyDecoder::get_type_info_static(), 0}, + {ov::op::v0::Ceiling::get_type_info_static(), 0}, + {ov::op::v0::Clamp::get_type_info_static(), 0}, + {ov::op::v0::Concat::get_type_info_static(), 0}, + {ov::op::v0::Constant::get_type_info_static(), 0}, + {ov::op::v0::Convert::get_type_info_static(), 0}, + {ov::op::v0::Cos::get_type_info_static(), 0}, + {ov::op::v0::Cosh::get_type_info_static(), 0}, + {ov::op::v0::DepthToSpace::get_type_info_static(), 0}, + {ov::op::v0::DetectionOutput::get_type_info_static(), 0}, + {ov::op::v0::Elu::get_type_info_static(), 0}, + {ov::op::v0::Erf::get_type_info_static(), 0}, + {ov::op::v0::Exp::get_type_info_static(), 0}, + {ov::op::v0::FakeQuantize::get_type_info_static(), 0}, + {ov::op::v0::Floor::get_type_info_static(), 0}, + {ov::op::v0::GRN::get_type_info_static(), 0}, + {ov::op::v0::HardSigmoid::get_type_info_static(), 0}, + {ov::op::v0::Interpolate::get_type_info_static(), 0}, + {ov::op::v0::Log::get_type_info_static(), 0}, + {ov::op::v0::LRN::get_type_info_static(), 0}, + {ov::op::v0::LSTMCell::get_type_info_static(), 0}, + {ov::op::v0::LSTMSequence::get_type_info_static(), 0}, + {ov::op::v0::MatMul::get_type_info_static(), 0}, + {ov::op::v0::Negative::get_type_info_static(), 0}, + {ov::op::v0::NormalizeL2::get_type_info_static(), 0}, + {ov::op::v0::PRelu::get_type_info_static(), 0}, + {ov::op::v0::PSROIPooling::get_type_info_static(), 0}, + {ov::op::v0::Parameter::get_type_info_static(), 0}, + {ov::op::v0::PriorBox::get_type_info_static(), 0}, + {ov::op::v0::PriorBoxClustered::get_type_info_static(), 0}, + {ov::op::v0::Proposal::get_type_info_static(), 0}, + {ov::op::v0::Range::get_type_info_static(), 0}, + {ov::op::v0::Relu::get_type_info_static(), 0}, + {ov::op::v0::RegionYolo::get_type_info_static(), 0}, + {ov::op::v0::Result::get_type_info_static(), 0}, + {ov::op::v0::ReverseSequence::get_type_info_static(), 0}, + {ov::op::v0::RNNCell::get_type_info_static(), 0}, + {ov::op::v0::Selu::get_type_info_static(), 0}, + {ov::op::v0::ShapeOf::get_type_info_static(), 0}, + {ov::op::v0::ShuffleChannels::get_type_info_static(), 0}, + {ov::op::v0::Sign::get_type_info_static(), 0}, + {ov::op::v0::Sigmoid::get_type_info_static(), 0}, + {ov::op::v0::Sin::get_type_info_static(), 0}, + {ov::op::v0::Sinh::get_type_info_static(), 0}, + {ov::op::v0::Sqrt::get_type_info_static(), 0}, + {ov::op::v0::SpaceToDepth::get_type_info_static(), 0}, + {ov::op::v0::SquaredDifference::get_type_info_static(), 0}, + {ov::op::v0::Squeeze::get_type_info_static(), 0}, + {ov::op::v0::Tan::get_type_info_static(), 0}, + {ov::op::v0::Tanh::get_type_info_static(), 0}, + {ov::op::v0::TensorIterator::get_type_info_static(), 0}, + {ov::op::v0::Tile::get_type_info_static(), 0}, + {ov::op::v0::Unsqueeze::get_type_info_static(), 0}, + {ov::op::v0::Xor::get_type_info_static(), 0}, + // opset 2 + {ov::op::v0::MVN::get_type_info_static(), 0}, + {ov::op::v0::ReorgYolo::get_type_info_static(), 0}, + {ov::op::v0::ROIPooling::get_type_info_static(), 0}, + {ov::op::v0::Gelu::get_type_info_static(), 0}, + {ov::op::v1::BatchToSpace::get_type_info_static(), 1}, + {ov::op::v1::SpaceToBatch::get_type_info_static(), 1}, + // opset 3 + {ov::op::v0::RNNCell::get_type_info_static(), 0}, + 
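// The entries in this table pin operations whose C++ implementation version
// (ov::op::vN) differs from the opset that ships them, so conformance
// artifacts are named by the real version: Convert is part of opset1 but is
// implemented as ov::op::v0::Convert, hence the expected names in the
// op_cache tests above change from "Convert-1" to "Convert-0".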
{ov::op::v0::ShuffleChannels::get_type_info_static(), 0}, + // opset 4 + {ov::op::v3::Acosh::get_type_info_static(), 3}, + {ov::op::v3::Asinh::get_type_info_static(), 3}, + {ov::op::v3::Atanh::get_type_info_static(), 3}, +}; + // todo: reuse in summary std::string get_node_version(const std::shared_ptr& node, const std::string& postfix = ""); +std::string get_node_version(const ov::NodeTypeInfo& node_type_info); } // namespace functional } // namespace test } // namespace ov - -// todo: remove these structure after remove old subgraphs dumper -namespace LayerTestsUtils { - -struct ModelInfo { - size_t unique_op_cnt; - // model_path, op_cnt - std::map model_paths; - - ModelInfo(size_t _op_cnt = 0, const std::map& _model_paths = {{}}); -}; - -struct PortInfo { - double min; - double max; - bool convert_to_const; - - PortInfo(double min, double max, bool convert_to_const); - PortInfo(); -}; - -struct OPInfo { - std::map found_in_models; - std::map ports_info; - - OPInfo(const std::string& source_model, const std::string& model_path, size_t total_op_cnt = 0); - - OPInfo() = default; -}; -} // namespace LayerTestsUtils diff --git a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp index cc97fb45cd6949..44cf995f7184c8 100644 --- a/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp +++ b/src/tests/test_utils/functional_test_utils/include/functional_test_utils/summary/op_summary.hpp @@ -32,7 +32,7 @@ class OpSummary : public virtual Summary { std::map opsStats = {}; unsigned short int downgrade_coefficient; - std::string getOpVersion(const std::string& version); + std::string get_opset_number(const std::string& opset_full_name); protected: OpSummary(unsigned short int downgrade_coefficient = 1); diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp index 0082cba61a343f..358be2d424b177 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_info.cpp @@ -8,45 +8,30 @@ namespace ov { namespace test { namespace functional { -// todo: reuse in summary std::string get_node_version(const std::shared_ptr& node, const std::string& postfix) { - std::string op_name = node->get_type_info().name; - std::string opset_version = node->get_type_info().get_version(); - std::string opset_name = "opset"; - auto pos = opset_version.find(opset_name); - if (pos != std::string::npos) { - op_name += "-" + opset_version.substr(pos + opset_name.size()); - } + const auto& node_type_info = node->get_type_info(); + auto op_name = get_node_version(node_type_info); if (!postfix.empty()) { op_name += "_" + postfix; } return op_name; } +std::string get_node_version(const ov::NodeTypeInfo& node_type_info) { + std::string op_name = node_type_info.name + std::string("-"); + std::string opset_version = node_type_info.get_version(); + if (not_aligned_op_version.count(node_type_info)) { + op_name += std::to_string(not_aligned_op_version.at(node_type_info)); + } else { + std::string opset_name = "opset"; + auto pos = opset_version.find(opset_name); + if (pos != std::string::npos) { + op_name += opset_version.substr(pos + opset_name.size()); + } + } + return op_name; +} + } // namespace functional } // namespace test } // namespace ov - -namespace LayerTestsUtils { - 
-ModelInfo::ModelInfo(size_t _op_cnt, const std::map& _model_paths) - : unique_op_cnt(_op_cnt), - model_paths(_model_paths) {} - -PortInfo::PortInfo(double min, double max, bool convert_to_const) - : min(min), - max(max), - convert_to_const(convert_to_const) {} - -PortInfo::PortInfo() { - min = std::numeric_limits::min(); - max = std::numeric_limits::max(); - convert_to_const = false; -} - -OPInfo::OPInfo(const std::string& source_model, const std::string& model_path, size_t total_op_cnt) { - found_in_models = {{source_model, ModelInfo(1, {{model_path, total_op_cnt}})}}; - ports_info = {}; -} - -} // namespace LayerTestsUtils diff --git a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp index b895d14ab26ca6..fbaaf4e16629c3 100644 --- a/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp +++ b/src/tests/test_utils/functional_test_utils/src/summary/op_summary.cpp @@ -8,6 +8,7 @@ #include #include "common_test_utils/file_utils.hpp" +#include "functional_test_utils/summary/op_info.hpp" using namespace ov::test::utils; @@ -109,13 +110,13 @@ void OpSummary::updateOPsImplStatus(const ov::NodeTypeInfo& op, const bool implS } } -std::string OpSummary::getOpVersion(const std::string& version) { +std::string OpSummary::get_opset_number(const std::string& opset_full_name) { std::string opset_name = "opset"; - auto pos = version.find(opset_name); + auto pos = opset_full_name.find(opset_name); if (pos == std::string::npos) { return "undefined"; } else { - return version.substr(pos + opset_name.size()); + return opset_full_name.substr(pos + opset_name.size()); } } @@ -259,7 +260,7 @@ void OpSummary::saveReport() { const auto& type_info_set = opset.get_type_info_set(); for (const auto& type_info : type_info_set) { auto it = opsInfo.find(type_info); - std::string op_version = getOpVersion(opset_version); + std::string op_version = get_opset_number(opset_version); if (it == opsInfo.end()) { opsInfo.insert({type_info, op_version}); } else { @@ -304,7 +305,7 @@ void OpSummary::saveReport() { pugi::xml_node opsNode = root.append_child("ops_list"); for (const auto& op : opsInfo) { - std::string name = std::string(op.first.name) + "-" + getOpVersion(op.first.version_id); + std::string name = functional::get_node_version(op.first); opsNode.append_child(name.c_str()).append_attribute("opsets").set_value(op.second.c_str()); } @@ -315,7 +316,7 @@ void OpSummary::saveReport() { it.second.rel_passed /= downgrade_coefficient; it.second.rel_all /= downgrade_coefficient; - std::string name = std::string(it.first.name) + "-" + getOpVersion(it.first.version_id); + std::string name = functional::get_node_version(it.first); opList.insert(name); pugi::xml_node entry = currentDeviceNode.append_child(name.c_str()); entry.append_attribute("implemented").set_value(it.second.isImplemented); From ad9a146f948fba44d2c86594535e354c6b8ea8ae Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 19 Oct 2023 11:44:38 +0400 Subject: [PATCH 17/39] Enabled LC_RPATH for brew (#20587) --- .../packaging/common-libraries.cmake | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cmake/developer_package/packaging/common-libraries.cmake b/cmake/developer_package/packaging/common-libraries.cmake index 4fbce5b4a58ca7..9671d827521d20 100644 --- a/cmake/developer_package/packaging/common-libraries.cmake +++ b/cmake/developer_package/packaging/common-libraries.cmake @@ -4,14 +4,6 @@ include(GNUInstallDirs) 
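# Rationale, inferred from this hunk: the brew-specific RPATH logic moves to
# the end of the file so the new CMAKE_INSTALL_RPATH can be composed from
# ${CMAKE_INSTALL_PREFIX}/${OV_CPACK_LIBRARYDIR} once those variables are
# defined. Homebrew resolves shared-library dependencies via LC_RPATH,
# whereas the other generators keep CMAKE_SKIP_INSTALL_RPATH ON and rely on
# standard library search paths.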
-if(CPACK_GENERATOR STREQUAL "BREW") - # brew relies on RPATHs - # set(CMAKE_SKIP_INSTALL_RPATH OFF) -else() - # we don't need RPATHs, because libraries are searched by standard paths - set(CMAKE_SKIP_INSTALL_RPATH ON) -endif() - # # ov_common_libraries_cpack_set_dirs() # @@ -115,3 +107,12 @@ macro(ov_define_component_include_rules) endmacro() ov_define_component_include_rules() + +if(CPACK_GENERATOR STREQUAL "BREW") + # brew relies on RPATHs + set(CMAKE_SKIP_INSTALL_RPATH OFF) + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/${OV_CPACK_LIBRARYDIR}") +else() + # we don't need RPATHs, because libraries are searched by standard paths + set(CMAKE_SKIP_INSTALL_RPATH ON) +endif() From 6bec4fc6dfae3929bfddc3bea8ff2a38c7f3f820 Mon Sep 17 00:00:00 2001 From: Fang Xu Date: Thu, 19 Oct 2023 13:27:27 +0530 Subject: [PATCH 18/39] build onetbb with hwloc2.9.3 (#20481) --- cmake/dependencies.cmake | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake index 5c86bdea57620c..257263f663bec6 100644 --- a/cmake/dependencies.cmake +++ b/cmake/dependencies.cmake @@ -104,10 +104,10 @@ function(ov_download_tbb) elseif(LINUX AND X86_64 AND OV_GLIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.3-lin.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.3-lin-20231012.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "f3f2edd8e7875b02220f11ab5b201411d5af6822e525e8da5444b4a666514e8b" + SHA256 "6f39d18783b37fdcc15ca137fbf70bc78206848af1a510cada806279fae49718" USE_NEW_LOCATION TRUE) elseif(YOCTO_AARCH64) RESOLVE_DEPENDENCY(TBB @@ -135,10 +135,10 @@ function(ov_download_tbb) elseif(LINUX AND AARCH64 AND OV_GLIBC_VERSION VERSION_GREATER_EQUAL 2.17) # build oneTBB 2021.2.1 with gcc 4.8 (glibc 2.17) RESOLVE_DEPENDENCY(TBB - ARCHIVE_LIN "oneapi-tbb-2021.2.1-lin-arm64-canary.tgz" + ARCHIVE_LIN "oneapi-tbb-2021.2.1-lin-arm64-20231012.tgz" TARGET_PATH "${TEMP}/tbb" ENVIRONMENT "TBBROOT" - SHA256 "042fdac53be65841a970b05d892f4b20b556b06fd3b20d2d0068e49c4fd74f07" + SHA256 "cbb239cbda7ea2937cec7008c12fe628dd44488e1eafd9630f8814f9eb2c13e2" USE_NEW_LOCATION TRUE) elseif(APPLE AND AARCH64) # build oneTBB 2021.2.1 with export MACOSX_DEPLOYMENT_TARGET=11.0 @@ -204,10 +204,10 @@ function(ov_download_tbbbind_2_5) USE_NEW_LOCATION TRUE) elseif(LINUX AND X86_64) RESOLVE_DEPENDENCY(TBBBIND_2_5 - ARCHIVE_LIN "tbbbind_2_5_static_lin_v3.tgz" + ARCHIVE_LIN "tbbbind_2_5_static_lin_v4.tgz" TARGET_PATH "${TEMP}/tbbbind_2_5" ENVIRONMENT "TBBBIND_2_5_ROOT" - SHA256 "d39deb262c06981b5e2d2e3c593e9fc9be62ce4feb91dd4e648e92753659a6b3" + SHA256 "4ebf30246530795f066fb9616e6707c6b17be7a65d29d3518b578a769dd54eea" USE_NEW_LOCATION TRUE) else() # TMP: for Apple Silicon TBB does not provide TBBBind From 1aebf6df5fa43c92a93361391f8f57d3fdb419b4 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Thu, 19 Oct 2023 14:45:19 +0400 Subject: [PATCH 19/39] Add prefixes to proto files to avoid ODR conflicts (#20588) --- src/frontends/paddle/src/decoder_proto.cpp | 6 ++-- src/frontends/paddle/src/decoder_proto.hpp | 4 +-- src/frontends/paddle/src/frontend.cpp | 2 +- src/frontends/paddle/src/input_model.cpp | 10 +++--- src/frontends/paddle/src/place.cpp | 14 ++++---- src/frontends/paddle/src/place.hpp | 20 +++++------ .../paddle/src/proto/framework.proto | 2 +- .../tensorflow/src/checkpoint_v1_reader.cpp | 4 +-- .../tensorflow/src/checkpoint_v1_reader.hpp | 4 +-- .../tensorflow/src/decoder_argdef.hpp | 
22 ++++++------ .../tensorflow/src/decoder_proto.cpp | 28 +++++++-------- .../tensorflow/src/decoder_proto.hpp | 22 ++++++------ .../tensorflow/src/graph_iterator_meta.cpp | 26 +++++++------- .../tensorflow/src/graph_iterator_meta.hpp | 14 ++++---- .../tensorflow/src/graph_iterator_proto.hpp | 18 +++++----- .../src/graph_iterator_proto_txt.hpp | 2 +- .../src/graph_iterator_saved_model.cpp | 26 +++++++------- .../src/graph_iterator_saved_model.hpp | 14 ++++---- .../tensorflow/src/op/var_handle.cpp | 4 +-- .../tensorflow/src/op/xla_conv_v2.cpp | 2 +- src/frontends/tensorflow/src/op/xla_dot.cpp | 2 +- .../src/proto/allocation_description.proto | 2 +- .../tensorflow/src/proto/api_def.proto | 2 +- .../tensorflow/src/proto/attr_value.proto | 2 +- .../tensorflow/src/proto/cost_graph.proto | 2 +- .../src/proto/dataset_options.proto | 2 +- .../src/proto/device_attributes.proto | 2 +- .../tensorflow/src/proto/function.proto | 2 +- .../src/proto/{ => google/protobuf}/any.proto | 0 .../{ => google/protobuf}/wrappers.proto | 0 .../tensorflow/src/proto/graph.proto | 2 +- .../src/proto/graph_transfer_info.proto | 2 +- .../tensorflow/src/proto/kernel_def.proto | 2 +- .../tensorflow/src/proto/log_memory.proto | 2 +- .../tensorflow/src/proto/meta_graph.proto | 4 +-- .../tensorflow/src/proto/model.proto | 2 +- .../tensorflow/src/proto/node_def.proto | 2 +- .../tensorflow/src/proto/op_def.proto | 2 +- .../tensorflow/src/proto/reader_base.proto | 2 +- .../remote_fused_graph_execute_info.proto | 2 +- .../src/proto/resource_handle.proto | 2 +- .../tensorflow/src/proto/saved_model.proto | 2 +- .../src/proto/saved_object_graph.proto | 4 +-- .../src/proto/saved_tensor_slice.proto | 2 +- .../tensorflow/src/proto/saver.proto | 2 +- .../tensorflow/src/proto/step_stats.proto | 2 +- .../tensorflow/src/proto/struct.proto | 18 +++++----- .../tensorflow/src/proto/summary.proto | 2 +- .../tensorflow/src/proto/tensor.proto | 2 +- .../tensorflow/src/proto/tensor_bundle.proto | 2 +- .../src/proto/tensor_description.proto | 2 +- .../tensorflow/src/proto/tensor_shape.proto | 2 +- .../tensorflow/src/proto/tensor_slice.proto | 2 +- .../src/proto/trackable_object_graph.proto | 4 +-- .../tensorflow/src/proto/types.proto | 2 +- .../tensorflow/src/proto/variable.proto | 2 +- .../tensorflow/src/proto/versions.proto | 2 +- .../tensorflow/src/proto/xla_data.proto | 2 +- src/frontends/tensorflow/src/tf_utils.cpp | 34 +++++++++---------- src/frontends/tensorflow/src/tf_utils.hpp | 10 +++--- .../tensorflow/src/variables_index.cpp | 18 +++++----- .../tensorflow/src/variables_index.hpp | 2 +- src/frontends/tensorflow/tests/tf_utils.cpp | 2 ++ src/frontends/tensorflow/tests/tf_utils.hpp | 2 +- 64 files changed, 204 insertions(+), 202 deletions(-) rename src/frontends/tensorflow/src/proto/{ => google/protobuf}/any.proto (100%) rename src/frontends/tensorflow/src/proto/{ => google/protobuf}/wrappers.proto (100%) diff --git a/src/frontends/paddle/src/decoder_proto.cpp b/src/frontends/paddle/src/decoder_proto.cpp index f286bfcf1f81fc..e25437fcbf4a2e 100644 --- a/src/frontends/paddle/src/decoder_proto.cpp +++ b/src/frontends/paddle/src/decoder_proto.cpp @@ -19,9 +19,9 @@ namespace ov { namespace frontend { namespace paddle { -using namespace ::paddle::framework; +using namespace ::ov_paddle::framework; -ov::element::Type get_ov_type(const ::paddle::framework::proto::VarType_Type& type) { +ov::element::Type get_ov_type(const ::ov_paddle::framework::proto::VarType_Type& type) { static const std::map type_map{ {proto::VarType_Type::VarType_Type_BOOL, 
ov::element::boolean}, {proto::VarType_Type::VarType_Type_INT16, ov::element::i16}, @@ -189,7 +189,7 @@ std::vector DecoderProto::decode_attribute_helper(const std: namespace { inline std::map map_for_each_input_impl( - const google::protobuf::RepeatedPtrField<::paddle::framework::proto::OpDesc_Var>& c, + const google::protobuf::RepeatedPtrField<::ov_paddle::framework::proto::OpDesc_Var>& c, const std::function(const std::string&, size_t)>& func) { size_t idx = 0; std::map res; diff --git a/src/frontends/paddle/src/decoder_proto.hpp b/src/frontends/paddle/src/decoder_proto.hpp index 11627c6fba6ab9..652b03fd3ea76b 100644 --- a/src/frontends/paddle/src/decoder_proto.hpp +++ b/src/frontends/paddle/src/decoder_proto.hpp @@ -23,7 +23,7 @@ namespace ov { namespace frontend { namespace paddle { -ov::element::Type get_ov_type(const ::paddle::framework::proto::VarType_Type& type); +ov::element::Type get_ov_type(const ::ov_paddle::framework::proto::VarType_Type& type); class DecoderProto : public paddle::DecoderBase { public: @@ -56,7 +56,7 @@ class DecoderProto : public paddle::DecoderBase { const std::function(const std::string&, size_t)>& func) const; private: - std::vector<::paddle::framework::proto::OpDesc_Attr> decode_attribute_helper(const std::string& name) const; + std::vector<::ov_paddle::framework::proto::OpDesc_Attr> decode_attribute_helper(const std::string& name) const; std::weak_ptr op_place; const std::shared_ptr get_place() const { diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp index 9582fccf6c447f..2bc0ba333bb241 100644 --- a/src/frontends/paddle/src/frontend.cpp +++ b/src/frontends/paddle/src/frontend.cpp @@ -393,7 +393,7 @@ bool FrontEnd::supported_impl(const std::vector& variants) const { else if (variants[0].is()) { // Validating first stream, it must contain a model auto p_model_stream = variants[0].as(); - ::paddle::framework::proto::ProgramDesc fw; + ::ov_paddle::framework::proto::ProgramDesc fw; return fw.ParseFromIstream(p_model_stream); } return false; diff --git a/src/frontends/paddle/src/input_model.cpp b/src/frontends/paddle/src/input_model.cpp index 287fa5e54ad743..1264d983965e5d 100644 --- a/src/frontends/paddle/src/input_model.cpp +++ b/src/frontends/paddle/src/input_model.cpp @@ -21,7 +21,7 @@ namespace ov { namespace frontend { namespace paddle { -using namespace ::paddle::framework::proto; +using namespace ::ov_paddle::framework::proto; class InputModel::InputModelImpl { public: @@ -279,7 +279,7 @@ void InputModel::InputModelImpl::load_consts(const std::basic_string& folder_ if (!var_desc.persistable()) continue; - FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::paddle::framework::proto::VarType::LOD_TENSOR); + FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::ov_paddle::framework::proto::VarType::LOD_TENSOR); const auto& tensor = var_desc.type().lod_tensor().tensor(); Shape shape(tensor.dims().cbegin(), tensor.dims().cend()); const auto& type = get_ov_type(tensor.data_type()); @@ -324,7 +324,7 @@ void InputModel::InputModelImpl::load_consts(std::istream* weight_stream) { if (!var_desc.persistable()) continue; - FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::paddle::framework::proto::VarType::LOD_TENSOR); + FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::ov_paddle::framework::proto::VarType::LOD_TENSOR); FRONT_END_GENERAL_CHECK(weight_stream != nullptr && weight_stream->peek() != EOF, "PaddlePaddle *.pdiparams format weight file doesn't exist!"); /* @@ -350,8 +350,8 @@ void 
InputModel::InputModelImpl::load_consts(std::istream* weight_stream) { std::unique_ptr buf(new char[size]); weight_stream->read(reinterpret_cast(buf.get()), size); - std::unique_ptr<::paddle::framework::proto::VarType_TensorDesc> tensor_desc( - new ::paddle::framework::proto::VarType_TensorDesc()); + std::unique_ptr<::ov_paddle::framework::proto::VarType_TensorDesc> tensor_desc( + new ::ov_paddle::framework::proto::VarType_TensorDesc()); tensor_desc->ParseFromArray(buf.get(), size); Shape shape(tensor_desc->dims().cbegin(), tensor_desc->dims().cend()); const auto& type = get_ov_type(tensor_desc->data_type()); diff --git a/src/frontends/paddle/src/place.cpp b/src/frontends/paddle/src/place.cpp index 7af2bc07bbf5c5..ab5232018a6a9f 100644 --- a/src/frontends/paddle/src/place.cpp +++ b/src/frontends/paddle/src/place.cpp @@ -29,12 +29,12 @@ bool Place::is_output() const { } OpPlace::OpPlace(const ov::frontend::InputModel& input_model, - const ::paddle::framework::proto::OpDesc& op_desc, + const ::ov_paddle::framework::proto::OpDesc& op_desc, const std::vector& names) : Place(input_model, names), m_op_desc(op_desc) {} -OpPlace::OpPlace(const ov::frontend::InputModel& input_model, const ::paddle::framework::proto::OpDesc& op_desc) +OpPlace::OpPlace(const ov::frontend::InputModel& input_model, const ::ov_paddle::framework::proto::OpDesc& op_desc) : OpPlace(input_model, op_desc, {}) {} const std::map>>& OpPlace::get_output_ports() const { @@ -58,7 +58,7 @@ std::shared_ptr OpPlace::get_input_port_paddle(const std::string& i return m_input_ports.at(inputName)[inputPortIndex]; } -const ::paddle::framework::proto::OpDesc& OpPlace::get_desc() const { +const ::ov_paddle::framework::proto::OpDesc& OpPlace::get_desc() const { return m_op_desc; } @@ -207,11 +207,11 @@ Place::Ptr OpPlace::get_target_tensor(int outputPortIndex) const { TensorPlace::TensorPlace(const ov::frontend::InputModel& input_model, const std::vector& names, - const ::paddle::framework::proto::VarDesc& var_desc) + const ::ov_paddle::framework::proto::VarDesc& var_desc) : Place(input_model, names), m_var_desc(var_desc) { const auto& var_type = var_desc.type(); - if (var_type.type() == ::paddle::framework::proto::VarType::LOD_TENSOR) { + if (var_type.type() == ::ov_paddle::framework::proto::VarType::LOD_TENSOR) { const auto& tensor_desc = var_type.lod_tensor().tensor(); m_type = get_ov_type(tensor_desc.data_type()); m_pshape = PartialShape(std::vector(tensor_desc.dims().begin(), tensor_desc.dims().end())); @@ -219,7 +219,7 @@ TensorPlace::TensorPlace(const ov::frontend::InputModel& input_model, } TensorPlace::TensorPlace(const ov::frontend::InputModel& input_model, - const ::paddle::framework::proto::VarDesc& var_desc) + const ::ov_paddle::framework::proto::VarDesc& var_desc) : TensorPlace(input_model, {var_desc.name()}, var_desc) {} std::vector TensorPlace::get_consuming_ports() const { @@ -250,7 +250,7 @@ void TensorPlace::add_consuming_port(const std::shared_ptr& in_port m_consuming_ports.push_back(in_port); } -const ::paddle::framework::proto::VarDesc& TensorPlace::get_desc() const { +const ::ov_paddle::framework::proto::VarDesc& TensorPlace::get_desc() const { return m_var_desc; } diff --git a/src/frontends/paddle/src/place.hpp b/src/frontends/paddle/src/place.hpp index fc2fe9eb29efe0..e09112dd42f295 100644 --- a/src/frontends/paddle/src/place.hpp +++ b/src/frontends/paddle/src/place.hpp @@ -7,7 +7,7 @@ #include "input_model.hpp" #include "openvino/frontend/manager.hpp" -namespace paddle { +namespace ov_paddle { namespace framework { 
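// Rationale for the ov_ prefix (per this patch's subject): protoc derives
// the generated C++ namespace from the .proto `package`, so the frontends'
// bundled framework protos could collide, one-definition-rule style, with
// any other copy of the paddle/tensorflow message types linked into the
// same process; `package ov_paddle.*` / `package ov_tensorflow.*` gives
// OpenVINO's generated code its own distinct namespace.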
namespace proto { class OpDesc; @@ -15,7 +15,7 @@ class VarDesc; } // namespace proto } // namespace framework -} // namespace paddle +} // namespace ov_paddle namespace ov { namespace frontend { @@ -101,10 +101,10 @@ class OutPortPlace : public Place { class OpPlace : public Place { public: OpPlace(const ov::frontend::InputModel& input_model, - const ::paddle::framework::proto::OpDesc& op_desc, + const ::ov_paddle::framework::proto::OpDesc& op_desc, const std::vector& names); - OpPlace(const ov::frontend::InputModel& input_model, const ::paddle::framework::proto::OpDesc& op_desc); + OpPlace(const ov::frontend::InputModel& input_model, const ::ov_paddle::framework::proto::OpDesc& op_desc); void add_in_port(const std::shared_ptr& input, const std::string& name); void add_out_port(const std::shared_ptr& output, const std::string& name); @@ -114,7 +114,7 @@ class OpPlace : public Place { const std::map>>& get_input_ports() const; std::shared_ptr get_output_port_paddle(const std::string& outputName, int outputPortIndex) const; std::shared_ptr get_input_port_paddle(const std::string& inputName, int inputPortIndex) const; - const ::paddle::framework::proto::OpDesc& get_desc() const; + const ::ov_paddle::framework::proto::OpDesc& get_desc() const; const std::shared_ptr get_decoder() const; void set_decoder(const std::shared_ptr op_decoder); @@ -152,7 +152,7 @@ class OpPlace : public Place { Ptr get_target_tensor(const std::string& outputName, int outputPortIndex) const override; private: - const ::paddle::framework::proto::OpDesc& m_op_desc; // TODO: to conceal it behind decoder. + const ::ov_paddle::framework::proto::OpDesc& m_op_desc; // TODO: to conceal it behind decoder. std::shared_ptr m_op_decoder; std::map>> m_input_ports; std::map>> m_output_ports; @@ -162,9 +162,9 @@ class TensorPlace : public Place { public: TensorPlace(const ov::frontend::InputModel& input_model, const std::vector& names, - const ::paddle::framework::proto::VarDesc& var_desc); + const ::ov_paddle::framework::proto::VarDesc& var_desc); - TensorPlace(const ov::frontend::InputModel& input_model, const ::paddle::framework::proto::VarDesc& var_desc); + TensorPlace(const ov::frontend::InputModel& input_model, const ::ov_paddle::framework::proto::VarDesc& var_desc); void add_producing_port(const std::shared_ptr& out_port); void add_consuming_port(const std::shared_ptr& in_port); @@ -182,7 +182,7 @@ class TensorPlace : public Place { void set_element_type(const element::Type& type) { m_type = type; } - const ::paddle::framework::proto::VarDesc& get_desc() const; + const ::ov_paddle::framework::proto::VarDesc& get_desc() const; // External usage Ptr get_producing_operation() const override; @@ -192,7 +192,7 @@ class TensorPlace : public Place { bool is_equal_data(const Ptr& another) const override; private: - const ::paddle::framework::proto::VarDesc& m_var_desc; + const ::ov_paddle::framework::proto::VarDesc& m_var_desc; PartialShape m_pshape; element::Type m_type; diff --git a/src/frontends/paddle/src/proto/framework.proto b/src/frontends/paddle/src/proto/framework.proto index 22112cba29667d..4fc9c26c47e9ca 100644 --- a/src/frontends/paddle/src/proto/framework.proto +++ b/src/frontends/paddle/src/proto/framework.proto @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ syntax = "proto2"; -package paddle.framework.proto; +package ov_paddle.framework.proto; option optimize_for = LITE_RUNTIME; // Added by Intel Corporation 2021-2022 diff --git a/src/frontends/tensorflow/src/checkpoint_v1_reader.cpp b/src/frontends/tensorflow/src/checkpoint_v1_reader.cpp index 33833100ad6c6f..c74173af792d76 100644 --- a/src/frontends/tensorflow/src/checkpoint_v1_reader.cpp +++ b/src/frontends/tensorflow/src/checkpoint_v1_reader.cpp @@ -68,7 +68,7 @@ void CheckpointV1Reader::initialize() { // parse empty index block // This is only present at the first item of each checkpoint file and serves // as a table of contents, listing all the tensor slices saved in this file. - ::tensorflow::SavedTensorSlices sts; + ::ov_tensorflow::SavedTensorSlices sts; FRONT_END_GENERAL_CHECK(sts.ParseFromArray(value.data(), static_cast(value.size())), "[TensorFlow Frontend] incorrect input checkpoint file or internal error: cannot parse " "SavedTensorSlices entry"); @@ -254,7 +254,7 @@ void CheckpointV1Reader::read_variable(const std::string& variable_name, ov::Any // This is only present at the first item of each checkpoint file and serves // as a table of contents, listing all the tensor slices saved in this file. - ::tensorflow::SavedTensorSlices sts; + ::ov_tensorflow::SavedTensorSlices sts; FRONT_END_GENERAL_CHECK(sts.ParseFromArray(raw_data.data(), static_cast(raw_data.size())), "[TensorFlow Frontend] incorrect input checkpoint file or internal error: cannot parse " "SavedTensorSlices entry"); diff --git a/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp b/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp index bfae3b139a2aae..f088ed145f8ff3 100644 --- a/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp +++ b/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp @@ -21,8 +21,8 @@ namespace frontend { namespace tensorflow { // stores information about shape, type, and shard id for Variable struct VariableInfo { - ::tensorflow::TensorShapeProto variable_shape; - ::tensorflow::DataType variable_type; + ::ov_tensorflow::TensorShapeProto variable_shape; + ::ov_tensorflow::DataType variable_type; int32_t shard_id; size_t offset; size_t size; diff --git a/src/frontends/tensorflow/src/decoder_argdef.hpp b/src/frontends/tensorflow/src/decoder_argdef.hpp index dfee9b21e1481c..69f05423f528d6 100644 --- a/src/frontends/tensorflow/src/decoder_argdef.hpp +++ b/src/frontends/tensorflow/src/decoder_argdef.hpp @@ -9,11 +9,11 @@ #include "openvino/frontend/tensorflow/decoder.hpp" -namespace tensorflow { +namespace ov_tensorflow { class GraphDef; class FunctionDef; class OpDef_ArgDef; -} // namespace tensorflow +} // namespace ov_tensorflow namespace ov { namespace frontend { @@ -21,18 +21,18 @@ namespace tensorflow { class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase { public: - explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, - const std::shared_ptr<::tensorflow::GraphDef>& graph_def, - const std::shared_ptr<::tensorflow::FunctionDef>& func_def, + explicit DecoderArgDef(const ::ov_tensorflow::OpDef_ArgDef* arg_def, + const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::ov_tensorflow::FunctionDef>& func_def, const std::string& op_type) : m_arg_def(arg_def), m_graph_def(graph_def), m_func_def(func_def), m_op_type(op_type) {} - explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def, - const std::shared_ptr<::tensorflow::GraphDef>& graph_def, - const std::shared_ptr<::tensorflow::FunctionDef>& func_def, + explicit 
DecoderArgDef(const ::ov_tensorflow::OpDef_ArgDef* arg_def, + const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::ov_tensorflow::FunctionDef>& func_def, const std::string& op_type, const std::string& producer_name) : m_arg_def(arg_def), @@ -55,13 +55,13 @@ class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase { const std::string& get_op_name() const override; private: - const ::tensorflow::OpDef_ArgDef* m_arg_def; + const ::ov_tensorflow::OpDef_ArgDef* m_arg_def; // For existence of OpDef_ArgDef object corresponding to the main graph node, // GraphDef object must live in the memory - const std::shared_ptr<::tensorflow::GraphDef> m_graph_def; + const std::shared_ptr<::ov_tensorflow::GraphDef> m_graph_def; // For existence of OpDef_ArgDef object corresponding to the body graph node, // both GraphDef and FunctionDef objects must be alive in the memory - const std::shared_ptr<::tensorflow::FunctionDef> m_func_def; + const std::shared_ptr<::ov_tensorflow::FunctionDef> m_func_def; const std::string m_op_type; const std::string m_producer_name; }; diff --git a/src/frontends/tensorflow/src/decoder_proto.cpp b/src/frontends/tensorflow/src/decoder_proto.cpp index 2488973c1029e1..26003336584d1e 100644 --- a/src/frontends/tensorflow/src/decoder_proto.cpp +++ b/src/frontends/tensorflow/src/decoder_proto.cpp @@ -38,7 +38,7 @@ void extract_tensor_content(const std::string& tensor_content, ov::Tensor* value # pragma warning(disable : 4267) // possible loss of data #endif template -void extract_compressed_tensor_content(const ::tensorflow::TensorProto& tensor_proto, +void extract_compressed_tensor_content(const ::ov_tensorflow::TensorProto& tensor_proto, int64_t val_size, ov::Tensor* values) { auto val_lastsaved = static_cast(0); @@ -90,15 +90,15 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const { } switch (attrs[0].value_case()) { - case ::tensorflow::AttrValue::ValueCase::kB: + case ::ov_tensorflow::AttrValue::ValueCase::kB: return attrs[0].b(); - case ::tensorflow::AttrValue::ValueCase::kF: + case ::ov_tensorflow::AttrValue::ValueCase::kF: return attrs[0].f(); - case ::tensorflow::AttrValue::ValueCase::kS: + case ::ov_tensorflow::AttrValue::ValueCase::kS: return attrs[0].s(); - case ::tensorflow::AttrValue::ValueCase::kI: + case ::ov_tensorflow::AttrValue::ValueCase::kI: return attrs[0].i(); - case ::tensorflow::AttrValue::ValueCase::kShape: { + case ::ov_tensorflow::AttrValue::ValueCase::kShape: { const auto& tf_shape = attrs[0].shape(); if (tf_shape.unknown_rank()) { return ov::PartialShape::dynamic(); @@ -111,16 +111,16 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const { return ov::PartialShape(dims); } - case ::tensorflow::AttrValue::ValueCase::kType: { + case ::ov_tensorflow::AttrValue::ValueCase::kType: { auto atype = attrs[0].type(); - if (atype != ::tensorflow::DT_STRING) { + if (atype != ::ov_tensorflow::DT_STRING) { return get_ov_type(attrs[0].type()); } else { return ov::Any("DT_STRING"); } } - case ::tensorflow::AttrValue::ValueCase::kList: { + case ::ov_tensorflow::AttrValue::ValueCase::kList: { const auto& list = attrs[0].list(); if (list.i_size()) return std::vector(list.i().begin(), list.i().end()); @@ -156,7 +156,7 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const { if (list.type_size()) { std::vector res; for (int idx = 0; idx < list.type_size(); ++idx) { - if (list.type(idx) != ::tensorflow::DataType::DT_STRING) { + if (list.type(idx) != ::ov_tensorflow::DataType::DT_STRING) { 
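// DT_STRING has no direct ov::element counterpart, so string entries
// surface as ov::element::dynamic in type lists (and as the "DT_STRING"
// marker for scalar type attributes above).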
res.emplace_back(get_ov_type(list.type(idx))); } else { res.emplace_back(ov::element::dynamic); @@ -176,15 +176,15 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const { return EmptyList(); } - case ::tensorflow::AttrValue::ValueCase::kTensor: { + case ::ov_tensorflow::AttrValue::ValueCase::kTensor: { return unpack_tensor_proto(attrs[0].tensor()); } - case ::tensorflow::AttrValue::ValueCase::kPlaceholder: + case ::ov_tensorflow::AttrValue::ValueCase::kPlaceholder: FRONT_END_GENERAL_CHECK(false, "Conversion from Tensorflow to OpenVINO data type failed: Placeholder type for '", name, "' attribute is not supported."); - case ::tensorflow::AttrValue::ValueCase::kFunc: + case ::ov_tensorflow::AttrValue::ValueCase::kFunc: // attrs[0].func() returns NameAttrList object from which // we retrieve the function name // Further, InputModel object is created for FunctionDef with this name @@ -251,7 +251,7 @@ const std::string& DecoderProto::get_op_name() const { return m_node_def->name(); } -std::vector<::tensorflow::AttrValue> DecoderProto::decode_attribute_helper(const std::string& name) const { +std::vector<::ov_tensorflow::AttrValue> DecoderProto::decode_attribute_helper(const std::string& name) const { auto attr_map = m_node_def->attr(); if (attr_map.contains(name)) { auto value = m_node_def->attr().at(name); diff --git a/src/frontends/tensorflow/src/decoder_proto.hpp b/src/frontends/tensorflow/src/decoder_proto.hpp index 338bfdeccea79d..eab5e10c41c892 100644 --- a/src/frontends/tensorflow/src/decoder_proto.hpp +++ b/src/frontends/tensorflow/src/decoder_proto.hpp @@ -11,12 +11,12 @@ #include "openvino/frontend/tensorflow/decoder.hpp" #include "types.pb.h" -namespace tensorflow { +namespace ov_tensorflow { class GraphDef; class FunctionDef; class NodeDef; class AttrValue; -} // namespace tensorflow +} // namespace ov_tensorflow namespace ov { namespace frontend { @@ -29,15 +29,15 @@ void parse_producer_name(const std::string& producer_port_name, class DecoderProto : public ov::frontend::tensorflow::DecoderBase { public: - explicit DecoderProto(const ::tensorflow::NodeDef* node_def, - const std::shared_ptr<::tensorflow::GraphDef>& graph_def) + explicit DecoderProto(const ::ov_tensorflow::NodeDef* node_def, + const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def) : m_node_def(node_def), m_graph_def(graph_def), m_func_def(nullptr) {} - explicit DecoderProto(const ::tensorflow::NodeDef* node_def, - const std::shared_ptr<::tensorflow::GraphDef>& graph_def, - const std::shared_ptr<::tensorflow::FunctionDef>& func_def) + explicit DecoderProto(const ::ov_tensorflow::NodeDef* node_def, + const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::ov_tensorflow::FunctionDef>& func_def) : m_node_def(node_def), m_graph_def(graph_def), m_func_def(func_def) {} @@ -56,14 +56,14 @@ class DecoderProto : public ov::frontend::tensorflow::DecoderBase { const std::string& get_op_name() const override; private: - std::vector<::tensorflow::AttrValue> decode_attribute_helper(const std::string& name) const; - const ::tensorflow::NodeDef* m_node_def; + std::vector<::ov_tensorflow::AttrValue> decode_attribute_helper(const std::string& name) const; + const ::ov_tensorflow::NodeDef* m_node_def; // For existence of NodeDef object corresponding to the main graph node, // GraphDef object must live in the memory - const std::shared_ptr<::tensorflow::GraphDef> m_graph_def; + const std::shared_ptr<::ov_tensorflow::GraphDef> m_graph_def; // For existence of NodeDef object 
corresponding to the body graph node, // both GraphDef and FunctionDef objects must be alive in the memory - const std::shared_ptr<::tensorflow::FunctionDef> m_func_def; + const std::shared_ptr<::ov_tensorflow::FunctionDef> m_func_def; }; } // namespace tensorflow } // namespace frontend diff --git a/src/frontends/tensorflow/src/graph_iterator_meta.cpp b/src/frontends/tensorflow/src/graph_iterator_meta.cpp index 8bc41fbaefdd04..81bd821aadec0e 100644 --- a/src/frontends/tensorflow/src/graph_iterator_meta.cpp +++ b/src/frontends/tensorflow/src/graph_iterator_meta.cpp @@ -17,19 +17,19 @@ namespace ov { namespace frontend { namespace tensorflow { -bool GraphIteratorMeta::is_valid_signature(const ::tensorflow::SignatureDef& signature) const { - const std::map<::tensorflow::DataType, ov::element::Type> types{ - {::tensorflow::DataType::DT_BOOL, ov::element::boolean}, - {::tensorflow::DataType::DT_INT16, ov::element::i16}, - {::tensorflow::DataType::DT_INT32, ov::element::i32}, - {::tensorflow::DataType::DT_INT64, ov::element::i64}, - {::tensorflow::DataType::DT_HALF, ov::element::f16}, - {::tensorflow::DataType::DT_FLOAT, ov::element::f32}, - {::tensorflow::DataType::DT_DOUBLE, ov::element::f64}, - {::tensorflow::DataType::DT_UINT8, ov::element::u8}, - {::tensorflow::DataType::DT_INT8, ov::element::i8}, - {::tensorflow::DataType::DT_BFLOAT16, ov::element::bf16}, - {::tensorflow::DataType::DT_STRING, ov::element::dynamic}}; +bool GraphIteratorMeta::is_valid_signature(const ::ov_tensorflow::SignatureDef& signature) const { + const std::map<::ov_tensorflow::DataType, ov::element::Type> types{ + {::ov_tensorflow::DataType::DT_BOOL, ov::element::boolean}, + {::ov_tensorflow::DataType::DT_INT16, ov::element::i16}, + {::ov_tensorflow::DataType::DT_INT32, ov::element::i32}, + {::ov_tensorflow::DataType::DT_INT64, ov::element::i64}, + {::ov_tensorflow::DataType::DT_HALF, ov::element::f16}, + {::ov_tensorflow::DataType::DT_FLOAT, ov::element::f32}, + {::ov_tensorflow::DataType::DT_DOUBLE, ov::element::f64}, + {::ov_tensorflow::DataType::DT_UINT8, ov::element::u8}, + {::ov_tensorflow::DataType::DT_INT8, ov::element::i8}, + {::ov_tensorflow::DataType::DT_BFLOAT16, ov::element::bf16}, + {::ov_tensorflow::DataType::DT_STRING, ov::element::dynamic}}; for (const auto& it : signature.inputs()) { if (it.second.name().empty() || types.find(it.second.dtype()) == types.end()) diff --git a/src/frontends/tensorflow/src/graph_iterator_meta.hpp b/src/frontends/tensorflow/src/graph_iterator_meta.hpp index 1e2789227260fb..6c14df8ba8fd6b 100644 --- a/src/frontends/tensorflow/src/graph_iterator_meta.hpp +++ b/src/frontends/tensorflow/src/graph_iterator_meta.hpp @@ -27,7 +27,7 @@ std::basic_string get_variables_index_name(const std::wstring // Loads graph from Tensorflow MetaGraph file (*.meta) class GraphIteratorMeta : public GraphIteratorProto { - std::shared_ptr<::tensorflow::MetaGraphDef> m_metagraph_def; + std::shared_ptr<::ov_tensorflow::MetaGraphDef> m_metagraph_def; std::shared_ptr m_variables_index; std::shared_ptr> m_inputs_map; std::shared_ptr> m_outputs_map; @@ -36,7 +36,7 @@ class GraphIteratorMeta : public GraphIteratorProto { public: template GraphIteratorMeta(const std::basic_string& path, const bool mmap_enabled) - : m_metagraph_def(std::make_shared<::tensorflow::MetaGraphDef>()), + : m_metagraph_def(std::make_shared<::ov_tensorflow::MetaGraphDef>()), m_mmap_enabled(mmap_enabled) { this->read_meta(path); } @@ -45,7 +45,7 @@ class GraphIteratorMeta : public GraphIteratorProto { static bool is_supported(const 
std::basic_string& path) { try { std::ifstream mg_stream(path.c_str(), std::ios::in | std::ifstream::binary); - auto metagraph_def = std::make_shared<::tensorflow::MetaGraphDef>(); + auto metagraph_def = std::make_shared<::ov_tensorflow::MetaGraphDef>(); return mg_stream && mg_stream.is_open() && metagraph_def->ParsePartialFromIstream(&mg_stream) && metagraph_def->has_graph_def() && metagraph_def->graph_def().node_size() > 0; } catch (...) { @@ -66,7 +66,7 @@ class GraphIteratorMeta : public GraphIteratorProto { } private: - bool is_valid_signature(const ::tensorflow::SignatureDef& signature) const; + bool is_valid_signature(const ::ov_tensorflow::SignatureDef& signature) const; template bool read_meta(const std::basic_string& path) { @@ -87,10 +87,10 @@ class GraphIteratorMeta : public GraphIteratorProto { bool res = m_metagraph_def->ParseFromIstream(&mg_stream); FRONT_END_GENERAL_CHECK(res && m_metagraph_def->has_graph_def(), "MetaGraph cannot be parsed"); - std::map validSignatures = {}; + std::map validSignatures = {}; for (const auto& sit : m_metagraph_def->signature_def()) { const std::string& key = sit.first; - const ::tensorflow::SignatureDef& val = sit.second; + const ::ov_tensorflow::SignatureDef& val = sit.second; if (is_valid_signature(val)) { validSignatures[key] = &val; } @@ -114,7 +114,7 @@ class GraphIteratorMeta : public GraphIteratorProto { } } - m_graph_def = std::make_shared<::tensorflow::GraphDef>(m_metagraph_def->graph_def()); + m_graph_def = std::make_shared<::ov_tensorflow::GraphDef>(m_metagraph_def->graph_def()); // Update variables map using information by resolving AssignVariableOp graph nodes std::map var_map; diff --git a/src/frontends/tensorflow/src/graph_iterator_proto.hpp b/src/frontends/tensorflow/src/graph_iterator_proto.hpp index 8b073b08373305..d01e1fec6b7a0c 100644 --- a/src/frontends/tensorflow/src/graph_iterator_proto.hpp +++ b/src/frontends/tensorflow/src/graph_iterator_proto.hpp @@ -21,8 +21,8 @@ namespace tensorflow { class GraphIteratorProto : public GraphIterator { protected: - std::shared_ptr<::tensorflow::GraphDef> m_graph_def; - std::shared_ptr<::tensorflow::FunctionDef> m_func_def; + std::shared_ptr<::ov_tensorflow::GraphDef> m_graph_def; + std::shared_ptr<::ov_tensorflow::FunctionDef> m_func_def; std::shared_ptr m_checkpoint_v1_reader; size_t node_index = 0; @@ -32,7 +32,7 @@ class GraphIteratorProto : public GraphIterator { std::vector m_output_names; GraphIteratorProto() - : m_graph_def(std::make_shared<::tensorflow::GraphDef>()), + : m_graph_def(std::make_shared<::ov_tensorflow::GraphDef>()), m_func_def(nullptr), m_checkpoint_v1_reader(nullptr), m_library_map() {} @@ -62,8 +62,8 @@ class GraphIteratorProto : public GraphIterator { } public: - GraphIteratorProto(const std::shared_ptr<::tensorflow::GraphDef>& graph_def, - const std::shared_ptr<::tensorflow::FunctionDef>& func_def, + GraphIteratorProto(const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def, + const std::shared_ptr<::ov_tensorflow::FunctionDef>& func_def, const std::unordered_map& library_map, const std::shared_ptr checkpoint_v1_reader) : m_graph_def(graph_def), @@ -105,7 +105,7 @@ class GraphIteratorProto : public GraphIterator { /// \brief Construct GraphIterator for the frozen model without v1 checkpoints template GraphIteratorProto(const std::basic_string& model_path) - : m_graph_def(std::make_shared<::tensorflow::GraphDef>()), + : m_graph_def(std::make_shared<::ov_tensorflow::GraphDef>()), m_func_def(nullptr), m_checkpoint_v1_reader(nullptr) { std::ifstream 
pb_stream(model_path, std::ios::in | std::ifstream::binary); @@ -119,7 +119,7 @@ class GraphIteratorProto : public GraphIterator { /// \brief Construct GraphIterator for the frozen model with v1 checkpoints template GraphIteratorProto(const std::basic_string& model_path, const std::basic_string& checkpoint_directory) - : m_graph_def(std::make_shared<::tensorflow::GraphDef>()), + : m_graph_def(std::make_shared<::ov_tensorflow::GraphDef>()), m_func_def(nullptr), m_checkpoint_v1_reader(nullptr) { std::ifstream pb_stream(model_path, std::ios::in | std::ifstream::binary); @@ -136,7 +136,7 @@ class GraphIteratorProto : public GraphIterator { static bool is_supported(const std::basic_string& path) { try { std::ifstream pb_stream(path, std::ios::in | std::ifstream::binary); - auto graph_def = std::make_shared<::tensorflow::GraphDef>(); + auto graph_def = std::make_shared<::ov_tensorflow::GraphDef>(); return pb_stream && pb_stream.is_open() && graph_def->ParsePartialFromIstream(&pb_stream) && graph_def->node_size() > 0; } catch (...) { @@ -184,7 +184,7 @@ class GraphIteratorProto : public GraphIterator { "[TensorFlow Error] Internal Error: incorrect library map to cache function indices by names."); auto func = m_graph_def->library().function(func_ind); - auto func_ptr = std::make_shared<::tensorflow::FunctionDef>(func); + auto func_ptr = std::make_shared<::ov_tensorflow::FunctionDef>(func); return std::make_shared(m_graph_def, func_ptr, m_library_map, m_checkpoint_v1_reader); } diff --git a/src/frontends/tensorflow/src/graph_iterator_proto_txt.hpp b/src/frontends/tensorflow/src/graph_iterator_proto_txt.hpp index 6d5b6494f764c5..523d863dbb0bdd 100644 --- a/src/frontends/tensorflow/src/graph_iterator_proto_txt.hpp +++ b/src/frontends/tensorflow/src/graph_iterator_proto_txt.hpp @@ -62,7 +62,7 @@ class GraphIteratorProtoTxt : public GraphIteratorProto { if (!input_stream) { return false; } - auto graph_def = std::make_shared<::tensorflow::GraphDef>(); + auto graph_def = std::make_shared<::ov_tensorflow::GraphDef>(); auto is_parsed = ::google::protobuf::TextFormat::Parse(input_stream.get(), graph_def.get()) && graph_def && graph_def->node_size() > 0; return is_parsed; diff --git a/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp b/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp index ece0148d19bb20..7c9af8216a910f 100644 --- a/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp +++ b/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp @@ -17,19 +17,19 @@ namespace ov { namespace frontend { namespace tensorflow { -bool GraphIteratorSavedModel::is_valid_signature(const ::tensorflow::SignatureDef& signature) const { - const std::map<::tensorflow::DataType, ov::element::Type> types{ - {::tensorflow::DataType::DT_BOOL, ov::element::boolean}, - {::tensorflow::DataType::DT_INT16, ov::element::i16}, - {::tensorflow::DataType::DT_INT32, ov::element::i32}, - {::tensorflow::DataType::DT_INT64, ov::element::i64}, - {::tensorflow::DataType::DT_HALF, ov::element::f16}, - {::tensorflow::DataType::DT_FLOAT, ov::element::f32}, - {::tensorflow::DataType::DT_DOUBLE, ov::element::f64}, - {::tensorflow::DataType::DT_UINT8, ov::element::u8}, - {::tensorflow::DataType::DT_INT8, ov::element::i8}, - {::tensorflow::DataType::DT_BFLOAT16, ov::element::bf16}, - {::tensorflow::DataType::DT_STRING, ov::element::dynamic}}; +bool GraphIteratorSavedModel::is_valid_signature(const ::ov_tensorflow::SignatureDef& signature) const { + const std::map<::ov_tensorflow::DataType, ov::element::Type> 
types{ + {::ov_tensorflow::DataType::DT_BOOL, ov::element::boolean}, + {::ov_tensorflow::DataType::DT_INT16, ov::element::i16}, + {::ov_tensorflow::DataType::DT_INT32, ov::element::i32}, + {::ov_tensorflow::DataType::DT_INT64, ov::element::i64}, + {::ov_tensorflow::DataType::DT_HALF, ov::element::f16}, + {::ov_tensorflow::DataType::DT_FLOAT, ov::element::f32}, + {::ov_tensorflow::DataType::DT_DOUBLE, ov::element::f64}, + {::ov_tensorflow::DataType::DT_UINT8, ov::element::u8}, + {::ov_tensorflow::DataType::DT_INT8, ov::element::i8}, + {::ov_tensorflow::DataType::DT_BFLOAT16, ov::element::bf16}, + {::ov_tensorflow::DataType::DT_STRING, ov::element::dynamic}}; for (const auto& it : signature.inputs()) { if (it.second.name().empty() || types.find(it.second.dtype()) == types.end()) diff --git a/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp b/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp index 511f2a0a5bc307..52b0ba75137835 100644 --- a/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp +++ b/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp @@ -34,7 +34,7 @@ std::basic_string get_variables_index_name(); // Loads graph from Tensorflow Saved Model file (saved_model.pb) class GraphIteratorSavedModel : public GraphIteratorProto { - std::shared_ptr<::tensorflow::SavedModel> m_saved_model; + std::shared_ptr<::ov_tensorflow::SavedModel> m_saved_model; std::shared_ptr m_variables_index; std::shared_ptr> m_inputs_map; std::shared_ptr> m_outputs_map; @@ -43,7 +43,7 @@ class GraphIteratorSavedModel : public GraphIteratorProto { public: template GraphIteratorSavedModel(const std::basic_string& path, const std::string& tags, const bool mmap_enabled) - : m_saved_model(std::make_shared<::tensorflow::SavedModel>()), + : m_saved_model(std::make_shared<::ov_tensorflow::SavedModel>()), m_mmap_enabled(mmap_enabled) { this->read_saved_model(path, tags); } @@ -66,7 +66,7 @@ class GraphIteratorSavedModel : public GraphIteratorProto { } private: - bool is_valid_signature(const ::tensorflow::SignatureDef& signature) const; + bool is_valid_signature(const ::ov_tensorflow::SignatureDef& signature) const; template bool read_saved_model(const std::basic_string& path, const std::string& tags) { @@ -141,11 +141,11 @@ class GraphIteratorSavedModel : public GraphIteratorProto { } /// \brief Does a loading of exact meta-graph - bool load_meta_graph(const ::tensorflow::MetaGraphDef& meta_graph) { - std::map validSignatures = {}; + bool load_meta_graph(const ::ov_tensorflow::MetaGraphDef& meta_graph) { + std::map validSignatures = {}; for (const auto& sit : meta_graph.signature_def()) { const std::string& key = sit.first; - const ::tensorflow::SignatureDef& val = sit.second; + const ::ov_tensorflow::SignatureDef& val = sit.second; if (is_valid_signature(val)) { validSignatures[key] = &val; } @@ -167,7 +167,7 @@ class GraphIteratorSavedModel : public GraphIteratorProto { } } - m_graph_def = std::make_shared<::tensorflow::GraphDef>(meta_graph.graph_def()); + m_graph_def = std::make_shared<::ov_tensorflow::GraphDef>(meta_graph.graph_def()); // Update variables map using information by resolving AssignVariableOp graph nodes std::map var_map; diff --git a/src/frontends/tensorflow/src/op/var_handle.cpp b/src/frontends/tensorflow/src/op/var_handle.cpp index 50a5b73c449f8f..edca2d2bca8cb0 100644 --- a/src/frontends/tensorflow/src/op/var_handle.cpp +++ b/src/frontends/tensorflow/src/op/var_handle.cpp @@ -26,7 +26,7 @@ template static std::shared_ptr read_variable(std::shared_ptr var_index, 
const ov::element::Type ov_type, const ov::Shape shape, - const ::tensorflow::BundleEntryProto& entry, + const ::ov_tensorflow::BundleEntryProto& entry, const NodeContext& node) { google::protobuf::int64 size = 1; for (uint64_t i = 0; i < shape.size(); ++i) { @@ -95,7 +95,7 @@ OutputVector translate_varhandle_op(const NodeContext& node) { TENSORFLOW_OP_VALIDATION(node, result, "[TensorFlow Frontend] Internal error: Cannot find requested variable."); - ::tensorflow::BundleEntryProto entry; + ::ov_tensorflow::BundleEntryProto entry; TENSORFLOW_OP_VALIDATION(node, entry.ParseFromArray(entry_data, static_cast(entry_size)), "[TensorFlow Frontend] Internal error: Cannot get read bundle entry."); diff --git a/src/frontends/tensorflow/src/op/xla_conv_v2.cpp b/src/frontends/tensorflow/src/op/xla_conv_v2.cpp index dc2e319c9a03b1..605b2c5f51e209 100644 --- a/src/frontends/tensorflow/src/op/xla_conv_v2.cpp +++ b/src/frontends/tensorflow/src/op/xla_conv_v2.cpp @@ -20,7 +20,7 @@ using namespace std; using namespace ov; using namespace ov::op; -using namespace xla; +using namespace ov_xla; namespace ov { namespace frontend { diff --git a/src/frontends/tensorflow/src/op/xla_dot.cpp b/src/frontends/tensorflow/src/op/xla_dot.cpp index e463494511f076..00493e1385d7b2 100644 --- a/src/frontends/tensorflow/src/op/xla_dot.cpp +++ b/src/frontends/tensorflow/src/op/xla_dot.cpp @@ -92,7 +92,7 @@ OutputVector translate_xla_dot_op(const NodeContext& node) { auto rhs = node.get_input(1); auto node_name = node.get_name(); auto dimension_numbers_message = node.get_attribute("dimension_numbers"); - ::xla::DotDimensionNumbers dimension_numbers; + ::ov_xla::DotDimensionNumbers dimension_numbers; TENSORFLOW_OP_VALIDATION( node, dimension_numbers.ParseFromArray(dimension_numbers_message.data(), diff --git a/src/frontends/tensorflow/src/proto/allocation_description.proto b/src/frontends/tensorflow/src/proto/allocation_description.proto index 8932ca2cb33b33..589f1bf597b0fb 100644 --- a/src/frontends/tensorflow/src/proto/allocation_description.proto +++ b/src/frontends/tensorflow/src/proto/allocation_description.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "AllocationDescriptionProtos"; diff --git a/src/frontends/tensorflow/src/proto/api_def.proto b/src/frontends/tensorflow/src/proto/api_def.proto index 810aabc5a2c2c3..31139f89855e65 100644 --- a/src/frontends/tensorflow/src/proto/api_def.proto +++ b/src/frontends/tensorflow/src/proto/api_def.proto @@ -15,7 +15,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "ApiDefProtos"; option java_multiple_files = true; diff --git a/src/frontends/tensorflow/src/proto/attr_value.proto b/src/frontends/tensorflow/src/proto/attr_value.proto index 3028176c02bcd7..c42f78ac45a42b 100644 --- a/src/frontends/tensorflow/src/proto/attr_value.proto +++ b/src/frontends/tensorflow/src/proto/attr_value.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "tensor.proto"; import "tensor_shape.proto"; diff --git a/src/frontends/tensorflow/src/proto/cost_graph.proto b/src/frontends/tensorflow/src/proto/cost_graph.proto index dad93a029babae..db348eb8860847 100644 --- a/src/frontends/tensorflow/src/proto/cost_graph.proto +++ b/src/frontends/tensorflow/src/proto/cost_graph.proto @@ -12,7 
+12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "tensor_shape.proto"; import "types.proto"; diff --git a/src/frontends/tensorflow/src/proto/dataset_options.proto b/src/frontends/tensorflow/src/proto/dataset_options.proto index dc492a60fe0ebe..be7a0d8efd0c61 100644 --- a/src/frontends/tensorflow/src/proto/dataset_options.proto +++ b/src/frontends/tensorflow/src/proto/dataset_options.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow.data; +package ov_tensorflow.data; // Represents the type of auto-sharding we enable. enum AutoShardPolicy { diff --git a/src/frontends/tensorflow/src/proto/device_attributes.proto b/src/frontends/tensorflow/src/proto/device_attributes.proto index 92c8a6b2d191bc..bec64f2744124b 100644 --- a/src/frontends/tensorflow/src/proto/device_attributes.proto +++ b/src/frontends/tensorflow/src/proto/device_attributes.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "DeviceAttributesProtos"; diff --git a/src/frontends/tensorflow/src/proto/function.proto b/src/frontends/tensorflow/src/proto/function.proto index 65a2acb3b91979..271126ac0f4687 100644 --- a/src/frontends/tensorflow/src/proto/function.proto +++ b/src/frontends/tensorflow/src/proto/function.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "attr_value.proto"; import "node_def.proto"; diff --git a/src/frontends/tensorflow/src/proto/any.proto b/src/frontends/tensorflow/src/proto/google/protobuf/any.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/any.proto rename to src/frontends/tensorflow/src/proto/google/protobuf/any.proto diff --git a/src/frontends/tensorflow/src/proto/wrappers.proto b/src/frontends/tensorflow/src/proto/google/protobuf/wrappers.proto similarity index 100% rename from src/frontends/tensorflow/src/proto/wrappers.proto rename to src/frontends/tensorflow/src/proto/google/protobuf/wrappers.proto diff --git a/src/frontends/tensorflow/src/proto/graph.proto b/src/frontends/tensorflow/src/proto/graph.proto index c52e84022f9fcd..97bf8002700d0a 100644 --- a/src/frontends/tensorflow/src/proto/graph.proto +++ b/src/frontends/tensorflow/src/proto/graph.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "function.proto"; import "node_def.proto"; diff --git a/src/frontends/tensorflow/src/proto/graph_transfer_info.proto b/src/frontends/tensorflow/src/proto/graph_transfer_info.proto index e42c1353695313..821e7619cc8488 100644 --- a/src/frontends/tensorflow/src/proto/graph_transfer_info.proto +++ b/src/frontends/tensorflow/src/proto/graph_transfer_info.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "types.proto"; diff --git a/src/frontends/tensorflow/src/proto/kernel_def.proto b/src/frontends/tensorflow/src/proto/kernel_def.proto index 5e6b839d31582e..a8d0daeaa9ef20 100644 --- a/src/frontends/tensorflow/src/proto/kernel_def.proto +++ b/src/frontends/tensorflow/src/proto/kernel_def.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "attr_value.proto"; diff --git a/src/frontends/tensorflow/src/proto/log_memory.proto 
b/src/frontends/tensorflow/src/proto/log_memory.proto index 96dac4c9ca370d..39ea81659c7eda 100644 --- a/src/frontends/tensorflow/src/proto/log_memory.proto +++ b/src/frontends/tensorflow/src/proto/log_memory.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "tensor_description.proto"; diff --git a/src/frontends/tensorflow/src/proto/meta_graph.proto b/src/frontends/tensorflow/src/proto/meta_graph.proto index b6918fa853bf8c..48f4c4b5e83860 100644 --- a/src/frontends/tensorflow/src/proto/meta_graph.proto +++ b/src/frontends/tensorflow/src/proto/meta_graph.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; -import "any.proto"; +import "google/protobuf/any.proto"; import "graph.proto"; import "op_def.proto"; import "tensor_shape.proto"; diff --git a/src/frontends/tensorflow/src/proto/model.proto b/src/frontends/tensorflow/src/proto/model.proto index a6567d462b8772..1614f284b7fd55 100644 --- a/src/frontends/tensorflow/src/proto/model.proto +++ b/src/frontends/tensorflow/src/proto/model.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow.data.model; +package ov_tensorflow.data.model; option cc_enable_arenas = true; diff --git a/src/frontends/tensorflow/src/proto/node_def.proto b/src/frontends/tensorflow/src/proto/node_def.proto index 573d0f901dd732..848c27d4c4c1f8 100644 --- a/src/frontends/tensorflow/src/proto/node_def.proto +++ b/src/frontends/tensorflow/src/proto/node_def.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "attr_value.proto"; diff --git a/src/frontends/tensorflow/src/proto/op_def.proto b/src/frontends/tensorflow/src/proto/op_def.proto index 4d5c66c39e16d7..d44526f059c548 100644 --- a/src/frontends/tensorflow/src/proto/op_def.proto +++ b/src/frontends/tensorflow/src/proto/op_def.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "OpDefProtos"; option java_multiple_files = true; diff --git a/src/frontends/tensorflow/src/proto/reader_base.proto b/src/frontends/tensorflow/src/proto/reader_base.proto index 0c3536600e6f24..e51e3781ddc6d1 100644 --- a/src/frontends/tensorflow/src/proto/reader_base.proto +++ b/src/frontends/tensorflow/src/proto/reader_base.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "ReaderBaseProtos"; diff --git a/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto b/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto index abfcfdbec08007..b94ee5e6f1b892 100644 --- a/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto +++ b/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "graph.proto"; import "tensor_shape.proto"; diff --git a/src/frontends/tensorflow/src/proto/resource_handle.proto b/src/frontends/tensorflow/src/proto/resource_handle.proto index 4d872b6d9d8074..55345d0302a428 100644 --- a/src/frontends/tensorflow/src/proto/resource_handle.proto +++ b/src/frontends/tensorflow/src/proto/resource_handle.proto @@ -12,7 +12,7 @@ 
limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "tensor_shape.proto"; import "types.proto"; diff --git a/src/frontends/tensorflow/src/proto/saved_model.proto b/src/frontends/tensorflow/src/proto/saved_model.proto index 0034fdfd46dcf8..75a809070b59e0 100644 --- a/src/frontends/tensorflow/src/proto/saved_model.proto +++ b/src/frontends/tensorflow/src/proto/saved_model.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "meta_graph.proto"; diff --git a/src/frontends/tensorflow/src/proto/saved_object_graph.proto b/src/frontends/tensorflow/src/proto/saved_object_graph.proto index 671441075c3628..9ce18710a14954 100644 --- a/src/frontends/tensorflow/src/proto/saved_object_graph.proto +++ b/src/frontends/tensorflow/src/proto/saved_object_graph.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; -import "any.proto"; +import "google/protobuf/any.proto"; import "tensor_shape.proto"; import "types.proto"; import "variable.proto"; diff --git a/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto b/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto index 4645b2bdca9b89..6d13b1f27aa455 100644 --- a/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto +++ b/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto @@ -29,7 +29,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "SavedTensorSliceProtos"; option java_multiple_files = true; diff --git a/src/frontends/tensorflow/src/proto/saver.proto b/src/frontends/tensorflow/src/proto/saver.proto index 7834f473e4ccdf..634397b1ee04b3 100644 --- a/src/frontends/tensorflow/src/proto/saver.proto +++ b/src/frontends/tensorflow/src/proto/saver.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "SaverProtos"; diff --git a/src/frontends/tensorflow/src/proto/step_stats.proto b/src/frontends/tensorflow/src/proto/step_stats.proto index 04e0864a5aec49..b2524e28a807fa 100644 --- a/src/frontends/tensorflow/src/proto/step_stats.proto +++ b/src/frontends/tensorflow/src/proto/step_stats.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "allocation_description.proto"; import "tensor_description.proto"; diff --git a/src/frontends/tensorflow/src/proto/struct.proto b/src/frontends/tensorflow/src/proto/struct.proto index d03201b685ac79..7da836debd7e76 100644 --- a/src/frontends/tensorflow/src/proto/struct.proto +++ b/src/frontends/tensorflow/src/proto/struct.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "tensor.proto"; import "tensor_shape.proto"; @@ -66,9 +66,9 @@ message StructuredValue { bool bool_value = 14; // Represents a TensorShape. - tensorflow.TensorShapeProto tensor_shape_value = 31; + ov_tensorflow.TensorShapeProto tensor_shape_value = 31; // Represents an enum value for dtype. - tensorflow.DataType tensor_dtype_value = 32; + ov_tensorflow.DataType tensor_dtype_value = 32; // Represents a value for tf.TensorSpec. TensorSpecProto tensor_spec_value = 33; // Represents a value for tf.TypeSpec. 
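The blanket `package tensorflow;` to `package ov_tensorflow;` rewrite running through these proto files is the standard cure for protobuf symbol clashes: a descriptor pool keys every message by its fully qualified name, so OpenVINO's bundled copies of these schemas could collide with identical ones registered by an installed TensorFlow in the same process. A minimal Python sketch of the failure mode and the fix; the file names and the single message used here are illustrative, not taken from this patch:

```python
from google.protobuf import descriptor_pb2, descriptor_pool

pool = descriptor_pool.DescriptorPool()

def add_file(name: str, package: str) -> None:
    # Build a FileDescriptorProto declaring one message and register it,
    # which is what generated *_pb2 modules do when they are imported.
    fdp = descriptor_pb2.FileDescriptorProto()
    fdp.name = name
    fdp.package = package
    fdp.message_type.add(name="TensorShapeProto")
    pool.Add(fdp)

add_file("tensorflow/tensor_shape.proto", "tensorflow")  # first copy: fine

try:
    # A second schema under the same package clashes on the fully qualified
    # name 'tensorflow.TensorShapeProto'. The default C++/upb protobuf
    # implementation raises TypeError here; the pure-Python one only warns.
    add_file("openvino/tf/tensor_shape.proto", "tensorflow")
except Exception as err:
    print("collision:", err)

# Re-packaging gives the bundled copy its own namespace, as this patch does.
add_file("openvino/tf/tensor_shape_ov.proto", "ov_tensorflow")
print(pool.FindMessageTypeByName("ov_tensorflow.TensorShapeProto").full_name)
```

On the C++ side the same rename moves all generated symbols from `::tensorflow::` to `::ov_tensorflow::`, which is exactly the mechanical substitution the source hunks earlier in this patch perform.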
@@ -121,17 +121,17 @@ message NamedTupleValue { // A protobuf to represent tf.TensorSpec. message TensorSpecProto { string name = 1; - tensorflow.TensorShapeProto shape = 2; - tensorflow.DataType dtype = 3; + ov_tensorflow.TensorShapeProto shape = 2; + ov_tensorflow.DataType dtype = 3; } // A protobuf to represent tf.BoundedTensorSpec. message BoundedTensorSpecProto { string name = 1; - tensorflow.TensorShapeProto shape = 2; - tensorflow.DataType dtype = 3; - tensorflow.TensorProto minimum = 4; - tensorflow.TensorProto maximum = 5; + ov_tensorflow.TensorShapeProto shape = 2; + ov_tensorflow.DataType dtype = 3; + ov_tensorflow.TensorProto minimum = 4; + ov_tensorflow.TensorProto maximum = 5; } // Represents a tf.TypeSpec diff --git a/src/frontends/tensorflow/src/proto/summary.proto b/src/frontends/tensorflow/src/proto/summary.proto index 9e4b95f4bc3348..16bc6235bfb1b3 100644 --- a/src/frontends/tensorflow/src/proto/summary.proto +++ b/src/frontends/tensorflow/src/proto/summary.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "tensor.proto"; diff --git a/src/frontends/tensorflow/src/proto/tensor.proto b/src/frontends/tensorflow/src/proto/tensor.proto index c2e1fd7eb6b627..85fd170596eefe 100644 --- a/src/frontends/tensorflow/src/proto/tensor.proto +++ b/src/frontends/tensorflow/src/proto/tensor.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "resource_handle.proto"; import "tensor_shape.proto"; diff --git a/src/frontends/tensorflow/src/proto/tensor_bundle.proto b/src/frontends/tensorflow/src/proto/tensor_bundle.proto index 43fea749b42172..48bf6be520920b 100644 --- a/src/frontends/tensorflow/src/proto/tensor_bundle.proto +++ b/src/frontends/tensorflow/src/proto/tensor_bundle.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "tensor_shape.proto"; import "tensor_slice.proto"; diff --git a/src/frontends/tensorflow/src/proto/tensor_description.proto b/src/frontends/tensorflow/src/proto/tensor_description.proto index 3ab9c310a6f127..86ecbe2b3e4047 100644 --- a/src/frontends/tensorflow/src/proto/tensor_description.proto +++ b/src/frontends/tensorflow/src/proto/tensor_description.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; import "allocation_description.proto"; import "tensor_shape.proto"; diff --git a/src/frontends/tensorflow/src/proto/tensor_shape.proto b/src/frontends/tensorflow/src/proto/tensor_shape.proto index 0a7515def63931..48d821006c4989 100644 --- a/src/frontends/tensorflow/src/proto/tensor_shape.proto +++ b/src/frontends/tensorflow/src/proto/tensor_shape.proto @@ -19,7 +19,7 @@ option java_multiple_files = true; option java_package = "org.tensorflow.framework"; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/tensor_shape_go_proto"; -package tensorflow; +package ov_tensorflow; // Dimensions of a tensor. 
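`TensorShapeProto`, defined just below, is also what the decoder's `kShape` branch earlier in this patch converts to an `ov::PartialShape`: an unknown rank becomes a fully dynamic shape, and otherwise each dimension is copied over, with TensorFlow's -1 meaning "unknown". A rough Python equivalent of that mapping, assuming only the standard `openvino` package:

```python
import openvino as ov

def shape_proto_to_partial_shape(unknown_rank: bool, dims: list) -> ov.PartialShape:
    """Sketch of the decoder's kShape handling, not the real frontend code."""
    if unknown_rank:
        return ov.PartialShape.dynamic()
    # TensorFlow encodes an unknown dimension as -1; ov.Dimension() is dynamic.
    return ov.PartialShape(
        [ov.Dimension() if d == -1 else ov.Dimension(d) for d in dims]
    )

print(shape_proto_to_partial_shape(True, []))             # fully dynamic shape
print(shape_proto_to_partial_shape(False, [1, -1, 224]))  # [1,?,224]
```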
message TensorShapeProto { diff --git a/src/frontends/tensorflow/src/proto/tensor_slice.proto b/src/frontends/tensorflow/src/proto/tensor_slice.proto index 415012483056d3..b30c61eca33361 100644 --- a/src/frontends/tensorflow/src/proto/tensor_slice.proto +++ b/src/frontends/tensorflow/src/proto/tensor_slice.proto @@ -14,7 +14,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "TensorSliceProtos"; diff --git a/src/frontends/tensorflow/src/proto/trackable_object_graph.proto b/src/frontends/tensorflow/src/proto/trackable_object_graph.proto index f4a8e4da34f129..748be64410c002 100644 --- a/src/frontends/tensorflow/src/proto/trackable_object_graph.proto +++ b/src/frontends/tensorflow/src/proto/trackable_object_graph.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; -import "wrappers.proto"; +import "google/protobuf/wrappers.proto"; option cc_enable_arenas = true; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; diff --git a/src/frontends/tensorflow/src/proto/types.proto b/src/frontends/tensorflow/src/proto/types.proto index 0a60332f662397..a50586760a7cdf 100644 --- a/src/frontends/tensorflow/src/proto/types.proto +++ b/src/frontends/tensorflow/src/proto/types.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "TypesProtos"; option java_multiple_files = true; diff --git a/src/frontends/tensorflow/src/proto/variable.proto b/src/frontends/tensorflow/src/proto/variable.proto index 6e9a05d1291b23..297638e7bc649a 100644 --- a/src/frontends/tensorflow/src/proto/variable.proto +++ b/src/frontends/tensorflow/src/proto/variable.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "VariableProtos"; diff --git a/src/frontends/tensorflow/src/proto/versions.proto b/src/frontends/tensorflow/src/proto/versions.proto index 31a6623cf71a90..0fc46788dc2078 100644 --- a/src/frontends/tensorflow/src/proto/versions.proto +++ b/src/frontends/tensorflow/src/proto/versions.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package tensorflow; +package ov_tensorflow; option cc_enable_arenas = true; option java_outer_classname = "VersionsProtos"; diff --git a/src/frontends/tensorflow/src/proto/xla_data.proto b/src/frontends/tensorflow/src/proto/xla_data.proto index 95695ba78a2974..42ff8065983f77 100644 --- a/src/frontends/tensorflow/src/proto/xla_data.proto +++ b/src/frontends/tensorflow/src/proto/xla_data.proto @@ -15,7 +15,7 @@ limitations under the License. 
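Note that `any.proto` and `wrappers.proto` get different treatment in this patch: rather than being re-packaged they are moved under a `google/protobuf/` directory, presumably so that `import "google/protobuf/any.proto"` keeps resolving these well-known types by their canonical path and `Any` type URLs stay compatible. A small, self-contained illustration of why the canonical names matter; the wrapped payload is arbitrary:

```python
from google.protobuf import any_pb2, wrappers_pb2

payload = wrappers_pb2.StringValue(value="saved-model attribute")

box = any_pb2.Any()
box.Pack(payload)
# The URL embeds the canonical full name google.protobuf.StringValue,
# which is why these two files must keep their original package.
print(box.type_url)

unpacked = wrappers_pb2.StringValue()
assert box.Is(wrappers_pb2.StringValue.DESCRIPTOR)
box.Unpack(unpacked)
print(unpacked.value)
```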
syntax = "proto3"; -package xla; +package ov_xla; option cc_enable_arenas = true; diff --git a/src/frontends/tensorflow/src/tf_utils.cpp b/src/frontends/tensorflow/src/tf_utils.cpp index c72e8e7bb9080a..1c7df199a851a6 100644 --- a/src/frontends/tensorflow/src/tf_utils.cpp +++ b/src/frontends/tensorflow/src/tf_utils.cpp @@ -83,7 +83,7 @@ void extract_tensor_content(const string& tensor_content, Tensor* values) { # pragma warning(disable : 4267) // possible loss of data #endif template -void extract_compressed_tensor_content(const ::tensorflow::TensorProto& tensor_proto, +void extract_compressed_tensor_content(const ::ov_tensorflow::TensorProto& tensor_proto, int64_t val_size, Tensor* values) { auto val_lastsaved = static_cast(0); @@ -149,30 +149,30 @@ bool CfMarkerType::is_copyable() const { return false; } -Type get_ov_type(const ::tensorflow::DataType& type) { - static const map<::tensorflow::DataType, Type> type_map{{::tensorflow::DataType::DT_BOOL, boolean}, - {::tensorflow::DataType::DT_INT16, i16}, - {::tensorflow::DataType::DT_INT32, i32}, - {::tensorflow::DataType::DT_INT64, i64}, - {::tensorflow::DataType::DT_HALF, f16}, - {::tensorflow::DataType::DT_FLOAT, f32}, - {::tensorflow::DataType::DT_DOUBLE, f64}, - {::tensorflow::DataType::DT_UINT8, u8}, - {::tensorflow::DataType::DT_INT8, i8}, - {::tensorflow::DataType::DT_BFLOAT16, bf16}}; +Type get_ov_type(const ::ov_tensorflow::DataType& type) { + static const map<::ov_tensorflow::DataType, Type> type_map{{::ov_tensorflow::DataType::DT_BOOL, boolean}, + {::ov_tensorflow::DataType::DT_INT16, i16}, + {::ov_tensorflow::DataType::DT_INT32, i32}, + {::ov_tensorflow::DataType::DT_INT64, i64}, + {::ov_tensorflow::DataType::DT_HALF, f16}, + {::ov_tensorflow::DataType::DT_FLOAT, f32}, + {::ov_tensorflow::DataType::DT_DOUBLE, f64}, + {::ov_tensorflow::DataType::DT_UINT8, u8}, + {::ov_tensorflow::DataType::DT_INT8, i8}, + {::ov_tensorflow::DataType::DT_BFLOAT16, bf16}}; auto it = type_map.find(type); // for all unsupported types return dynamic type return it == type_map.end() ? 
dynamic : it->second; } -Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto) { +Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto) { return unpack_tensor_proto(tensor_proto, tensor_proto.tensor_shape(), tensor_proto.dtype()); } -Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto, - const ::tensorflow::TensorShapeProto& tensor_shape, - const ::tensorflow::DataType& tensor_type) { +Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto, + const ::ov_tensorflow::TensorShapeProto& tensor_shape, + const ::ov_tensorflow::DataType& tensor_type) { PartialShape pshape; for (int i = 0; i < tensor_shape.dim_size(); i++) { pshape.push_back(tensor_shape.dim(i).size()); @@ -180,7 +180,7 @@ Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto, FRONT_END_GENERAL_CHECK(pshape.is_static(), "Dynamic shapes are not supported for Tensor attribute."); Type ov_type = get_ov_type(tensor_type); - if (tensor_type != ::tensorflow::DataType::DT_STRING) { + if (tensor_type != ::ov_tensorflow::DataType::DT_STRING) { FRONT_END_GENERAL_CHECK( ov_type.is_static(), "Encountered unknown element type " + DataType_Name(tensor_type) + " on an empty tensor_proto"); diff --git a/src/frontends/tensorflow/src/tf_utils.hpp b/src/frontends/tensorflow/src/tf_utils.hpp index 5de9029a816e6c..286ce1440bc638 100644 --- a/src/frontends/tensorflow/src/tf_utils.hpp +++ b/src/frontends/tensorflow/src/tf_utils.hpp @@ -24,13 +24,13 @@ namespace tensorflow { #define CF_MARKER_TAG "tf_cf_marker_tag" -ov::element::Type get_ov_type(const ::tensorflow::DataType& type); +ov::element::Type get_ov_type(const ::ov_tensorflow::DataType& type); -ov::Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto); +ov::Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto); -ov::Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto, - const ::tensorflow::TensorShapeProto& tensor_shape, - const ::tensorflow::DataType& tensor_type); +ov::Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto, + const ::ov_tensorflow::TensorShapeProto& tensor_shape, + const ::ov_tensorflow::DataType& tensor_type); class Switch; using SetOfSwitchNodes = std::unordered_set>; diff --git a/src/frontends/tensorflow/src/variables_index.cpp b/src/frontends/tensorflow/src/variables_index.cpp index c24ffd8112bd09..cda18ca3ca7c7f 100644 --- a/src/frontends/tensorflow/src/variables_index.cpp +++ b/src/frontends/tensorflow/src/variables_index.cpp @@ -126,7 +126,7 @@ void VariablesIndex::read_bundle_header() { auto item = m_variables_index.find(""); FRONT_END_GENERAL_CHECK(item != m_variables_index.end(), "Bundle Header isn't found in index"); - ::tensorflow::BundleHeaderProto bundleHeader; + ::ov_tensorflow::BundleHeaderProto bundleHeader; FRONT_END_GENERAL_CHECK(bundleHeader.ParseFromArray(item->second.data(), static_cast(item->second.size())), "Bundle Header: Cannot parse Bundle Header"); FRONT_END_GENERAL_CHECK(bundleHeader.version().producer() == 1, "Bundle Header: Unsupported producer version"); @@ -145,7 +145,7 @@ void VariablesIndex::read_checkpointable_object_graph() { return; } - ::tensorflow::BundleEntryProto entry; + ::ov_tensorflow::BundleEntryProto entry; FRONT_END_GENERAL_CHECK(entry.ParseFromArray(item->second.data(), static_cast(item->second.size())), "CMO: Cannot parse Bundle Entry"); @@ -155,7 +155,7 @@ void VariablesIndex::read_checkpointable_object_graph() { FRONT_END_GENERAL_CHECK(shard != m_data_files.end(), 
"CMO: data files isn't found"); std::vector data(entry.size()); - ::tensorflow::TrackableObjectGraph tog; + ::ov_tensorflow::TrackableObjectGraph tog; // TODO: have to understand this offset // It looks like reinterpret_cast artifact @@ -244,13 +244,13 @@ bool VariablesIndex::read_variables(std::ifstream& vi_stream, const std::wstring struct PtrNode { using SharedPtrNode = std::shared_ptr; - const ::tensorflow::NodeDef* node; + const ::ov_tensorflow::NodeDef* node; std::vector inputs; std::vector outputs; PtrNode() : node(nullptr), inputs(), outputs() {} - PtrNode(const ::tensorflow::NodeDef& src_node) { + PtrNode(const ::ov_tensorflow::NodeDef& src_node) { node = &src_node; } @@ -308,14 +308,14 @@ struct PtrNode { } }; -static void read_stateful_partitioned_call(const std::shared_ptr<::tensorflow::GraphDef> graph_def, - const ::tensorflow::NodeDef& partCall, +static void read_stateful_partitioned_call(const std::shared_ptr<::ov_tensorflow::GraphDef> graph_def, + const ::ov_tensorflow::NodeDef& partCall, std::map& node_dictionary) { FRONT_END_GENERAL_CHECK(partCall.op() == "StatefulPartitionedCall", "Passed node isn't StatefulPartitionedCall"); std::string func_name = partCall.attr().at("f").func().name(); - const ::tensorflow::FunctionDef* func_def = nullptr; + const ::ov_tensorflow::FunctionDef* func_def = nullptr; for (const auto& func : graph_def->library().function()) { if (func.signature().name() == func_name) { func_def = &func; @@ -365,7 +365,7 @@ static void read_stateful_partitioned_call(const std::shared_ptr<::tensorflow::G } } -void VariablesIndex::map_assignvariable(const std::shared_ptr<::tensorflow::GraphDef> graph_def, +void VariablesIndex::map_assignvariable(const std::shared_ptr<::ov_tensorflow::GraphDef> graph_def, std::map& variables_map) { std::map nodes; diff --git a/src/frontends/tensorflow/src/variables_index.hpp b/src/frontends/tensorflow/src/variables_index.hpp index df852a627994e7..2fb517e8e9b2c8 100644 --- a/src/frontends/tensorflow/src/variables_index.hpp +++ b/src/frontends/tensorflow/src/variables_index.hpp @@ -139,7 +139,7 @@ class VariablesIndex { /// It needs to map VarHandleOp to right place in .index file. 
/// \param[in] graph_def GraphDef object for analysis /// \param[out] variables_map Map of variables found in graph_def - static void map_assignvariable(const std::shared_ptr<::tensorflow::GraphDef> graph_def, + static void map_assignvariable(const std::shared_ptr<::ov_tensorflow::GraphDef> graph_def, std::map& variables_map); private: diff --git a/src/frontends/tensorflow/tests/tf_utils.cpp b/src/frontends/tensorflow/tests/tf_utils.cpp index 120b8ffab8659d..d742b53dcf8704 100644 --- a/src/frontends/tensorflow/tests/tf_utils.cpp +++ b/src/frontends/tensorflow/tests/tf_utils.cpp @@ -16,6 +16,8 @@ namespace frontend { namespace tensorflow { namespace tests { +const std::string TF_FE = "tf"; + shared_ptr convert_model(const string& model_path, const ConversionExtension::Ptr& conv_ext, const vector& input_names, diff --git a/src/frontends/tensorflow/tests/tf_utils.hpp b/src/frontends/tensorflow/tests/tf_utils.hpp index 1c48a95c85fee8..80addd43d61e9e 100644 --- a/src/frontends/tensorflow/tests/tf_utils.hpp +++ b/src/frontends/tensorflow/tests/tf_utils.hpp @@ -14,7 +14,7 @@ namespace ov { namespace frontend { namespace tensorflow { namespace tests { -static const std::string TF_FE = "tf"; +extern const std::string TF_FE; // a wrapper to create TensorFlow Frontend and configure the conversion pipeline // by registering new translator via extension, specifying (new) inputs, their shapes and types From be177021b0f370d229ec0ef0aaeff360a819645e Mon Sep 17 00:00:00 2001 From: Anastasiia Pnevskaia Date: Thu, 19 Oct 2023 13:26:21 +0200 Subject: [PATCH 20/39] Switching telemetry to opt-out and disabling telemetry in CI jobs (#20391) * Added disabling telemetry script to linux azure job, added debug checks to convert_model. * Telemetry disabling. * Disabling telemetry. * Config corrections. * Config corrections. * Update .github/workflows/mac.yml Co-authored-by: Andrey Kashchikhin * Debug output. * Win config correction. * Win config correction. * Debug output. * Debug output. * Added turning off telemetry to onnx azure tests config. * Corrected config. * Removed debug output. * Switch telemetry to opt-out. * Removed not needed blank lines. * Text correction. * Debug output. * Remove changes from CI configs, add CI var to dockers. * Config correction. * Debug output. * Config corrected. * Readme corrected. * Config changed. * Config changed. * Debug output. * Required version changed. * Remove debug output. --------- Co-authored-by: Andrey Kashchikhin --- README.md | 10 +++++ .../telemetry_information.md | 40 +++---------------- src/bindings/python/requirements.txt | 2 +- src/bindings/python/setup.cfg | 1 + tools/constraints.txt | 2 +- .../tools/mo/utils/telemetry_utils.py | 9 ++++- .../ovc/openvino/tools/ovc/telemetry_utils.py | 8 +++- 7 files changed, 32 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index bfc4a722c2680d..489ef7803ccd80 100644 --- a/README.md +++ b/README.md @@ -128,6 +128,16 @@ OpenVINO™ Toolkit also contains several plugins which simplify loading models OpenVINO™ Toolkit is licensed under [Apache License Version 2.0](LICENSE). By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. +## Telemetry +OpenVINO™ collects software performance and usage data for the purpose of improving OpenVINO™ tools. This data is collected directly by OpenVINO™ or through the use of Google Analytics 4.
+You can opt-out at any time by running the command: + +``` bash +opt_in_out --opt_out +``` + +More Information is available at https://docs.openvino.ai/latest/openvino_docs_telemetry_information.html. + ## Documentation ### User documentation diff --git a/docs/articles_en/about_openvino/additional_resources/telemetry_information.md b/docs/articles_en/about_openvino/additional_resources/telemetry_information.md index 4340a40923770b..b23a763ff97e70 100644 --- a/docs/articles_en/about_openvino/additional_resources/telemetry_information.md +++ b/docs/articles_en/about_openvino/additional_resources/telemetry_information.md @@ -3,13 +3,11 @@ @sphinxdirective .. meta:: - :description: Learn about OpenVINO™ telemetry, that with your explicit consent - collects only usage data to simplify debugging and further development. + :description: Learn about OpenVINO™ telemetry, that collects anonymous usage data for the purpose of improving OpenVINO™ tools. -To facilitate debugging and further development, OpenVINO™ asks its users for -a permission to collect telemetry data. It will not be collected -without an explicit consent on your part and will cover only OpenVINO™ usage information. +To facilitate debugging and further development, OpenVINO™ collects anonymous telemetry data. Anonymous telemetry data is collected by default, +but you can stop data collection anytime by running the command ``opt_in_out --opt_out``. It does not extend to any other Intel software, hardware, website usage, or other products. Google Analytics is used for telemetry purposes. Refer to @@ -18,34 +16,6 @@ Google Analytics is used for telemetry purposes. Refer to Enable or disable Telemetry reporting ########################################################### -First-run consent -+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - -On the first run of an application that collects telemetry data, you will be prompted -to opt in or out of telemetry collection with the following telemetry message: - -.. code-block:: console - - Intel would like your permission to collect software performance and usage data - for the purpose of improving Intel products and services. This data will be collected - directly by Intel or through the use of Google Analytics. This data will be stored - in countries where Intel or Google operate. - - You can opt-out at any time in the future by running ``opt_in_out --opt_in``. - - More Information is available at docs.openvino.ai. - - Please type ``Y`` to give your consent or ``N`` to decline. - -Choose your preference by typing ``Y`` to enable or ``N`` to disable telemetry. Your choice will -be confirmed by a corresponding disclaimer. If you do not reply to the telemetry message, -your telemetry data will not be collected. - -For the Neural Network Compression Framework (NNCF), which is not a command line application, -the telemetry message will not display. Telemetry data will only be collected from NNCF -if you have explicitly provided consent in another OpenVINO tool. - - Changing consent decision +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ @@ -82,8 +52,8 @@ Telemetry Data Collection Details .. tab-item:: Telemetry Data Retention :sync: telemetry-data-retention - Telemetry data is retained in Google Analytics for a maximum of 26 months. - Any raw data that has reached the 26-month threshold is deleted from Google Analytics on a monthly basis. + Telemetry data is retained in Google Analytics for a maximum of 14 months.
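Operationally, the opt-out switch lands in the two `init_mo_telemetry` helpers patched below: the `openvino-telemetry` constructor gains `enable_opt_in_dialog=False` (collect by default, no first-run prompt) and `disable_in_ci=True` (skip collection when a CI environment is detected; the `CI = True` added to the tox section of `setup.cfg` below appears to serve the same purpose for tests). A sketch of a wrapper honoring the same contract; the tracking id and version strings are placeholders, not real values:

```python
import openvino_telemetry as tm  # requires openvino-telemetry >= 2023.2.1

def init_telemetry(app_name: str = "Model Conversion API") -> tm.Telemetry:
    return tm.Telemetry(
        tid="XX-PLACEHOLDER",        # dummy tracking id for illustration
        app_name=app_name,
        app_version="0.0.0",         # dummy version for illustration
        backend="ga4",
        enable_opt_in_dialog=False,  # opt-out model: no consent prompt
        disable_in_ci=True,          # never collect inside CI jobs
    )
```

End users keep control through `opt_in_out --opt_out`, as the README addition above states.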
+ Any raw data that has reached the 14-month threshold is deleted from Google Analytics on a monthly basis. @endsphinxdirective \ No newline at end of file diff --git a/src/bindings/python/requirements.txt b/src/bindings/python/requirements.txt index 72438eeb2ecd91..c4d3c3e35568aa 100644 --- a/src/bindings/python/requirements.txt +++ b/src/bindings/python/requirements.txt @@ -1,3 +1,3 @@ numpy>=1.16.6 singledispatchmethod; python_version<'3.8' -openvino-telemetry>=2023.1.0 +openvino-telemetry>=2023.2.1 diff --git a/src/bindings/python/setup.cfg b/src/bindings/python/setup.cfg index 083c8e1de85cb1..b9b15ef0ca1214 100644 --- a/src/bindings/python/setup.cfg +++ b/src/bindings/python/setup.cfg @@ -13,6 +13,7 @@ setenv = OV_BACKEND = {env:OV_BACKEND:"CPU"} PYTHONPATH = {env:PYTHONPATH} OpenVINO_DIR = {env:OpenVINO_DIR} + CI = True passenv = http_proxy https_proxy diff --git a/tools/constraints.txt b/tools/constraints.txt index 18a3080d3a1e78..2e1588a005e03f 100644 --- a/tools/constraints.txt +++ b/tools/constraints.txt @@ -18,4 +18,4 @@ pyenchant>=3.0.0 test-generator==0.1.1 py>=1.9.0 urllib3>=1.26.4 -openvino-telemetry>=2023.1.0 +openvino-telemetry>=2023.2.1 diff --git a/tools/mo/openvino/tools/mo/utils/telemetry_utils.py b/tools/mo/openvino/tools/mo/utils/telemetry_utils.py index 802986edf4c4c0..e2cdd0b53f61d0 100644 --- a/tools/mo/openvino/tools/mo/utils/telemetry_utils.py +++ b/tools/mo/openvino/tools/mo/utils/telemetry_utils.py @@ -22,8 +22,13 @@ def init_mo_telemetry(app_name='Model Optimizer'): - return tm.Telemetry(tid=get_tid(), app_name=app_name, app_version=get_rt_version(), backend='ga4') - + return tm.Telemetry(tid=get_tid(), + app_name=app_name, + app_version=get_rt_version(), + backend='ga4', + enable_opt_in_dialog=False, + disable_in_ci=True + ) def send_framework_info(framework: str): """ diff --git a/tools/ovc/openvino/tools/ovc/telemetry_utils.py b/tools/ovc/openvino/tools/ovc/telemetry_utils.py index 87e0132ccd17a6..42232b0839a6be 100644 --- a/tools/ovc/openvino/tools/ovc/telemetry_utils.py +++ b/tools/ovc/openvino/tools/ovc/telemetry_utils.py @@ -17,7 +17,13 @@ def init_mo_telemetry(app_name='Model Conversion API'): - return tm.Telemetry(tid=get_tid(), app_name=app_name, app_version=get_rt_version(), backend='ga4') + return tm.Telemetry(tid=get_tid(), + app_name=app_name, + app_version=get_rt_version(), + backend='ga4', + enable_opt_in_dialog=False, + disable_in_ci=True + ) def send_framework_info(framework: str): """ From 818c78d80a7717257c81537ae2172d88f548e3a3 Mon Sep 17 00:00:00 2001 From: Sebastian Golebiewski Date: Thu, 19 Oct 2023 13:31:03 +0200 Subject: [PATCH 21/39] [DOCS] Fix command for Building with Ninja (#20605) * Fix command for Building with Ninja Removing current directory from the command. * Update docs/dev/build_windows.md --------- Co-authored-by: Ilya Lavrenov --- docs/dev/build_windows.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/dev/build_windows.md b/docs/dev/build_windows.md index 28001328ea3013..b6321785c38970 100644 --- a/docs/dev/build_windows.md +++ b/docs/dev/build_windows.md @@ -78,7 +78,7 @@ Supported configurations: ```sh call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Professional\VC\Auxiliary\Build\vcvars64.bat" cmake -G Ninja -Wno-dev -DCMAKE_BUILD_TYPE=Release .. -ninja . +cmake --build . 
--parallel ``` ## See also From e4a83b9b779e573b62503944ac0f91234e977667 Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Thu, 19 Oct 2023 15:05:55 +0100 Subject: [PATCH 22/39] [CI] [GHA] Extend Win CC pipeline with tests, make it static (#20579) * start with transferring * add CPU fun tests stage; complete CC stage * start tests in comd * uncomment * fix step * rm restore key * align cmake * add tbb path setting, continue on error * add logs; add missing dir; add extension for ov cpu test bin * add missing slash for path; explicitely set BUILD_TYPE * correct paths for layer tests summary files; add tbb as target * correct path to tbb * rm triggers; add nightly trigger; rm debug conditions * rm comment on cache --- .../linux_conditional_compilation.yml | 2 +- .../windows_conditional_compilation.yml | 280 +++++++++++++----- 2 files changed, 214 insertions(+), 68 deletions(-) diff --git a/.github/workflows/linux_conditional_compilation.yml b/.github/workflows/linux_conditional_compilation.yml index c8fb34cca85244..cb4470ef496606 100644 --- a/.github/workflows/linux_conditional_compilation.yml +++ b/.github/workflows/linux_conditional_compilation.yml @@ -308,7 +308,7 @@ jobs: name: openvino_tests path: ${{ env.INSTALL_TEST_DIR }} - - name: Extract OpenVINO packages + - name: Extract OpenVINO tests package run: tar -xvzf ${INSTALL_TEST_DIR}/openvino_tests.tar.gz -C ${INSTALL_TEST_DIR} - name: Install OpenVINO dependencies diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 93f947ee071df1..04583255fdb058 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -1,4 +1,4 @@ -name: Tests on Windows Conditional Compilation (VS 2022, Python 3.11) +name: Windows Conditional Compilation (VS 2022, Python 3.11) on: workflow_dispatch: schedule: @@ -20,40 +20,34 @@ on: # - '**.md' # - '**/layer_tests_summary/**' # - '**/conformance/**' -# branches: -# - master + branches: + - master concurrency: - group: ${{ github.head_ref || github.run_id }}-windows-cc + # github.ref is not unique in post-commit + group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-windows-cc cancel-in-progress: true env: - CMAKE_BUILD_TYPE: 'Release' - CMAKE_GENERATOR: 'Ninja' - CMAKE_CXX_COMPILER_LAUNCHER: sccache - CMAKE_C_COMPILER_LAUNCHER: sccache - OPENVINO_REPO: "${{ github.workspace }}\\openvino" - OPENVINO_CONTRIB_REPO: "${{ github.workspace }}\\openvino_contrib" - INSTALL_DIR: "${{ github.workspace }}\\install_pkg" - INSTALL_TEST_DIR: "${{ github.workspace }}\\install\\tests" - SAMPLES_INSTALL_DIR: "${{ github.workspace }}\\install\\samples" - LAYER_TESTS_INSTALL_DIR: "${{ github.workspace }}\\install\\tests\\layer_tests" - BUILD_DIR: "${{ github.workspace }}\\build" - BUILD_DIR_2: "${{ github.workspace }}\\build_s" - MODELS_PATH: "${{ github.workspace }}\\testdata" - OV_TEMP: "${{ github.workspace }}\\openvino_temp" - BUILD_TYPE: "Release" - PYTHON_STATIC_ARGS: -m "not dynamic_library and not template_plugin" - VCVARSPATH: "C:\\Program Files\\Microsoft Visual Studio\\2022\\Enterprise\\VC\\Auxiliary\\Build\\vcvarsall.bat" + PYTHON_VERSION: '3.11' jobs: Build: - # TODO: remove. Temporary measure to prevent the workflow from scheduling on forks. 
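The Windows workflow rewritten in this patch follows the same two-phase conditional-compilation recipe as its Linux counterpart: build with `-DSELECTIVE_BUILD=COLLECT` and ITT profiling enabled, trace one `benchmark_app` inference through the ITT collector to produce per-region usage statistics, then reconfigure with `-DSELECTIVE_BUILD=ON` pointing at the collected CSVs so only the exercised code paths are compiled into a static build. Condensed into a local Python driver for readability; the paths and the model are placeholders, while the cmake flags are the ones visible in the workflow below:

```python
import subprocess
from pathlib import Path

REPO = Path("openvino")               # placeholder checkout location
BUILD = Path("openvino_build")        # placeholder build tree
STATS = Path("selective_build_stat")  # where the ITT CSVs land

def run(*cmd) -> None:
    print("+", " ".join(str(c) for c in cmd))
    subprocess.run([str(c) for c in cmd], check=True)

# Phase 1: instrumented build that can record ITT region usage.
run("cmake", "-DSELECTIVE_BUILD=COLLECT", "-DENABLE_PROFILING_ITT=ON",
    "-S", REPO, "-B", BUILD)
run("cmake", "--build", BUILD, "--parallel", "--config", "Release")

# Trace a single inference; sea_runtool writes the usage statistics.
run("python", REPO / "thirdparty/itt_collector/runtool/sea_runtool.py",
    "--bindir", BUILD / "bin",        # simplified; the real bindir differs
    "-o", STATS / "itt_stat", "!",
    BUILD / "bin" / "benchmark_app",
    "-niter", "1", "-nireq", "1", "-m", "model.xml", "-d", "CPU")

# Phase 2: rebuild with only the observed code paths enabled.
run("cmake", "-DSELECTIVE_BUILD=ON",
    f"-DSELECTIVE_BUILD_STAT={STATS}/*.csv", "-S", REPO, "-B", BUILD)
run("cmake", "--build", BUILD, "--parallel", "--target", "benchmark_app")
```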
- if: ${{ github.repository_owner == 'openvinotoolkit' }} defaults: run: shell: pwsh runs-on: windows-latest-8-cores + env: + CMAKE_BUILD_TYPE: 'Release' + CMAKE_GENERATOR: 'Ninja Multi-Config' + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + OPENVINO_REPO: "${{ github.workspace }}\\openvino" + INSTALL_DIR: "${{ github.workspace }}\\openvino_install" + INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install" + BUILD_DIR: "${{ github.workspace }}\\openvino_build" + MODELS_PATH: "${{ github.workspace }}\\testdata" + SELECTIVE_BUILD_STAT_DIR: "${{ github.workspace }}\\selective_build_stat" steps: - name: Clone OpenVINO uses: actions/checkout@v4 @@ -75,21 +69,17 @@ jobs: - uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: ${{ env.PYTHON_VERSION }} - name: Install build dependencies - run: | - choco install --no-progress ninja + run: choco install --no-progress ninja # # Build # - - name: Get number of CPU cores - uses: SimenB/github-actions-cpu-cores@v2 - id: cpu-cores - - - uses: ilammy/msvc-dev-cmd@v1 + - name: Configure Developer Command Prompt for Microsoft Visual C++ + uses: ilammy/msvc-dev-cmd@v1 - name: Setup sccache uses: hendrikmuhs/ccache-action@v1.2 @@ -99,71 +89,227 @@ jobs: # Should save cache only if run in the master branch of the base repo # github.ref_name is 'ref/PR_#' in case of the PR, and 'branch_name' when executed on push save: ${{ github.ref_name == 'master' && 'true' || 'false' }} - key: ${{ github.job }}-windows-cc + key: ${{ github.job }}-${{ runner.os }}-itt restore-keys: | - ${{ github.job }}-windows-cc + ${{ github.job }}-${{ runner.os }}-itt - - name: CMake CC COLLECT + - name: CMake configure - CC COLLECT run: | - & "${{ env.VCVARSPATH }}" x64 && cmake -G Ninja ` + cmake -G "${{ env.CMAKE_GENERATOR }}" ` + -DBUILD_SHARED_LIBS=OFF ` + -DENABLE_TESTS=ON ` -DENABLE_CPPLINT=OFF ` - -DENABLE_GAPI_PREPROCESSING=OFF ` - -DENABLE_PLUGINS_XML=ON ` - -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF ` - -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} ` + -DENABLE_NCC_STYLE=OFF ` + -DENABLE_INTEL_GNA=OFF ` + -DCMAKE_COMPILE_WARNING_AS_ERROR=ON ` -DENABLE_PROFILING_ITT=ON ` -DSELECTIVE_BUILD=COLLECT ` + -DCMAKE_DISABLE_FIND_PACKAGE_PkgConfig=ON ` -S ${{ env.OPENVINO_REPO }} ` -B ${{ env.BUILD_DIR }} - - name: Build CC COLLECT + - name: Cmake build - CC COLLECT run: | - & "${{ env.VCVARSPATH }}" x64 && cmake --build ${{ env.BUILD_DIR }} --parallel ${{ steps.cpu-cores.outputs.count }} --config ${{ env.BUILD_TYPE }} ` - --target openvino_intel_cpu_plugin openvino_ir_frontend benchmark_app sea_itt_lib + cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} + cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target sea_itt_lib - - name: List bin files + - name: Cmake install - OpenVINO + run: cmake -DCMAKE_INSTALL_PREFIX=${{ env.INSTALL_DIR }} -P ${{ env.BUILD_DIR }}/cmake_install.cmake + + - name: Build C++ samples - OpenVINO build tree + run: | + cmake -G "${{ env.CMAKE_GENERATOR }}" -DOpenVINO_DIR=${{ env.BUILD_DIR }} -S ${{ env.INSTALL_DIR }}/samples/cpp -B ${{ env.BUILD_DIR }}/cpp_samples + cmake --build ${{ env.BUILD_DIR }}/cpp_samples --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target hello_query_device + + - name: Build C samples - OpenVINO install tree + run: | + & ${{ env.INSTALL_DIR }}/samples/c/build_samples_msvc.bat -i ${{ env.INSTALL_DIR }} -b ${{ env.BUILD_DIR }}/c_samples + + - name: Ctest - OpenVINO unit tests shell: cmd - run: dir ${{ env.OPENVINO_REPO 
}}\bin\ /s + run: | + set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin + ctest -C ${{ env.CMAKE_BUILD_TYPE }} --test-dir ${{ env.BUILD_DIR }} -V -L UNIT - - name: Code usage analysis + - name: Perform code tracing via ITT collector shell: cmd - working-directory: ${{ env.OPENVINO_REPO }} run: | set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin - call "${{ env.VCVARSPATH }}" && python thirdparty\itt_collector\runtool\sea_runtool.py --bindir ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.BUILD_TYPE }} -o ${{ env.BUILD_DIR }}\itt_stat ! ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.BUILD_TYPE }}\benchmark_app.exe -niter 1 -nireq 1 -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml -d CPU + + python3 ${{ env.OPENVINO_REPO }}\thirdparty\itt_collector\runtool\sea_runtool.py ^ + --bindir ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }} ^ + -o ${{ env.SELECTIVE_BUILD_STAT_DIR }}\itt_stat ! ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }}\benchmark_app.exe ^ + -niter 1 ^ + -nireq 1 ^ + -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml ^ + -d CPU + + - name: List bin files + shell: cmd + run: dir ${{ env.OPENVINO_REPO }}\bin\ /s - - name: List csv files + - name: List install files shell: cmd - run: dir ${{ env.BUILD_DIR }}\*.csv /s /p + run: dir ${{ env.INSTALL_DIR }} /s + + - name: Pack Artifacts + run: | + $file=Get-ChildItem -Path "${{ env.SELECTIVE_BUILD_STAT_DIR }}" + $compress = @{ + Path = $file + CompressionLevel = "Optimal" + DestinationPath = "${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip" + } + Compress-Archive @compress + + $compress = @{ + Path = "${{ env.OPENVINO_REPO }}/bin/intel64/${{ env.CMAKE_BUILD_TYPE }}/ov_cpu_func_tests.exe", "${{ env.OPENVINO_REPO }}/src/tests/test_utils/functional_test_utils/layer_tests_summary", "${{ env.INSTALL_DIR }}/runtime/3rdparty/tbb" + CompressionLevel = "Optimal" + DestinationPath = "${{ env.BUILD_DIR }}/openvino_tests.zip" + } + Compress-Archive @compress + + - name: Upload selective build statistics package + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: openvino_selective_build_stat + path: ${{ env.BUILD_DIR }}/openvino_selective_build_stat.zip + if-no-files-found: 'error' + + - name: Upload OpenVINO tests package + if: ${{ always() }} + uses: actions/upload-artifact@v3 + with: + name: openvino_tests + path: ${{ env.BUILD_DIR }}/openvino_tests.zip + if-no-files-found: 'error' + + CC_Build: + name: Conditional Compilation + needs: Build + defaults: + run: + shell: pwsh + runs-on: windows-latest-8-cores + env: + CMAKE_BUILD_TYPE: 'Release' + CMAKE_CXX_COMPILER_LAUNCHER: sccache + CMAKE_C_COMPILER_LAUNCHER: sccache + OPENVINO_REPO: "${{ github.workspace }}\\openvino" + BUILD_DIR: "${{ github.workspace }}\\openvino_build" + MODELS_PATH: "${{ github.workspace }}\\testdata" + SELECTIVE_BUILD_STAT_DIR: "${{ github.workspace }}\\selective_build_stat" + steps: + - name: Clone OpenVINO + uses: actions/checkout@v4 + with: + path: 'openvino' + submodules: 'true' - - name: CMake CC ON + - name: Clone test models + uses: actions/checkout@v4 + with: + repository: 'openvinotoolkit/testdata' + path: 'testdata' + lfs: 'true' + ref: 'master' + + - name: Download selective build statistics package + uses: actions/download-artifact@v3 + with: + name: openvino_selective_build_stat + path: ${{ env.SELECTIVE_BUILD_STAT_DIR }} + + - name: Extract selective build statistics package + run: Expand-Archive ${{ env.SELECTIVE_BUILD_STAT_DIR }}/openvino_selective_build_stat.zip 
-DestinationPath "${{ env.SELECTIVE_BUILD_STAT_DIR }}" + + - name: CMake configure - CC ON run: | - & "${{ env.VCVARSPATH }}" x64 && cmake -G "Visual Studio 17 2022" ` - -DCMAKE_VERBOSE_MAKEFILE=ON ` + cmake ` + -DBUILD_SHARED_LIBS=OFF ` -DENABLE_CPPLINT=OFF ` - -DENABLE_GAPI_PREPROCESSING=OFF ` - -DENABLE_PROFILING_ITT=OFF ` -DSELECTIVE_BUILD=ON ` - -DCMAKE_COMPILE_WARNING_AS_ERROR=OFF ` - -DSELECTIVE_BUILD_STAT=${{ env.BUILD_DIR }}\*.csv ` + -DENABLE_TEMPLATE=OFF ` + -DENABLE_INTEL_GPU=OFF ` + -DENABLE_INTEL_GNA=OFF ` + -DENABLE_OV_TF_FRONTEND=OFF ` + -DENABLE_OV_TF_LITE_FRONTEND=OFF ` + -DENABLE_OV_PADDLE_FRONTEND=OFF ` + -DENABLE_OV_PYTORCH_FRONTEND=OFF ` + -DENABLE_OV_ONNX_FRONTEND=OFF ` + -DSELECTIVE_BUILD_STAT=${{ env.SELECTIVE_BUILD_STAT_DIR }}\*.csv ` -S ${{ env.OPENVINO_REPO }} ` - -B ${{ env.BUILD_DIR_2 }} + -B ${{ env.BUILD_DIR }} - - name: Build CC ON - run: | - & "${{ env.VCVARSPATH }}" x64 && cmake --build ${{ env.BUILD_DIR_2 }} --parallel ${{ steps.cpu-cores.outputs.count }} --config ${{ env.BUILD_TYPE }} ` - --target openvino_intel_cpu_plugin openvino_ir_frontend benchmark_app + - name: Cmake build - CC ON + run: cmake --build ${{ env.BUILD_DIR }} --parallel --config ${{ env.CMAKE_BUILD_TYPE }} --target benchmark_app - - name: List bin files ON + - name: List bin files shell: cmd run: dir ${{ env.OPENVINO_REPO }}\bin\ /s - - name: Check conditional_compilation_gen.h header + - name: Run with CC-ed runtime shell: cmd - run: type ${{ env.BUILD_DIR_2 }}\src\common\conditional_compilation\conditional_compilation_gen.h + run: | + set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin + ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.CMAKE_BUILD_TYPE }}\benchmark_app.exe -niter 1 -nireq 1 -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml -d CPU - - name: Use OpenVINO after CC + CPU_Functional_Tests: + name: CPU functional tests + needs: Build + defaults: + run: + shell: pwsh + runs-on: windows-latest-8-cores + env: + INSTALL_TEST_DIR: "${{ github.workspace }}\\tests_install" + PARALLEL_TEST_SCRIPT: "${{ github.workspace }}\\tests_install\\layer_tests_summary\\run_parallel.py" + PARALLEL_TEST_CACHE: "${{ github.workspace }}\\tests_install\\test_cache.lst" + + steps: + - name: Download OpenVINO tests package + uses: actions/download-artifact@v3 + with: + name: openvino_tests + path: ${{ env.INSTALL_TEST_DIR }} + + - name: Extract OpenVINO tests package + run: Expand-Archive ${{ env.INSTALL_TEST_DIR }}/openvino_tests.zip -DestinationPath "${{ env.INSTALL_TEST_DIR }}" + + - uses: actions/setup-python@v4 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install python dependencies for run_parallel.py + run: python3 -m pip install -r ${{ env.INSTALL_TEST_DIR }}/layer_tests_summary/requirements.txt + + # Windows pipeline is in nightly mode, uncomment once there is a consistent cache creation + # - name: Restore tests execution time + # uses: actions/cache/restore@v3 + # with: + # path: ${{ env.PARALLEL_TEST_CACHE }} + # key: ${{ runner.os }}-tests-functional-cpu-stamp-${{ github.sha }} + # restore-keys: | + # ${{ runner.os }}-tests-functional-cpu-stamp + + - name: Intel CPU plugin func tests (parallel) shell: cmd run: | - set path=%path%;${{ env.OPENVINO_REPO }}\temp\tbb\bin - ${{ env.OPENVINO_REPO }}\bin\intel64\${{ env.BUILD_TYPE }}\benchmark_app.exe -niter 1 -nireq 1 -m ${{ env.MODELS_PATH }}\models\test_model\test_model_fp32.xml -d CPU + set path=%path%;${{ env.INSTALL_TEST_DIR }}\tbb\bin;${{ env.INSTALL_TEST_DIR }}\tbb + python3 ${{ env.PARALLEL_TEST_SCRIPT 
}} -e ${{ env.INSTALL_TEST_DIR }}\ov_cpu_func_tests.exe -w ${{ env.INSTALL_TEST_DIR }} -s suite -rf 0 -- --gtest_print_time=1 --gtest_filter=*smoke* + timeout-minutes: 45 + + - name: Upload Test Results + uses: actions/upload-artifact@v3 + if: ${{ !cancelled() }} + with: + name: test-results-functional-cpu + path: | + ${{ env.INSTALL_TEST_DIR }}/TEST*.xml + ${{ env.INSTALL_TEST_DIR }}/logs/failed/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/crashed/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/hanged/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/interapted/*.log + ${{ env.INSTALL_TEST_DIR }}/logs/disabled_tests.log + if-no-files-found: 'error' From f6aa2ab7afa70b862705d5393f0d1499bfa2c35c Mon Sep 17 00:00:00 2001 From: Andrey Kashchikhin Date: Thu, 19 Oct 2023 16:17:16 +0100 Subject: [PATCH 23/39] fix yml (#20614) --- .github/workflows/windows_conditional_compilation.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/windows_conditional_compilation.yml b/.github/workflows/windows_conditional_compilation.yml index 04583255fdb058..f0a9741aee9537 100644 --- a/.github/workflows/windows_conditional_compilation.yml +++ b/.github/workflows/windows_conditional_compilation.yml @@ -20,8 +20,8 @@ on: # - '**.md' # - '**/layer_tests_summary/**' # - '**/conformance/**' - branches: - - master +# branches: +# - master concurrency: # github.ref is not unique in post-commit From 5018be82c38f7866b69c19ca4954df8006345fda Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 19 Oct 2023 08:35:10 -0700 Subject: [PATCH 24/39] TorchFX: Constant value pass without copy (#20380) * TorchFX: Constant value pass optimization * Replace op.Constant with make_constant in fx_decoder * Using shared memory for constant value passing Co-authored-by: Jan Iwaszkiewicz --------- Co-authored-by: Jan Iwaszkiewicz --- .../python/src/openvino/frontend/pytorch/fx_decoder.py | 9 ++------- .../python/src/openvino/frontend/pytorch/utils.py | 8 -------- 2 files changed, 2 insertions(+), 15 deletions(-) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index a79892b3e4d6f5..479e1a5cb1c622 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -7,10 +7,9 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType from openvino.runtime import op, PartialShape, Type as OVType, OVAny, Shape -from openvino.frontend.pytorch.utils import maybe_convert_max_int, make_constant, fetch_attr, pt_to_ov_type_map, ov_to_c_type_map +from openvino.frontend.pytorch.utils import maybe_convert_max_int, make_constant, fetch_attr, pt_to_ov_type_map import torch -import ctypes class TorchFXPythonDecoder (Decoder): @@ -224,11 +223,7 @@ def as_constant(self): if self.pt_module.op == 'get_attr': # Extract Constant from FX module field ret = fetch_attr(self.fx_gm, self.pt_module.target) - ovshape = PartialShape(ret.size()) - ovtype = pt_to_ov_type_map[str(ret.type())] - c_type = ctypes.POINTER(ov_to_c_type_map[ovtype]) - data_c_ptr = ctypes.cast(ret.data_ptr(), c_type) - ov_const = op.Constant(ovtype, ovshape.get_shape(), data_c_ptr[:ret.nelement()]) + ov_const = op.Constant(ret.numpy(), shared_memory=True) return ov_const.outputs() diff --git a/src/bindings/python/src/openvino/frontend/pytorch/utils.py 
b/src/bindings/python/src/openvino/frontend/pytorch/utils.py index 97d237fb0efda1..a3ac46e701119b 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/utils.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/utils.py @@ -7,7 +7,6 @@ import torch import numpy as np -import ctypes from openvino.runtime import op, Type as OVType, Shape, Tensor from openvino.runtime import opset11 as ops @@ -132,13 +131,6 @@ def get_value_from_getattr(getattr_node, self_module): "torch.qint32": OVType.i32 } -ov_to_c_type_map = { - OVType.f32: ctypes.c_float, - OVType.f64: ctypes.c_double, - OVType.i32: ctypes.c_int, - OVType.i64: ctypes.c_int64, -} - wrapper_template = """ import torch From 3d5fe8d44694b6bd0a5b40d1ff051dc139cc60e7 Mon Sep 17 00:00:00 2001 From: Mustafa Cavus Date: Thu, 19 Oct 2023 10:21:28 -0700 Subject: [PATCH 25/39] Llm and sd additional ops (#20435) * TorchFX: New ops added (baddbbmm, leaky_relu_) * TorchFX: Initial scaled_dot_product_flash_attention * Code Formatting: scaled_fot_product_attention translation * TorchFX unit test enabled for SDPA * Typo fix in comment line Co-authored-by: Maxim Vafin --------- Co-authored-by: Maxim Vafin --- .../pytorch/torchdynamo/op_support.py | 3 ++ .../src/op/scaled_dot_product_attention.cpp | 36 +++++++++++++++---- src/frontends/pytorch/src/op_table.cpp | 4 +++ .../test_scaled_dot_product_attention.py | 1 + 4 files changed, 37 insertions(+), 7 deletions(-) diff --git a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py index 726f3b598bc15e..4a76d90b160553 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/torchdynamo/op_support.py @@ -41,6 +41,7 @@ def __init__(self): "torch.ops.aten.arange.default": None, "torch.ops.aten.argmax.default": None, "torch.ops.aten.avg_pool2d.default": None, + "torch.ops.aten.baddbmm.default": None, "torch.ops.aten.bitwise_and.Tensor": None, "torch.ops.aten.bmm.default": None, "torch.ops.aten.cat.default": None, @@ -67,6 +68,7 @@ def __init__(self): "torch.ops.aten.hardswish_.default": None, "torch.ops.aten.hardtanh_.default": None, "torch.ops.aten.index.Tensor": None, + "torch.ops.aten.leaky_relu_.default": None, "torch.ops.aten.lift_fresh_copy.default": None, "torch.ops.aten.linalg_vector_norm.default": None, "torch.ops.aten.lt.Tensor": None, @@ -89,6 +91,7 @@ def __init__(self): "torch.ops.aten.relu.default": None, "torch.ops.aten.relu_.default": None, "torch.ops.aten.rsub.Scalar": None, + "torch.ops.aten._scaled_dot_product_flash_attention.default": None, "torch.ops.aten.select.int": None, "torch.ops.aten.sigmoid.default": None, "torch.ops.aten.silu.default": None, diff --git a/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp b/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp index 735324405d1f11..82231472e401be 100644 --- a/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp +++ b/src/frontends/pytorch/src/op/scaled_dot_product_attention.cpp @@ -15,6 +15,7 @@ #include "openvino/op/matmul.hpp" #include "openvino/op/multiply.hpp" #include "openvino/op/range.hpp" +#include "openvino/op/reshape.hpp" #include "openvino/op/select.hpp" #include "openvino/op/shape_of.hpp" #include "openvino/op/softmax.hpp" @@ -22,6 +23,7 @@ #include "openvino/op/squeeze.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "openvino/op/util/framework_node.hpp" 
#include "utils.hpp" namespace ov { @@ -31,10 +33,7 @@ namespace op { using namespace ov::op; -OutputVector translate_scaled_dot_product_attention(const NodeContext& context) { - // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float - // dropout_p=0., bool is_causal=False) - num_inputs_check(context, 6, 6); +std::shared_ptr translate_scaled_dot_product_attention_common(const NodeContext& context) { auto query = context.get_input(0); auto key = context.get_input(1); auto value = context.get_input(2); @@ -68,7 +67,10 @@ OutputVector translate_scaled_dot_product_attention(const NodeContext& context) minus_inf = context.mark_node(std::make_shared(minus_inf, scaled_atten)); // two types of masks are supported. A boolean mask where a value of True indicates that the element should take // part in attention. A float mask of the same type as query, key, value that is added to the attention score. - auto is_causal = context.const_input(5); + auto is_causal = false; + if (!context.input_is_none(5)) { + is_causal = context.const_input(5); + } if (is_causal || !context.input_is_none(3)) { Output mask; Output atten_mask; @@ -100,10 +102,30 @@ OutputVector translate_scaled_dot_product_attention(const NodeContext& context) scaled_atten = context.mark_node(std::make_shared(scaled_atten, atten_mask)); } scaled_atten = context.mark_node(std::make_shared(scaled_atten, -1)); - return {context.mark_node(std::make_shared(scaled_atten, value))}; + return context.mark_node(std::make_shared(scaled_atten, value)); +}; + +OutputVector translate_scaled_dot_product_attention(const NodeContext& context) { + // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float + // dropout_p=0., bool is_causal=False) + num_inputs_check(context, 6, 6); + return {translate_scaled_dot_product_attention_common(context)}; +}; + +OutputVector translate_scaled_dot_product_attention_fx(const NodeContext& context) { + // aten::scaled_dot_product_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_mask=None, float + // dropout_p=0., bool is_causal=False) + num_inputs_check(context, 3, 6); + auto output = translate_scaled_dot_product_attention_common(context); + // TODO: scaled_dot_product_flash_attention has 9 outputs but for most cases only + // the first input is used. Rest of the outputs should be returned properly as + // needed. 
+    ov::OutputVector out_vec;
+    out_vec.push_back(output);
+    return {context.mark_node(make_list_construct(out_vec))};
+};

 }  // namespace op
 }  // namespace pytorch
 }  // namespace frontend
-}  // namespace ov
\ No newline at end of file
+}  // namespace ov
diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp
index 75665ffe8d4d14..5614a3881c3573 100644
--- a/src/frontends/pytorch/src/op_table.cpp
+++ b/src/frontends/pytorch/src/op_table.cpp
@@ -213,6 +213,7 @@ OP_CONVERTER(translate_group_norm_fx);
 OP_CONVERTER(translate_index_fx);
 OP_CONVERTER(translate_layer_norm_fx);
 OP_CONVERTER(translate_max_poolnd_fx);
+OP_CONVERTER(translate_scaled_dot_product_attention_fx);
 OP_CONVERTER(translate_slice_fx);
 OP_CONVERTER(translate_softmax_fx);
 OP_CONVERTER(translate_transpose_fx);
@@ -555,6 +556,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops_fx() {
         {"aten.arange.default", op::translate_arange_fx},
         {"aten.argmax.default", op::translate_argmax},
         {"aten.avg_pool2d.default", op::translate_avg_poolnd},
+        {"aten.baddbmm.default", op::translate_addmm},
         {"aten.bitwise_and.Tensor", op::translate_bitwise_and},
         {"aten.bmm.default", op::translate_1to1_match_2_inputs_align_types<opset10::MatMul>},
         {"aten.cat.default", op::translate_cat_fx},
@@ -581,6 +583,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops_fx() {
         {"aten.hardswish_.default", op::inplace_op<op::translate_1to1_match_1_inputs<opset10::HSwish>>},
         {"aten.hardtanh_.default", op::inplace_op<op::translate_hardtanh>},
         {"aten.index.Tensor", op::translate_index_fx},
+        {"aten.leaky_relu_.default", op::inplace_op<op::translate_1to1_match_2_inputs<opset10::PRelu>>},
         {"aten.lift_fresh_copy.default", op::skip_node},
         {"aten.linalg_vector_norm.default", op::translate_linalg_vector_norm},
         {"aten.log.default", op::translate_log},
@@ -603,6 +606,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops_fx() {
         {"aten.relu.default", op::translate_1to1_match_1_inputs<opset10::Relu>},
         {"aten.relu_.default", op::inplace_op<op::translate_1to1_match_1_inputs<opset10::Relu>>},
         {"aten.rsub.Scalar", op::translate_rsub},
+        {"aten._scaled_dot_product_flash_attention.default", op::translate_scaled_dot_product_attention_fx},
         {"aten.select.int", op::translate_select},
         {"aten.sigmoid.default", op::translate_1to1_match_1_inputs<opset10::Sigmoid>},
         {"aten.silu.default", op::translate_1to1_match_1_inputs<opset10::Swish>},
diff --git a/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py b/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py
index 22ed325471823b..69c600a0b7562d 100644
--- a/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py
+++ b/tests/layer_tests/pytorch_tests/test_scaled_dot_product_attention.py
@@ -36,6 +36,7 @@ def forward(self, query, key, value):

     @pytest.mark.nightly
     @pytest.mark.precommit
+    @pytest.mark.precommit_fx_backend
     @pytest.mark.parametrize(['mask', "is_causal"], [(False, False), (False, True), (True, True), (True, False)])
     def test_scaled_dot_product_atten(self, ie_device, precision, ir_version, mask, is_causal):
         self._test(*self.create_model(mask, is_causal),ie_device, precision, ir_version)

From 070678fc192e260ce213055924706de39f22663a Mon Sep 17 00:00:00 2001
From: Siddhant Chauhan
Date: Fri, 20 Oct 2023 00:10:38 +0530
Subject: [PATCH 26/39] [TF FE][TF Hub] Support TruncateMod operation (#20468)

* [TF FE][TF Hub] Support TruncateMod operation

* Update truncate_mod.cpp

* fix

---
 src/frontends/tensorflow/src/op_table.cpp     |  1 +
 .../include/common_op_table.hpp               |  1 +
 .../tensorflow_common/src/op/truncate_mod.cpp | 48 ++++++++++++++++++
 .../tensorflow_tests/test_tf_TruncateMod.py   | 49 +++++++++++++++++++
 4 files changed, 99 insertions(+)
 create mode 100644 src/frontends/tensorflow_common/src/op/truncate_mod.cpp
 create mode 100644 tests/layer_tests/tensorflow_tests/test_tf_TruncateMod.py
diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp
index bc1a657faf54fb..75512bce97be8e 100644
--- a/src/frontends/tensorflow/src/op_table.cpp
+++ b/src/frontends/tensorflow/src/op_table.cpp
@@ -281,6 +281,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
         {"TopK", CreatorFunction(translate_top_k_op)},
         {"TopKV2", CreatorFunction(translate_top_k_v2_op)},
         {"Transpose", CreatorFunction(translate_transpose_op)},
+        {"TruncateMod", CreatorFunction(translate_truncate_mod_op)},
         {"Unpack", CreatorFunction(translate_unpack_op)},
         {"UnravelIndex", CreatorFunction(translate_unravel_index_op)},
         {"UnsortedSegmentSum", CreatorFunction(translate_unsorted_segment_sum_op)},
diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp
index 54f1dff243efd1..1004cb6e0c2483 100644
--- a/src/frontends/tensorflow_common/include/common_op_table.hpp
+++ b/src/frontends/tensorflow_common/include/common_op_table.hpp
@@ -145,6 +145,7 @@ OP_CONVERTER(translate_tile_op);
 OP_CONVERTER_NAMED(translate_top_k_op);
 OP_CONVERTER_NAMED(translate_top_k_v2_op);
 OP_CONVERTER(translate_transpose_op);
+OP_CONVERTER(translate_truncate_mod_op);
 OP_CONVERTER(translate_unpack_op);
 OP_CONVERTER(translate_unravel_index_op);
 OP_CONVERTER(translate_unsorted_segment_sum_op);
diff --git a/src/frontends/tensorflow_common/src/op/truncate_mod.cpp b/src/frontends/tensorflow_common/src/op/truncate_mod.cpp
new file mode 100644
index 00000000000000..c4422ee983345d
--- /dev/null
+++ b/src/frontends/tensorflow_common/src/op/truncate_mod.cpp
@@ -0,0 +1,48 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_op_table.hpp"
+#include "openvino/op/equal.hpp"
+#include "openvino/op/floor_mod.hpp"
+#include "openvino/op/less.hpp"
+#include "openvino/op/negative.hpp"
+#include "openvino/op/select.hpp"
+#include "openvino/op/subtract.hpp"
+
+using namespace std;
+using namespace ov::opset10;
+
+namespace ov {
+namespace frontend {
+namespace tensorflow {
+namespace op {
+OutputVector translate_truncate_mod_op(const NodeContext& node) {
+    default_op_checks(node, 2, {"TruncateMod"});
+    auto x = node.get_input(0);
+    auto y = node.get_input(1);
+
+    auto is_x_negative = make_shared<Less>(x, create_same_type_const_scalar<int32_t>(x, 0));
+    auto is_y_negative = make_shared<Less>(y, create_same_type_const_scalar<int32_t>(y, 0));
+
+    // if (y < 0) {y = -y}
+    auto negative_y = make_shared<Negative>(y);
+    y = make_shared<Select>(is_y_negative, negative_y, y);
+
+    // compute the remainder with the positive divisor: floor_mod lies in [0, y)
+    auto floor_mod = make_shared<FloorMod>(x, y);
+    auto is_zero = make_shared<Equal>(floor_mod, create_same_type_const_scalar<int32_t>(x, 0));
+
+    // the truncated remainder takes the sign of the dividend x,
+    // so for negative x a non-zero remainder equals floor_mod - y
+    auto is_res_negative = is_x_negative;
+    auto res = make_shared<Subtract>(floor_mod, y);
+    auto final_res = make_shared<Select>(is_zero, floor_mod, make_shared<Select>(is_res_negative, res, floor_mod));
+
+    set_node_name(node.get_name(), final_res);
+    return final_res->outputs();
+}
+}  // namespace op
+}  // namespace tensorflow
+}  // namespace frontend
+}  // namespace ov
diff --git a/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py b/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py
new file mode 100644
index 00000000000000..508cde035b83ad
--- /dev/null
+++ b/tests/layer_tests/tensorflow_tests/test_tf_TruncateDiv.py
@@ -0,0 +1,49 @@
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pytest
+import tensorflow as tf
+from common.tf_layer_test_class import CommonTFLayerTest
+
+
+class TestTruncateDiv(CommonTFLayerTest):
+    def _prepare_input(self, inputs_info):
+        assert 'x' in inputs_info
+        assert 'y' in inputs_info
+        x_shape = inputs_info['x']
+        y_shape = inputs_info['y']
+        inputs_data = {}
+        # generate x
and y to ensure truncation + inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(self.input_type) + inputs_data['y'] = np.random.randint(1, 10, y_shape).astype(self.input_type) + return inputs_data + + def create_truncate_div_net(self, input_shape, input_type): + self.input_type = input_type + tf.compat.v1.reset_default_graph() + # Create the graph and model + with tf.compat.v1.Session() as sess: + x = tf.compat.v1.placeholder(input_type, input_shape, 'x') + y = tf.compat.v1.placeholder(input_type, input_shape, 'y') + tf.raw_ops.TruncateDiv(x=x, y=y) + tf.compat.v1.global_variables_initializer() + tf_net = sess.graph_def + + return tf_net, None + + test_data_basic = [ + dict(input_shape=[10, 20], input_type=np.float32), + dict(input_shape=[8, 5], input_type=np.float32), + dict(input_shape=[5, 3], input_type=np.int32), + dict(input_shape=[6, 4], input_type=np.int32), + ] + + @pytest.mark.parametrize("params", test_data_basic) + @pytest.mark.precommit_tf_fe + @pytest.mark.nightly + def test_truncate_div_basic(self, params, ie_device, precision, ir_version, temp_dir, + use_new_frontend, use_old_api): + self._test(*self.create_truncate_div_net(**params), + ie_device, precision, ir_version, temp_dir=temp_dir, + use_new_frontend=use_new_frontend, use_old_api=use_old_api) From 361b250fc433e9c1d1488b0201560e6031845be2 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 20 Oct 2023 10:44:42 +0400 Subject: [PATCH 29/39] WA issues with dynamic protobuf usage in Fes (#20612) --- src/frontends/common/src/manager.cpp | 20 +------------ src/frontends/common/src/plugin_loader.cpp | 21 +++++++++++++- src/frontends/common/src/plugin_loader.hpp | 10 ++++++- .../frontend/shared/src/library_extension.cpp | 28 +++++++++++++++++++ thirdparty/dependencies.cmake | 8 +++--- 5 files changed, 62 insertions(+), 25 deletions(-) diff --git a/src/frontends/common/src/manager.cpp b/src/frontends/common/src/manager.cpp index 35df484c2cab26..6194fca7583937 100644 --- a/src/frontends/common/src/manager.cpp +++ b/src/frontends/common/src/manager.cpp @@ -20,21 +20,6 @@ class FrontEndManager::Impl { std::mutex m_loading_mutex; std::vector m_plugins; - // Note, static methods below are required to create an order of initialization of static variables - // e.g. 
if users (not encouraged) created ov::Model globally, we need to ensure proper order of initialization - - /// \return map of shared object per frontend - static std::unordered_map>& get_shared_objects_map() { - static std::unordered_map> shared_objects_map; - return shared_objects_map; - } - - /// \return Mutex to guard access the shared object map - static std::mutex& get_shared_objects_mutex() { - static std::mutex shared_objects_map_mutex; - return shared_objects_map_mutex; - } - public: Impl() { search_all_plugins(); @@ -46,10 +31,6 @@ class FrontEndManager::Impl { auto fe_obj = std::make_shared(); fe_obj->m_shared_object = std::make_shared(plugin.get_so_pointer()); fe_obj->m_actual = plugin.get_creator().m_creator(); - - std::lock_guard guard(get_shared_objects_mutex()); - get_shared_objects_map().emplace(plugin.get_creator().m_name, fe_obj->m_shared_object); - return fe_obj; } @@ -164,6 +145,7 @@ class FrontEndManager::Impl { {".xml", {"ir", "ir"}}, {".onnx", {"onnx", "onnx"}}, {".pb", {"tf", "tensorflow"}}, + {".pbtxt", {"tf", "tensorflow"}}, {".tflite", {"tflite", "tensorflow_lite"}}, {".pdmodel", {"paddle", "paddle"}}, // {".ts", {"pytorch", "pytorch"}}, diff --git a/src/frontends/common/src/plugin_loader.cpp b/src/frontends/common/src/plugin_loader.cpp index a044152d8d590d..a98eff766bbc0d 100644 --- a/src/frontends/common/src/plugin_loader.cpp +++ b/src/frontends/common/src/plugin_loader.cpp @@ -16,17 +16,32 @@ #include -#include #include #include #include "openvino/util/file_util.hpp" +#include "openvino/util/log.hpp" #include "openvino/util/shared_object.hpp" #include "plugin_loader.hpp" using namespace ov; using namespace ov::frontend; +// Note, static methods below are required to create an order of initialization of static variables +// e.g. if users (not encouraged) created ov::Model globally, we need to ensure proper order of initialization + +/// \return map of shared object per frontend +std::unordered_map>& ov::frontend::get_shared_objects_map() { + static std::unordered_map> shared_objects_map; + return shared_objects_map; +} + +/// \return Mutex to guard access the shared object map +std::mutex& ov::frontend::get_shared_objects_mutex() { + static std::mutex shared_objects_map_mutex; + return shared_objects_map_mutex; +} + #ifdef OPENVINO_STATIC_LIBRARY # include "ov_frontends.hpp" @@ -131,6 +146,10 @@ bool PluginInfo::load() { m_load_failed = true; return false; } + + std::lock_guard guard(get_shared_objects_mutex()); + get_shared_objects_map().emplace(get_creator().m_name, get_so_pointer()); + return true; } diff --git a/src/frontends/common/src/plugin_loader.hpp b/src/frontends/common/src/plugin_loader.hpp index 93e6a5cc2eb5a3..dccf8ddf7a39f3 100644 --- a/src/frontends/common/src/plugin_loader.hpp +++ b/src/frontends/common/src/plugin_loader.hpp @@ -4,7 +4,12 @@ #pragma once -#include +#include +#include +#include +#include + +#include "openvino/frontend/manager.hpp" #ifdef _WIN32 static const char PathSeparator[] = ";"; @@ -15,6 +20,9 @@ static const char PathSeparator[] = ":"; namespace ov { namespace frontend { +std::unordered_map>& get_shared_objects_map(); +std::mutex& get_shared_objects_mutex(); + /// \brief Internal data structure holding by each frontend. Includes library handle and extensions. 
class FrontEndSharedData { friend inline void add_extension_to_shared_data(std::shared_ptr& obj, diff --git a/src/frontends/tests/frontend/shared/src/library_extension.cpp b/src/frontends/tests/frontend/shared/src/library_extension.cpp index a2257f8fca116b..8a6bb23d82f0ef 100644 --- a/src/frontends/tests/frontend/shared/src/library_extension.cpp +++ b/src/frontends/tests/frontend/shared/src/library_extension.cpp @@ -9,6 +9,7 @@ #include "common_test_utils/file_utils.hpp" #include "openvino/op/relu.hpp" #include "openvino/op/swish.hpp" +#include "openvino/runtime/core.hpp" #include "utils.hpp" using namespace ov::frontend; @@ -88,3 +89,30 @@ TEST_P(FrontendLibraryExtensionTest, verifyFunctions) { nodes.end()); } } + +TEST_P(FrontendLibraryExtensionTest, loadExtensionBeforeFrontend) { + // release all frontends internally + ov::shutdown(); + + const auto& lib_path = get_lib_path("test_builtin_extensions"); + + ov::Core core; + core.add_extension(lib_path); + + auto model = core.read_model(m_param.m_modelName); + ASSERT_NE(nullptr, model); + + const auto nodes = model->get_ops(); + ASSERT_EQ(std::find_if(nodes.begin(), + nodes.end(), + [](const std::shared_ptr& n) { + return ov::is_type(n); + }), + nodes.end()); + ASSERT_NE(std::find_if(nodes.begin(), + nodes.end(), + [](const std::shared_ptr& n) { + return ov::is_type(n); + }), + nodes.end()); +} diff --git a/thirdparty/dependencies.cmake b/thirdparty/dependencies.cmake index fac4752c318250..4eed13c9a79af6 100644 --- a/thirdparty/dependencies.cmake +++ b/thirdparty/dependencies.cmake @@ -414,14 +414,14 @@ if(ENABLE_OV_PADDLE_FRONTEND OR ENABLE_OV_ONNX_FRONTEND OR ENABLE_OV_TF_FRONTEND if(CMAKE_VERBOSE_MAKEFILE) set(Protobuf_DEBUG ON) endif() - if(OV_VCPKG_BUILD) - set(protobuf_config CONFIG) - endif() # try to find newer version first (major is changed) # see https://protobuf.dev/support/version-support/ and # https://github.com/protocolbuffers/protobuf/commit/d61f75ff6db36b4f9c0765f131f8edc2f86310fa - find_package(Protobuf 4.22.0 QUIET ${protobuf_config}) + find_package(Protobuf 4.22.0 QUIET CONFIG) if(NOT Protobuf_FOUND) + if(OV_VCPKG_BUILD) + set(protobuf_config CONFIG) + endif() # otherwise, fallback to existing default find_package(Protobuf 3.20.3 REQUIRED ${protobuf_config}) endif() From 2b8827abd06e2e1e6e25887a4fa734d322681631 Mon Sep 17 00:00:00 2001 From: Ilya Lavrenov Date: Fri, 20 Oct 2023 11:37:13 +0400 Subject: [PATCH 30/39] Updated urllib3 to resolve CVE (#20620) --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 69433a40eb64ff..2e643842f24861 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -44,6 +44,6 @@ sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==1.0.3 sphinxcontrib-serializinghtml==1.1.5 toml==0.10.2 -urllib3==1.26.17 +urllib3==1.26.18 zipp==3.4.1 docs/openvino_custom_sphinx_sitemap From 9edbcb1d4d58961afedbf1c4cdc255423e9448c7 Mon Sep 17 00:00:00 2001 From: rsato10 <89757445+rsato10@users.noreply.github.com> Date: Fri, 20 Oct 2023 03:22:30 -0700 Subject: [PATCH 31/39] [TF FE] Support ToBool operation (#20511) * [TF FE][TF Hub] Support ToBool operations * [TF FE][TF Hub] Support ToBool operations * fixing select operation Support ToBool operations for TF Hub models * added false and true const for tobool operations * added reduction axes * Apply suggestions from code review * Update tests/layer_tests/tensorflow_tests/test_tf_ToBool.py * Update tests/layer_tests/tensorflow_tests/test_tf_ToBool.py * Update 
tests/layer_tests/tensorflow_tests/test_tf_ToBool.py

diff --git a/src/frontends/tensorflow/src/op_table.cpp b/src/frontends/tensorflow/src/op_table.cpp
index 41db5dab3c3916..149b2d76184497 100644
--- a/src/frontends/tensorflow/src/op_table.cpp
+++ b/src/frontends/tensorflow/src/op_table.cpp
@@ -278,6 +278,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops() {
         {"TensorListReserve", CreatorFunction(translate_tensor_list_reserve_op)},
         {"TensorListResize", CreatorFunction(translate_tensor_list_resize_op)},
         {"Tile", CreatorFunction(translate_tile_op)},
+        {"ToBool", CreatorFunction(translate_tobool_op)},
         {"TopK", CreatorFunction(translate_top_k_op)},
         {"TopKV2", CreatorFunction(translate_top_k_v2_op)},
         {"Transpose", CreatorFunction(translate_transpose_op)},
diff --git a/src/frontends/tensorflow_common/include/common_op_table.hpp b/src/frontends/tensorflow_common/include/common_op_table.hpp
index 49c09560a7a1ad..75a9bdcafc91ee 100644
--- a/src/frontends/tensorflow_common/include/common_op_table.hpp
+++ b/src/frontends/tensorflow_common/include/common_op_table.hpp
@@ -142,6 +142,7 @@ OP_CONVERTER(translate_tensor_list_set_item_op);
 OP_CONVERTER(translate_tensor_list_stack_op);
 OP_CONVERTER(translate_tensor_list_resize_op);
 OP_CONVERTER(translate_tile_op);
+OP_CONVERTER(translate_tobool_op);
 OP_CONVERTER_NAMED(translate_top_k_op);
 OP_CONVERTER_NAMED(translate_top_k_v2_op);
 OP_CONVERTER(translate_transpose_op);
diff --git a/src/frontends/tensorflow_common/src/op/tobool.cpp b/src/frontends/tensorflow_common/src/op/tobool.cpp
new file mode 100644
index 00000000000000..a8d595800a4f5c
--- /dev/null
+++ b/src/frontends/tensorflow_common/src/op/tobool.cpp
@@ -0,0 +1,68 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "common_op_table.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/equal.hpp"
+#include "openvino/op/greater.hpp"
+#include "openvino/op/logical_and.hpp"
+#include "openvino/op/logical_or.hpp"
+#include "openvino/op/not_equal.hpp"
+#include "openvino/op/reduce_prod.hpp"
+#include "openvino/op/select.hpp"
+#include "openvino/op/shape_of.hpp"
+
+using namespace std;
+using namespace ov::op;
+
+namespace ov {
+namespace frontend {
+namespace tensorflow {
+namespace op {
+OutputVector translate_tobool_op(const NodeContext& node) {
+    // (rank(x) == 0 && x != 0) || (rank > 0 && ReduceProd(ShapeOf(x))) > 0
+
+    default_op_checks(node, 1, {"ToBool"});
+    auto x = node.get_input(0);
+
+    // prepare auxiliary zero constants: one of the same type as the input and one of type i32
+    auto zero = create_same_type_const_scalar<int32_t>(x, 0);
+    auto zero_2 = make_shared<v0::Constant>(element::i32, Shape{}, 0);
+    auto true_const = make_shared<v0::Constant>(element::boolean, Shape{}, true);
+    auto false_const = make_shared<v0::Constant>(element::boolean, Shape{}, false);
+    // compute a mask to get rank(x) == 0
+    auto x_rank = compute_subgraph_scalar_rank(x, element::i32);
+
+    // compute rank(x) == 0
+    auto is_zero = make_shared<v1::Equal>(x_rank, zero_2);
+
+    // compute mask to get x != 0
+    auto is_not_zero = make_shared<v1::NotEqual>(x, zero);
+
+    // compute (rank(x) == 0 && x != 0)
+    auto logical_and = make_shared<v1::LogicalAnd>(is_zero, is_not_zero);
+    // compute rank(x) > 0
+    auto greater_than_zero = make_shared<v1::Greater>(x_rank, zero_2);
+
+    // compute ShapeOf(x)
+    auto cond_shape = make_shared<v3::ShapeOf>(x, element::i32);
+    // compute ReduceProd(ShapeOf(x)) and axis
+    auto axis = make_shared<v0::Constant>(element::i32, Shape{}, 0);
+    auto reduce_prod = make_shared<v1::ReduceProd>(cond_shape, axis);
+
+    // compute ReduceProd(ShapeOf(x)) > 0
+    auto greater_than_zero_2 = make_shared<v1::Greater>(reduce_prod, zero_2);
+    // compute (rank > 0 && ReduceProd(ShapeOf(x)) > 0)
+    auto logical_and_2 = make_shared<v1::LogicalAnd>(greater_than_zero, greater_than_zero_2);
+
+    auto logical_or = make_shared<v1::LogicalOr>(logical_and, logical_and_2);
+
+    auto tobool = make_shared<v1::Select>(logical_or, true_const, false_const);
+    set_node_name(node.get_name(), tobool);
+    return tobool->outputs();
+}
+}  // namespace op
+}  // namespace tensorflow
+}  // namespace frontend
+}  // namespace ov
\ No newline at end of file
diff --git a/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py b/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py
new file mode 100644
index 00000000000000..74da79c36d52a1
--- /dev/null
+++ b/tests/layer_tests/tensorflow_tests/test_tf_ToBool.py
@@ -0,0 +1,43 @@
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pytest
+import tensorflow as tf
+from common.tf_layer_test_class import CommonTFLayerTest
+
+
+class TestToBool(CommonTFLayerTest):
+    def _prepare_input(self, inputs_info):
+        assert 'x' in inputs_info
+        x_shape = inputs_info['x']
+        inputs_data = {}
+        inputs_data['x'] = np.random.randint(-10, 10, x_shape).astype(np.float32)
+
+        return inputs_data
+
+    def create_tobool_net(self, input_shape, input_type):
+        self.input_type = input_type
+        tf.compat.v1.reset_default_graph()
+        # Create the graph and model
+        with tf.compat.v1.Session() as sess:
+            x = tf.compat.v1.placeholder(input_type, input_shape, 'x')
+            tf.raw_ops.ToBool(input=x)
+            tf.compat.v1.global_variables_initializer()
+            tf_net = sess.graph_def
+
+        return tf_net, None
+
+    test_data_basic = [
+        dict(input_shape=[10, 20], input_type=np.float32),
+        dict(input_shape=[2, 3, 4], input_type=np.float32),
+    ]
+
+    @pytest.mark.parametrize("params", test_data_basic)
+    @pytest.mark.precommit_tf_fe
+    @pytest.mark.nightly
+    def test_to_bool_basic(self, params, ie_device, precision, ir_version, temp_dir,
+                           use_new_frontend, use_old_api):
+        self._test(*self.create_tobool_net(**params),
+                   ie_device, precision, ir_version, temp_dir=temp_dir,
+                   use_new_frontend=use_new_frontend, use_old_api=use_old_api)
\ No newline at end of file

From 73d25a0f99ce68259c16fbb673626e1ea531c215 Mon Sep 17 00:00:00 2001
From: Maxim Vafin
Date: Fri, 20 Oct 2023 12:23:13 +0200
Subject: [PATCH 32/39] [PT FE] Add readme for pytorch frontend (#20610)

* Add readme for pytorch frontend

* Mention fx decoder

* Apply suggestions from code review

* Update src/frontends/pytorch/README.md

* Apply suggestions from code review

Co-authored-by: Tatiana Savina

* Apply suggestions from code review
Co-authored-by: Tatiana Savina * Apply suggestions from code review Co-authored-by: Tatiana Savina * Update src/frontends/pytorch/README.md * Update src/frontends/pytorch/README.md Co-authored-by: Tatiana Savina --------- Co-authored-by: Tatiana Savina Co-authored-by: Andrei Kochin --- src/frontends/pytorch/README.md | 141 ++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 src/frontends/pytorch/README.md diff --git a/src/frontends/pytorch/README.md b/src/frontends/pytorch/README.md new file mode 100644 index 00000000000000..92a38d693d9b21 --- /dev/null +++ b/src/frontends/pytorch/README.md @@ -0,0 +1,141 @@ +# OpenVINO PyTorch Frontend + +The PyTorch Frontend (PT FE) is a C++ based OpenVINO Frontend component that is +responsible for reading and converting a PyTorch model to an `ov::Model` object +that can be further serialized into the Intermediate Representation (IR) format. + +## Key Contacts + +People from the [openvino-pytorch-frontend-maintainers](https://github.com/orgs/openvinotoolkit/teams/openvino-pytorch-frontend-maintainers) +have the rights to approve and merge PRs to the PyTorch Frontend component. +They can assist with any questions about the component. + +## Components + +The structure of OpenVINO PyTorch Frontend sources includes the following +directories: + +* [include](./include) is a public frontend API. +* [src](./src/) folder contains the sources of the component. + +## Architecture + +OpenVINO PyTorch Frontend is a C++ component that uses [TorchScriptPythonDecoder](../../bindings/python/src/openvino/frontend/pytorch/ts_decoder.py) +in Python code to parse a PyTorch model from a Python object. Usually, the frontend is +used inside [openvino.convert_model](../../../tools/ovc) in Python code or inside +openvino backend in `torch.compile_model`, in which case `TorchFXPythonDecoder` +is used to decode `torch.fx.graph`. The entire model conversion workflow can be +represented by the following diagram. + +```mermaid +flowchart TD + A[(torch.nn.Module)] --> torch.compile + subgraph torch.compile + subgraph TorchFXPythonDecoder + torch.fx.graph_module.GraphModule + end + TorchFXPythonDecoder --> E("pytorch::FrontEnd::load()") + E -->|ov::InputModel| F("pytorch::FrontEnd::convert()") + F --> G[(ov::Model)] + end + A[(torch.nn.Module)] --> openvino.convert_model + subgraph openvino.convert_model + subgraph TorchScriptPythonDecoder + torch.jit.trace ~~~ torch.jit.script + end + TorchScriptPythonDecoder --> B("pytorch::FrontEnd::load()") + B -->|ov::InputModel| C("pytorch::FrontEnd::convert()") + end + openvino.convert_model --> D[(ov::Model)] +``` + +OpenVINO PyTorch Frontend supports extensions. To add an extension, use +`ov::frontend::pytorch::Frontend::add_extension()` API. +The following extension types are supported: + +* `ov::frontend::tensorflow::ConversionExtension` or `ov::frontend::ConversionExtension` - add a new Loader into the conversion pipeline. +* `ov::TelemetryExtension` - enable telemetry for the frontend. +* `ov::BaseOpExtension` - enable support for a custom operation. +* `ov::detail::SOExtension` - allow support for `ov::BaseOpExtension` extensions loaded from an external library. + +## How to Implement Support for a New PyTorch Operation + +PyTorch conversion into the OpenVINO opset operations consists of two stages: +1. Conversion of PyTorch operations to OpenVINO opset using [translators](./src/op/), + which directly transforms a PyTorch operation into a sub-graph of the OpenVINO + opset. This is a 1->N conversion. +2. 
[Internal Transformations](./src/transforms) that transform a sub-graph of + operations into a sub-graph of the OpenVINO opset. This is an N->N conversion. + +### Operation Translation + +Most PyTorch operations can be converted by a single `translator`. The +dictionary of `translators` is placed in the [op_table.cpp](./src/op_table.cpp) +file and each translator is located in the [op](../tensorflow_common/src/op/) +directory: + +https://github.com/openvinotoolkit/openvino/blob/491454103ea2f29b242587c6084c19868a879a82/src/frontends/pytorch/src/op_table.cpp#L222-L227 + +The main rules for translator implementation: +1. Support dynamic shapes and ranks, undefined types, including future support of new types, such as strings and complex numbers. +2. Try to maintain the same algorithmic complexity of the decomposition. Fewer operations are usually better. +3. Use the latest OpenVINO opset version for the translation. +4. Use helper routines for operation checks and graph construction from `utils.hpp`. +5. Call `NodeContext::mark_mode()` for each created node. + +#### Inplace and Mutable Operations + +Some PyTorch operations modify the input tensor rather than the output. For example, +`aten::add` writes the result of addition to the output, but `aten::add_` writes the result +to its first input. To correctly convert such an operation: +* Ensure that the output tensor produced by the translation has the same type and shape as the initial input. +* Call `NodeContext::mutate_input()` to change the input tensor with the new value. + +#### PtFrameworkNode Primitive + +`PtFrameworkNode` is used to represent unconverted operation from the original +model. You can use `FrontEnd::convert_partially()` instead of `Frontend::convert()` +to get an `ov::Model` containing unconverted operations. + +#### Operations Accepting Strings + +At the moment, OpenVINO core does not support strings. However, since strings in models are usually constants, you can extract them as `std::string` directly from Python using `NodeContext::const_input()`. + +#### Operations with lists, tuples, dicts + +These types are also not supported by OpenVINO core and generally require +implementing transformation for N->N conversion. However, in some simple cases, lists +and tuples can be processed. Helpers for working with lists can be found in `utils.hpp`. +For example, `get_list_as_outputs` enables you to get list elements to work with them +in the translator or transformation. + +### Internal Transformations + +In rare cases, converting PyTorch operations requires transformation. The main +difference between transformation and translation is that transformation works on the graph rather +than on the `NodeContext` of a single operation. This means that some functionality +provided by `NodeContext` is not accessible in transformation and usually +requires working with `PtFramworkNode` directly. [General rules](https://docs.openvino.ai/2023.1/openvino_docs_transformations.html) +for writing transformations also apply to PT FE transformations. + +### PyTorch Frontend Layer Tests + +The layer tests are Python-based tests that check if a PyTorch operation is +supported by PT FE. The testing pipeline of the layer tests consists of four +steps: +1. Create a simple model containing the PyTorch operation to be tested. +2. Convert this model into an OpenVINO Model. +3. Infer the original model using PyTorch and infer the OpenVINO Model. +4. Compare the inference results between both frameworks. 
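+
+A minimal sketch of such a test is shown below. It assumes the `PytorchLayerTest`
+base class from the layer tests infrastructure; the tested operation (`aten::relu`)
+and the input shape are chosen only for illustration:
+
+```python
+import numpy as np
+import pytest
+import torch
+
+from pytorch_layer_test_class import PytorchLayerTest
+
+
+class TestRelu(PytorchLayerTest):
+    def _prepare_input(self):
+        # step 3: the same input is fed to PyTorch and to the OpenVINO Model
+        return (np.random.randn(2, 3).astype(np.float32),)
+
+    def create_model(self):
+        # step 1: a simple model containing only the tested operation
+        class aten_relu(torch.nn.Module):
+            def forward(self, x):
+                return torch.relu(x)
+
+        # (model, reference net or None, name of the checked operation)
+        return aten_relu(), None, "aten::relu"
+
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    def test_relu(self, ie_device, precision, ir_version):
+        # steps 2-4: conversion, inference, and comparison happen inside _test
+        self._test(*self.create_model(), ie_device, precision, ir_version)
+```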
+
+To set up the environment for running the layer tests, follow these [instructions](../../../tests/layer_tests/README.md).
+
+To test the entire suite of the PyTorch operation set support, run the following command:
+```bash
+python -m pytest layer_tests/pytorch_tests
+```
+
+## See Also
+ * [OpenVINO README](../../../README.md)
+ * [OpenVINO Core Components](../../README.md)
+ * [Developer documentation](../../../docs/dev/index.md)

From 891f79ac8492d0913681e9007458348f9d09a2e7 Mon Sep 17 00:00:00 2001
From: Mateusz Mikolajczyk
Date: Fri, 20 Oct 2023 12:24:10 +0200
Subject: [PATCH 33/39] [PT FE] Add aten::as_strided (#19482)

* Add aten::as_strided

* rm commented code

* Update src/frontends/pytorch/src/op/as_strided.cpp

Co-authored-by: Maxim Vafin

* Update src/frontends/pytorch/src/op/as_strided.cpp

Co-authored-by: Maxim Vafin

* Fix CI error

* Fix CI issues

* mark_node for remaining constants

* Add test reproducing issue

* Use strides from torchscript

* Add LED model to test suite

* Add suggested changes

---------

Co-authored-by: Maxim Vafin

---
 .../openvino/frontend/pytorch/ts_decoder.py   |  13 +-
 .../pyopenvino/frontend/pytorch/decoder.hpp   |   4 +
 .../openvino/frontend/pytorch/decoder.hpp     |   3 +
 src/frontends/pytorch/src/op/as_strided.cpp   | 106 +++++++++++++++
 src/frontends/pytorch/src/op_table.cpp        |   2 +
 src/frontends/pytorch/src/utils.hpp           |   3 +
 .../pytorch_tests/test_as_strided.py          | 125 ++++++++++++++++++
 .../torch_tests/hf_transformers_models        |   2 -
 .../torch_tests/test_hf_transformers.py       |   3 +-
 9 files changed, 257 insertions(+), 4 deletions(-)
 create mode 100644 src/frontends/pytorch/src/op/as_strided.cpp
 create mode 100644 tests/layer_tests/pytorch_tests/test_as_strided.py

diff --git a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
index 11d5991e700c42..f7a398bf67e519 100644
--- a/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
+++ b/src/bindings/python/src/openvino/frontend/pytorch/ts_decoder.py
@@ -107,9 +107,10 @@ def _get_scripted_model(self, pt_module, example_inputs=None, skip_freeze=False)
             gptq.unpatch_model(pt_module)

         if not skip_freeze:
+            ops_kind_no_freeze = ["quantize", "aten::as_strided"]
             for n in scripted.inlined_graph.nodes():
                 # TODO: switch off freezing for all traced models
-                if "quantize" in n.kind():
+                if any(kind in n.kind() for kind in ops_kind_no_freeze):
                     # do not freeze quantized models
                     skip_freeze = True
                     break
@@ -150,6 +151,16 @@ def get_input_shape(self, index: int):
         raw_input = self._raw_input(index)
         return self.get_shape_for_value(raw_input)

+    def get_input_strides(self, index: int) -> typing.List[int]:
+        raw_input = self._raw_input(index)
+        if isinstance(raw_input, torch.Value):
+            inp_type = raw_input.type()
+            if isinstance(inp_type, torch.TensorType):
+                strides = inp_type.strides()
+                if strides:
+                    return strides
+        return []
+
     def get_input_type(self, index: int):
         raw_input = self._raw_input(index)
         return self.get_type_for_value(raw_input)
diff --git a/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp b/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp
index a1136e4cda6f66..024b03b2ff4cd9 100644
--- a/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp
+++ b/src/bindings/python/src/pyopenvino/frontend/pytorch/decoder.hpp
@@ -34,6 +34,10 @@ class PyDecoder : public ov::frontend::pytorch::TorchDecoder {
         PYBIND11_OVERRIDE_PURE(ov::PartialShape, TorchDecoder, get_input_shape, index);
     }

+    const std::vector<size_t>& get_input_strides(size_t index) const override {
+        PYBIND11_OVERRIDE_PURE(const std::vector<size_t>&, TorchDecoder, get_input_strides, index);
+    }
+
     ov::Any get_input_type(size_t index) const override {
         PYBIND11_OVERRIDE_PURE(ov::Any, TorchDecoder, get_input_type, index);
     }
diff --git a/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp b/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp
index 066c203e3a1938..d5878783c314af 100644
--- a/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp
+++ b/src/frontends/pytorch/include/openvino/frontend/pytorch/decoder.hpp
@@ -40,6 +40,9 @@ class TorchDecoder : public IDecoder {
     // Return shape if inputs has torch::Tensor type in the original model, otherwise returns the shape [] of a scalar
     virtual PartialShape get_input_shape(size_t index) const = 0;

+    // Return strides if inputs has torch::Tensor type in original model, otherwise return [].
+    virtual const std::vector<size_t>& get_input_strides(size_t index) const = 0;
+
     // Return element::Type when it the original type can be represented, otherwise returns PT-specific data type object
     // (see custom_type.hpp)
     virtual Any get_input_type(size_t index) const = 0;
diff --git a/src/frontends/pytorch/src/op/as_strided.cpp b/src/frontends/pytorch/src/op/as_strided.cpp
new file mode 100644
index 00000000000000..5d1dfe38bdaa17
--- /dev/null
+++ b/src/frontends/pytorch/src/op/as_strided.cpp
@@ -0,0 +1,106 @@
+// Copyright (C) 2018-2023 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "openvino/frontend/pytorch/node_context.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/gather.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/range.hpp"
+#include "openvino/op/reshape.hpp"
+#include "openvino/op/scatter_update.hpp"
+#include "openvino/op/tile.hpp"
+#include "openvino/op/transpose.hpp"
+#include "utils.hpp"
+
+namespace ov {
+namespace frontend {
+namespace pytorch {
+namespace op {
+
+using namespace ov::op;
+bool compare_strides(const std::tuple<size_t, size_t>& a, const std::tuple<size_t, size_t>& b) {
+    return std::get<0>(a) > std::get<0>(b);
+}
+OutputVector translate_as_strided(const NodeContext& context) {
+    // "aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)"
+    num_inputs_check(context, 3, 4);
+    auto decoder = context.get_decoder();
+    auto input = context.get_input(0);
+    auto const_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1}));
+    auto const_0 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0}));
+    auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {-1}));
+    auto input_strides = decoder->get_input_strides(0);
+    FRONT_END_OP_CONVERSION_CHECK(input_strides.size() != 0,
+                                  "aten::as_strided: Couldn't retrieve input stride information from torchscript.");
+
+    std::vector<size_t> idxs(input_strides.size());
+    iota(idxs.begin(), idxs.end(), 0);
+    std::vector<std::tuple<size_t, size_t>> stride_idxs(idxs.size());
+    std::for_each(idxs.rbegin(), idxs.rend(), [&](size_t& idx) {
+        stride_idxs[idx] = {input_strides[idx], idx};
+    });
+
+    std::sort(stride_idxs.begin(), stride_idxs.end(), compare_strides);
+    std::vector<uint64_t> transpose_idx(idxs.size());
+    int transpose_counter = 0;
+    std::for_each(stride_idxs.begin(), stride_idxs.end(), [&](std::tuple<size_t, size_t>& pair) {
+        transpose_idx[transpose_counter] = uint64_t(std::get<1>(pair));
+        transpose_counter++;
+    });
+    auto transpose_idx_const =
+        context.mark_node(v0::Constant::create(element::i32, Shape{transpose_idx.size()}, transpose_idx));
+    auto transposed_input = context.mark_node(std::make_shared<v1::Transpose>(input, transpose_idx_const));
+    auto flat_input = context.mark_node(std::make_shared<v1::Reshape>(transposed_input, const_neg_1, false));
+    std::deque<Output<Node>> sizes;
+    std::deque<Output<Node>> strides;
+    if (std::dynamic_pointer_cast<v0::Constant>(context.get_input_from_visible_context(1).get_node_shared_ptr())) {
+        auto input_vector = context.const_input<std::vector<int64_t>>(1);
+        std::for_each(input_vector.rbegin(), input_vector.rend(), [&](int64_t input_val) {
+            auto const_input = context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_val}));
+            sizes.push_front(const_input);
+        });
+    } else {
+        sizes = get_list_as_outputs(context.get_input(1));
+    }
+    if (std::dynamic_pointer_cast<v0::Constant>(context.get_input_from_visible_context(2).get_node_shared_ptr())) {
+        auto input_vector = context.const_input<std::vector<int64_t>>(2);
+        std::for_each(input_vector.rbegin(), input_vector.rend(), [&](int64_t input_val) {
+            auto const_input = context.mark_node(v0::Constant::create(element::i32, Shape{}, {input_val}));
+            strides.push_front(const_input);
+        });
+    } else {
+        strides = get_list_as_outputs(context.get_input(2));
+    }
+    auto offset = const_0->output(0);
+    if (!context.input_is_none(3)) {
+        offset = context.get_input(3);
+    }
+    FRONT_END_OP_CONVERSION_CHECK(sizes.size() == strides.size(),
+                                  "aten::as_strided: Vector for strides and sizes need to have equal length.");
+    auto strides_size = strides.size() - 1;
+    auto i = 0;
+    auto strides_length_const = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {strides.size()}));
+    auto ones_strides_len = context.mark_node(std::make_shared<v0::Tile>(const_1, strides_length_const));
+    auto indices = const_0;
+    std::for_each(strides.rbegin(), strides.rend(), [&](Output<Node>& stride) {
+        auto const_num_iter = context.mark_node(v0::Constant::create(element::i32, Shape{1}, {strides_size - i}));
+        stride = context.mark_node(std::make_shared<v0::Convert>(stride, element::i32));
+        auto size = sizes.at(strides_size - i);
+        auto range = context.mark_node(std::make_shared<v4::Range>(const_0, size, const_1, element::i32));
+        range = context.mark_node(std::make_shared<v1::Multiply>(range, stride));
+        auto iteration_shape = context.mark_node(
+            std::make_shared<v3::ScatterUpdate>(ones_strides_len, const_num_iter, const_neg_1, const_0));
+        range = context.mark_node(std::make_shared<v1::Reshape>(range, iteration_shape, false));
+        indices = context.mark_node(std::make_shared<v1::Add>(indices, range));
+        i++;
+    });
+    indices = context.mark_node(std::make_shared<v1::Add>(indices, offset));
+    auto gather = context.mark_node(std::make_shared<v8::Gather>(flat_input, indices, const_0));
+    return {gather};
+};
+}  // namespace op
+}  // namespace pytorch
+}  // namespace frontend
+}  // namespace ov
diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp
index 5614a3881c3573..d9ac0aff6af2dc 100644
--- a/src/frontends/pytorch/src/op_table.cpp
+++ b/src/frontends/pytorch/src/op_table.cpp
@@ -34,6 +34,7 @@ OP_CONVERTER(translate_argmax);
 OP_CONVERTER(translate_argsort);
 OP_CONVERTER(translate_argmax);
 OP_CONVERTER(translate_argmin);
+OP_CONVERTER(translate_as_strided);
 OP_CONVERTER(translate_as_tensor);
 OP_CONVERTER(translate_avg_poolnd);
 OP_CONVERTER(translate_bool);
@@ -256,6 +257,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops_ts() {
         {"aten::argmax", op::translate_argmax},
         {"aten::argmin", op::translate_argmin},
         {"aten::argsort", op::translate_argsort},
+        {"aten::as_strided", op::translate_as_strided},
         {"aten::as_tensor", op::translate_as_tensor},
         {"aten::asin", op::translate_1to1_match_1_inputs_with_fp32_type_alignment<opset10::Asin>},
         {"aten::asin_", op::inplace_op<op::translate_1to1_match_1_inputs_with_fp32_type_alignment<opset10::Asin>>},
diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp
index 1635296e612dff..b4a37118961ab7 100644
--- a/src/frontends/pytorch/src/utils.hpp
+++ b/src/frontends/pytorch/src/utils.hpp
@@ -158,6 +158,9 @@ class DummyDecoder : public TorchDecoder {
     virtual PartialShape get_input_shape(size_t index) const override {
         FRONT_END_NOT_IMPLEMENTED(get_input_shape);
     }
+    virtual const std::vector<size_t>& get_input_strides(size_t index) const override {
+        FRONT_END_NOT_IMPLEMENTED(get_input_strides);
+    }
     virtual Any get_input_type(size_t index) const override {
         FRONT_END_NOT_IMPLEMENTED(get_input_type);
     }
diff --git a/tests/layer_tests/pytorch_tests/test_as_strided.py b/tests/layer_tests/pytorch_tests/test_as_strided.py
new file mode 100644
index 00000000000000..9bfaa66d3a7f6b
--- /dev/null
+++ b/tests/layer_tests/pytorch_tests/test_as_strided.py
@@ -0,0 +1,125 @@
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pytest
+import torch
+
+from pytorch_layer_test_class import PytorchLayerTest
+
+
+class TestAsStrided(PytorchLayerTest):
+    def _prepare_input(self):
+        return (np.random.randn(8, 8).astype(np.float32),)
+
+    def create_model(self, size, stride, offset):
+        class aten_as_strided(torch.nn.Module):
+            def __init__(self, size, stride, offset):
+                super().__init__()
+                self.size = size
+                self.stride = stride
+                self.offset = offset
+
+            def forward(self, x):
+                return torch.as_strided(x, self.size, self.stride, self.offset)
+
+        ref_net = None
+
+        return aten_as_strided(size, stride, offset), ref_net, "aten::as_strided"
+
+    @pytest.mark.parametrize(
+        "size,stride",
+        [
+            ([1], [1]),
+            ([2, 2], [1, 1]),
+            ([5, 4, 3], [1, 3, 7]),
+            ([5, 5, 5], [5, 0, 5]),
+            ([1, 2, 3, 4], [4, 3, 2, 1]),
+        ],
+    )
+    @pytest.mark.parametrize("offset", [None, 1, 3, 7])
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    def test_as_strided(self, size, stride, offset, ie_device, precision, ir_version):
+        self._test(*self.create_model(size, stride, offset), ie_device, precision, ir_version, trace_model=True)
+
+
+class TestAsStridedListConstruct(PytorchLayerTest):
+    def _prepare_input(self, size_shape_tensor=[1], stride_shape_tensor=[1]):
+        return (
diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp
index 5614a3881c3573..d9ac0aff6af2dc 100644
--- a/src/frontends/pytorch/src/op_table.cpp
+++ b/src/frontends/pytorch/src/op_table.cpp
@@ -34,6 +34,7 @@ OP_CONVERTER(translate_argmax);
 OP_CONVERTER(translate_argsort);
 OP_CONVERTER(translate_argmax);
 OP_CONVERTER(translate_argmin);
+OP_CONVERTER(translate_as_strided);
 OP_CONVERTER(translate_as_tensor);
 OP_CONVERTER(translate_avg_poolnd);
 OP_CONVERTER(translate_bool);
@@ -256,6 +257,7 @@ const std::map<std::string, CreatorFunction> get_supported_ops_ts() {
         {"aten::argmax", op::translate_argmax},
         {"aten::argmin", op::translate_argmin},
         {"aten::argsort", op::translate_argsort},
+        {"aten::as_strided", op::translate_as_strided},
         {"aten::as_tensor", op::translate_as_tensor},
         {"aten::asin", op::translate_1to1_match_1_inputs_with_fp32_type_alignment<opset10::Asin>},
         {"aten::asin_", op::inplace_op<op::translate_1to1_match_1_inputs_with_fp32_type_alignment<opset10::Asin>>},
diff --git a/src/frontends/pytorch/src/utils.hpp b/src/frontends/pytorch/src/utils.hpp
index 1635296e612dff..b4a37118961ab7 100644
--- a/src/frontends/pytorch/src/utils.hpp
+++ b/src/frontends/pytorch/src/utils.hpp
@@ -158,6 +158,9 @@ class DummyDecoder : public TorchDecoder {
     virtual PartialShape get_input_shape(size_t index) const override {
         FRONT_END_NOT_IMPLEMENTED(get_input_shape);
     }
+    virtual const std::vector<size_t>& get_input_strides(size_t index) const override {
+        FRONT_END_NOT_IMPLEMENTED(get_input_strides);
+    }
     virtual Any get_input_type(size_t index) const override {
         FRONT_END_NOT_IMPLEMENTED(get_input_type);
     }
diff --git a/tests/layer_tests/pytorch_tests/test_as_strided.py b/tests/layer_tests/pytorch_tests/test_as_strided.py
new file mode 100644
index 00000000000000..9bfaa66d3a7f6b
--- /dev/null
+++ b/tests/layer_tests/pytorch_tests/test_as_strided.py
@@ -0,0 +1,125 @@
+# Copyright (C) 2018-2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+import pytest
+import torch
+
+from pytorch_layer_test_class import PytorchLayerTest
+
+
+class TestAsStrided(PytorchLayerTest):
+    def _prepare_input(self):
+        return (np.random.randn(8, 8).astype(np.float32),)
+
+    def create_model(self, size, stride, offset):
+        class aten_as_strided(torch.nn.Module):
+            def __init__(self, size, stride, offset):
+                super().__init__()
+                self.size = size
+                self.stride = stride
+                self.offset = offset
+
+            def forward(self, x):
+                return torch.as_strided(x, self.size, self.stride, self.offset)
+
+        ref_net = None
+
+        return aten_as_strided(size, stride, offset), ref_net, "aten::as_strided"
+
+    @pytest.mark.parametrize(
+        "size,stride",
+        [
+            ([1], [1]),
+            ([2, 2], [1, 1]),
+            ([5, 4, 3], [1, 3, 7]),
+            ([5, 5, 5], [5, 0, 5]),
+            ([1, 2, 3, 4], [4, 3, 2, 1]),
+        ],
+    )
+    @pytest.mark.parametrize("offset", [None, 1, 3, 7])
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    def test_as_strided(self, size, stride, offset, ie_device, precision, ir_version):
+        self._test(*self.create_model(size, stride, offset), ie_device, precision, ir_version, trace_model=True)
+
+
+class TestAsStridedListConstruct(PytorchLayerTest):
+    def _prepare_input(self, size_shape_tensor=[1], stride_shape_tensor=[1]):
+        return (
+            np.random.randn(8, 8).astype(np.float32),
+            np.ones(size_shape_tensor),
+            np.ones(stride_shape_tensor),
+        )
+
+    def create_model(self, size, stride, offset, mode):
+        class aten_as_strided(torch.nn.Module):
+            def __init__(self, size, stride, offset, mode):
+                super().__init__()
+                self.size = size
+                self.stride = stride
+                self.size_shape_tensor = torch.empty(size)
+                self.stride_shape_tensor = torch.empty(stride)
+                self.offset = offset
+                modes = {
+                    "no_const": self.forward_no_const,
+                    "stride_const": self.forward_stride_const,
+                    "size_const": self.forward_size_const,
+                }
+                self.forward = modes.get(mode)
+
+            def forward_no_const(self, x, size_shape_tensor, stride_shape_tensor):
+                sz1, sz2, sz3 = size_shape_tensor.shape
+                st1, st2, st3 = stride_shape_tensor.shape
+                return torch.as_strided(x, [sz1, sz2, sz3], [st1, st2, st3], self.offset)
+
+            def forward_stride_const(self, x, size_shape_tensor, stride_shape_tensor):
+                sz1, sz2, sz3 = size_shape_tensor.shape
+                return torch.as_strided(x, [sz1, sz2, sz3], self.stride, self.offset)
+
+            def forward_size_const(self, x, size_shape_tensor, stride_shape_tensor):
+                st1, st2, st3 = stride_shape_tensor.shape
+                return torch.as_strided(x, self.size, [st1, st2, st3], self.offset)
+
+        ref_net = None
+
+        return aten_as_strided(size, stride, offset, mode), ref_net, ["aten::as_strided", "prim::ListConstruct"]
+
+    @pytest.mark.parametrize("size,stride", [([5, 4, 3], [1, 3, 7]), ([5, 5, 5], [5, 0, 5])])
+    @pytest.mark.parametrize("offset", [None, 7])
+    @pytest.mark.parametrize("mode", ["no_const", "stride_const", "size_const"])
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    def test_as_strided_list_construct(self, size, stride, offset, mode, ie_device, precision, ir_version):
+        inp_kwargs = {"size_shape_tensor": size, "stride_shape_tensor": stride}
+        self._test(
+            *self.create_model(size, stride, offset, mode),
+            ie_device,
+            precision,
+            ir_version,
+            kwargs_to_prepare_input=inp_kwargs,
+            trace_model=True
+        )
+
+
+class TestAsStridedLongformer(PytorchLayerTest):
+    def _prepare_input(self):
+        return (np.random.randn(1, 10, 20, 40).astype(np.float32).transpose([0, 2, 3, 1]),)
+
+    def create_model(self):
+        class aten_as_strided_lf(torch.nn.Module):
+            def forward(self, x):
+                chunk_size = list(x.size())
+                chunk_size[1] = chunk_size[1] * 2 - 1
+                chunk_stride = list(x.stride())
+                chunk_stride[1] = chunk_stride[1] // 2
+                return x.as_strided(size=chunk_size, stride=chunk_stride)
+
+        ref_net = None
+
+        return aten_as_strided_lf(), ref_net, "aten::as_strided"
+
+    @pytest.mark.nightly
+    @pytest.mark.precommit
+    def test_as_strided_lf(self, ie_device, precision, ir_version):
+        self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, freeze_model=False)
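The Longformer test above is the main real-world user of this conversion: halving the chunk stride makes neighbouring chunks overlap by half a window. A small standalone sketch of what that as_strided call produces (illustrative shapes only; the torch calls are real):

    import torch

    x = torch.arange(16.0).reshape(1, 4, 4)   # (batch, chunks, window)
    chunk_size = list(x.size())
    chunk_size[1] = chunk_size[1] * 2 - 1     # 4 chunks -> 7 overlapping chunks
    chunk_stride = list(x.stride())
    chunk_stride[1] = chunk_stride[1] // 2    # step half a window at a time
    overlapped = x.as_strided(size=chunk_size, stride=chunk_stride)
    # Row 1 starts half a window after row 0:
    assert torch.equal(overlapped[0, 1], x.reshape(-1)[2:6])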
diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models b/tests/model_hub_tests/torch_tests/hf_transformers_models
index 0618d98a4d9f31..56deedc29b76ad 100644
--- a/tests/model_hub_tests/torch_tests/hf_transformers_models
+++ b/tests/model_hub_tests/torch_tests/hf_transformers_models
@@ -10,7 +10,6 @@ albert-base-v2,albert
 AlekseyKorshuk/test_reward_model,reward_model,skip,Load problem
 alibaba-damo/mgp-str-base,mgp-str,xfail,Compile error: unsupported Einsum
 allenai/hvila-block-layoutlm-finetuned-docbank,hierarchical_model,skip,Load problem
-allenai/longformer-base-4096,longformer,xfail,Unsupported op aten::as_strided
 ameya772/sentence-t5-base-atis-fine-tuned,T5,skip,Load problem
 andreasmadsen/efficient_mlm_m0.40,roberta-prelayernorm
 anton-l/emformer-base-librispeech,emformer,skip,Load problem
@@ -301,7 +300,6 @@ pie/example-re-textclf-tacred,TransformerTextClassificationModel,skip,Load probl
 pleisto/yuren-baichuan-7b,multimodal_llama,skip,Load problem
 predictia/europe_reanalysis_downscaler_convbaseline,convbilinear,skip,Load problem
 predictia/europe_reanalysis_downscaler_convswin2sr,conv_swin2sr,skip,Load problem
-pszemraj/led-large-book-summary,led,xfail,Unsupported op aten::as_strided
 qmeeus/whisper-small-ner-combined,whisper_for_slu,skip,Load problem
 raman-ai/pcqv2-tokengt-lap16,tokengt,skip,Load problem
 range3/pegasus-gpt2-medium,pegasusgpt2,skip,Load problem
diff --git a/tests/model_hub_tests/torch_tests/test_hf_transformers.py b/tests/model_hub_tests/torch_tests/test_hf_transformers.py
index 184e725a04f9b9..caeb2e0ff2a01d 100644
--- a/tests/model_hub_tests/torch_tests/test_hf_transformers.py
+++ b/tests/model_hub_tests/torch_tests/test_hf_transformers.py
@@ -292,7 +292,8 @@ def teardown_method(self):
         cleanup_dir(hf_hub_cache_dir)
         super().teardown_method()
 
-    @pytest.mark.parametrize("name,type", [("bert-base-uncased", "bert"),
+    @pytest.mark.parametrize("name,type", [("allenai/led-base-16384", "led"),
+                                           ("bert-base-uncased", "bert"),
                                            ("facebook/bart-large-mnli", "bart"),
                                            ("google/flan-t5-base", "t5"),
                                            ("google/tapas-large-finetuned-wtq", "tapas"),

From fc93262d7c68741404200a5ab1b76359ae5d8edb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lucas=20Alava=20Pe=C3=B1a?=
Date: Fri, 20 Oct 2023 12:00:50 +0100
Subject: [PATCH 34/39] fix typo in opset3 shuffle_channels docstring (#20158)

Co-authored-by: Jan Iwaszkiewicz
---
 src/bindings/python/src/compatibility/ngraph/opset3/ops.py | 4 ++--
 src/bindings/python/src/openvino/runtime/opset3/ops.py     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/bindings/python/src/compatibility/ngraph/opset3/ops.py b/src/bindings/python/src/compatibility/ngraph/opset3/ops.py
index 82846826111751..7d7c757d9cd5dc 100644
--- a/src/bindings/python/src/compatibility/ngraph/opset3/ops.py
+++ b/src/bindings/python/src/compatibility/ngraph/opset3/ops.py
@@ -550,9 +550,9 @@ def shuffle_channels(data: Node, axis: int, group: int, name: Optional[str] = No
 
     `data_reshaped` = reshape(`data`, [N, group, C / group, H * W])
 
-    `data_trnasposed` = transpose(`data_reshaped`, [0, 2, 1, 3])
+    `data_transposed` = transpose(`data_reshaped`, [0, 2, 1, 3])
 
-    `output` = reshape(`data_trnasposed`, [N, C, H, W])
+    `output` = reshape(`data_transposed`, [N, C, H, W])
 
     For example:
 
diff --git a/src/bindings/python/src/openvino/runtime/opset3/ops.py b/src/bindings/python/src/openvino/runtime/opset3/ops.py
index 979fda8a782a02..8a1d81d9703ffb 100644
--- a/src/bindings/python/src/openvino/runtime/opset3/ops.py
+++ b/src/bindings/python/src/openvino/runtime/opset3/ops.py
@@ -575,9 +575,9 @@ def shuffle_channels(data: Node, axis: int, group: int, name: Optional[str] = No
 
     `data_reshaped` = reshape(`data`, [N, group, C / group, H * W])
 
-    `data_trnasposed` = transpose(`data_reshaped`, [0, 2, 1, 3])
+    `data_transposed` = transpose(`data_reshaped`, [0, 2, 1, 3])
 
-    `output` = reshape(`data_trnasposed`, [N, C, H, W])
+    `output` = reshape(`data_transposed`, [N, C, H, W])
 
     For example:
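Since the docstring above defines shuffle_channels purely through reshape/transpose/reshape, the fix is easy to sanity-check in NumPy (a sketch with illustrative sizes, not part of the patch):

    import numpy as np

    N, C, H, W, group = 1, 6, 2, 2, 3
    data = np.arange(N * C * H * W).reshape(N, C, H, W)

    data_reshaped = data.reshape(N, group, C // group, H * W)
    data_transposed = data_reshaped.transpose(0, 2, 1, 3)
    output = data_transposed.reshape(N, C, H, W)

    # Channels come out interleaved across the groups: [0, 2, 4, 1, 3, 5]
    assert (output[0, 1] == data[0, 2]).all()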
From b17d0fe7f5000a4be64f33257d0dd2348be5bddd Mon Sep 17 00:00:00 2001
From: Maxim Vafin
Date: Fri, 20 Oct 2023 18:12:00 +0200
Subject: [PATCH 35/39] Update model list for hf hub tests (#20485)

* Update model list and install cpu torch

* Move to hub tests

* Update tests/model_hub_tests/torch_tests/requirements.txt

* Make pytorch maintainers owners of torch tests
---
 .github/CODEOWNERS                     |  1 +
 .../torch_tests/hf_transformers_models | 14 +++++++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 3f2178a1c681e5..3727c4d88f6e8b 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -94,6 +94,7 @@
 /tests/layer_tests/tensorflow_tests @openvinotoolkit/openvino-tf-frontend-maintainers
 /tests/layer_tests/jax_tests @openvinotoolkit/openvino-tf-frontend-maintainers
 /tests/model_hub_tests @openvinotoolkit/openvino-tf-frontend-maintainers
+/tests/model_hub_tests/torch_tests @openvinotoolkit/openvino-pytorch-frontend-maintainers
 
 # Tools:
 /tools/ @openvinotoolkit/openvino-tools-maintainers
diff --git a/tests/model_hub_tests/torch_tests/hf_transformers_models b/tests/model_hub_tests/torch_tests/hf_transformers_models
index 56deedc29b76ad..31a24b681eb4c5 100644
--- a/tests/model_hub_tests/torch_tests/hf_transformers_models
+++ b/tests/model_hub_tests/torch_tests/hf_transformers_models
@@ -67,7 +67,7 @@ facebook/detr-resnet-50,detr
 facebook/dinov2-base,dinov2,skip,Load problem
 facebook/dpr-question_encoder-single-nq-base,dpr
 facebook/encodec_24khz,encodec,xfail,Unsupported op aten::lstm
-facebook/esm2_t6_8M_UR50D,esm,xfail,Tracing error: The values for attribute 'shape' do not match
+facebook/esm2_t6_8M_UR50D,esm
 facebook/flava-full,flava,xfail,Tracing problem
 facebook/flava-image-codebook,flava_image_codebook,skip,Load problem
 facebook/m2m100_418M,m2m_100
@@ -122,10 +122,10 @@ hf-internal-testing/tiny-random-Data2VecAudioModel,data2vec-audio,skip,Load prob
 hf-internal-testing/tiny-random-Data2VecTextModel,data2vec-text
 hf-internal-testing/tiny-random-Data2VecVisionModel,data2vec-vision
 hf-internal-testing/tiny-random-DeiTModel,deit
-hf-internal-testing/tiny-random-DonutSwinModel,donut-swin,xfail,Unsupported op aten::adaptive_avg_pool1d
+hf-internal-testing/tiny-random-DonutSwinModel,donut-swin
 hf-internal-testing/tiny-random-EfficientFormerForImageClassification,efficientformer
 hf-internal-testing/tiny-random-flaubert,flaubert
-hf-internal-testing/tiny-random-FocalNetModel,focalnet,xfail,Unsupported op aten::adaptive_avg_pool1d
+hf-internal-testing/tiny-random-FocalNetModel,focalnet
 hf-internal-testing/tiny-random-GPTBigCodeForCausalLM,gpt_bigcode,xfail,Conversion is failed for: aten::mul
 hf-internal-testing/tiny-random-GPTJModel,gptj
 hf-internal-testing/tiny-random-groupvit,groupvit
@@ -154,7 +154,7 @@ hf-internal-testing/tiny-random-Speech2TextModel,speech_to_text,skip,Load proble
 hf-internal-testing/tiny-random-speech-encoder-decoder,speech-encoder-decoder,skip,Load problem
 hf-internal-testing/tiny-random-SplinterModel,splinter
 hf-internal-testing/tiny-random-SqueezeBertModel,squeezebert
-hf-internal-testing/tiny-random-SwinModel,swin,xfail,Unsupported op aten::adaptive_avg_pool1d
+hf-internal-testing/tiny-random-SwinModel,swin
 hf-internal-testing/tiny-random-unispeech,unispeech,skip,Load problem
 hf-internal-testing/tiny-random-UniSpeechSatModel,unispeech-sat,skip,Load problem
 hf-internal-testing/tiny-random-vision_perceiver_conv,perceiver
@@ -246,7 +246,7 @@ microsoft/markuplm-base,markuplm
 microsoft/resnet-50,resnet
 microsoft/speecht5_hifigan,hifigan,skip,Load problem
 microsoft/speecht5_tts,speecht5,skip,Load problem
-microsoft/swinv2-tiny-patch4-window8-256,swinv2,xfail,Unsupported op aten::adaptive_avg_pool1d
+microsoft/swinv2-tiny-patch4-window8-256,swinv2
 microsoft/table-transformer-detection,table-transformer
 microsoft/wavlm-large,wavlm,skip,Load problem
 microsoft/xclip-base-patch32,xclip,skip,Load problem
@@ -328,8 +328,8 @@ sheonhan/ict-imagenet-256,ict,skip,Load problem
 shibing624/text2vec-base-chinese-paraphrase,ernie
 shikhartuli/flexibert-mini,flexibert,skip,Load problem
 shikras/shikra-7b-delta-v1-0708,shikra,skip,Load problem
-shi-labs/dinat-mini-in1k-224,dinat,xfail,Unsupported op aten::adaptive_avg_pool1d
-shi-labs/nat-mini-in1k-224,nat,xfail,Unsupported op aten::adaptive_avg_pool1d
+shi-labs/dinat-mini-in1k-224,dinat,xfail,Accuracy validation failed
+shi-labs/nat-mini-in1k-224,nat,xfail,Accuracy validation failed
 shi-labs/oneformer_ade20k_swin_large,oneformer,skip,Load problem
 shuqi/seed-encoder,seed_encoder,skip,Load problem
 sijunhe/nezha-cn-base,nezha

From 0934d2a7dd56014b6bfa5d3af1f2e1bbc7f8abb7 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Sun, 22 Oct 2023 01:02:32 +0400
Subject: [PATCH 36/39] Moved all tensorflow proto files to a folder with ov
 prefix to avoid ODR conflicts (#20636)

---
 .../frontends/frontends.cmake                      | 17 +++++---
 src/frontends/paddle/src/decoder_proto.cpp         |  6 ++--
 src/frontends/paddle/src/decoder_proto.hpp         |  4 +--
 src/frontends/paddle/src/frontend.cpp              |  2 +-
 src/frontends/paddle/src/input_model.cpp           | 10 +++---
 src/frontends/paddle/src/place.cpp                 | 14 ++++----
 src/frontends/paddle/src/place.hpp                 | 20 +++++------
 .../paddle/src/proto/framework.proto               |  2 +-
 .../tensorflow/src/checkpoint_v1_reader.cpp        |  6 ++--
 .../tensorflow/src/checkpoint_v1_reader.hpp        | 10 +++---
 .../tensorflow/src/decoder_argdef.cpp              |  4 +--
 .../tensorflow/src/decoder_argdef.hpp              | 22 ++++++------
 .../tensorflow/src/decoder_proto.cpp               | 34 +++++++++----------
 .../tensorflow/src/decoder_proto.hpp               | 24 ++++++------
 .../tensorflow/src/graph_iterator_meta.cpp         | 30 ++++++++--------
 .../tensorflow/src/graph_iterator_meta.hpp         | 14 ++++----
 .../tensorflow/src/graph_iterator_proto.hpp        | 20 +++++------
 .../src/graph_iterator_proto_txt.hpp               |  2 +-
 .../src/graph_iterator_saved_model.cpp             | 30 ++++++++--------
 .../src/graph_iterator_saved_model.hpp             | 16 ++++-----
 .../tensorflow/src/op/var_handle.cpp               |  6 ++--
 .../tensorflow/src/op/xla_conv_v2.cpp              |  4 +--
 src/frontends/tensorflow/src/op/xla_dot.cpp        |  4 +--
 .../allocation_description.proto                   |  2 +-
 .../proto/{ => ov_tensorflow}/api_def.proto        |  4 +--
 .../{ => ov_tensorflow}/attr_value.proto           |  8 ++---
 .../{ => ov_tensorflow}/cost_graph.proto           |  6 ++--
 .../{ => ov_tensorflow}/dataset_options.proto      |  2 +-
 .../device_attributes.proto                        |  2 +-
 .../proto/{ => ov_tensorflow}/function.proto       |  8 ++---
 .../src/proto/{ => ov_tensorflow}/graph.proto      |  8 ++---
 .../graph_transfer_info.proto                      |  4 +--
 .../{ => ov_tensorflow}/kernel_def.proto           |  4 +--
 .../{ => ov_tensorflow}/log_memory.proto           |  4 +--
 .../{ => ov_tensorflow}/meta_graph.proto           | 16 ++++-----
 .../src/proto/{ => ov_tensorflow}/model.proto      |  2 +-
 .../proto/{ => ov_tensorflow}/node_def.proto       |  4 +--
 .../proto/{ => ov_tensorflow}/op_def.proto         |  9 ++---
 .../{ => ov_tensorflow}/reader_base.proto          |  2 +-
 .../remote_fused_graph_execute_info.proto          |  8 ++---
 .../{ => ov_tensorflow}/resource_handle.proto      |  6 ++--
 .../{ => ov_tensorflow}/saved_model.proto          |  4 +--
 .../saved_object_graph.proto                       | 14 ++++----
 .../saved_tensor_slice.proto                       | 12 +++----
 .../src/proto/{ => ov_tensorflow}/saver.proto      |  2 +-
 .../{ => ov_tensorflow}/step_stats.proto           |  6 ++--
 .../proto/{ => ov_tensorflow}/struct.proto         | 24 ++++++-------
 .../proto/{ => ov_tensorflow}/summary.proto        |  4 +--
 .../proto/{ => ov_tensorflow}/tensor.proto         |  8 ++---
 .../{ => ov_tensorflow}/tensor_bundle.proto        | 10 +++---
 .../tensor_description.proto                       |  8 ++---
 .../{ => ov_tensorflow}/tensor_shape.proto         |  2 +-
 .../{ => ov_tensorflow}/tensor_slice.proto         |  2 +-
 .../trackable_object_graph.proto                   |  2 +-
 .../src/proto/{ => ov_tensorflow}/types.proto      |  2 +-
 .../proto/{ => ov_tensorflow}/variable.proto       |  2 +-
 .../proto/{ => ov_tensorflow}/versions.proto       |  2 +-
 .../proto/{ => ov_tensorflow}/xla_data.proto       |  2 +-
 src/frontends/tensorflow/src/tf_utils.cpp          | 34 +++++++++----------
 src/frontends/tensorflow/src/tf_utils.hpp          | 20 +++++------
 .../tensorflow/src/variables_index.cpp             | 22 ++++++------
 .../tensorflow/src/variables_index.hpp             |  4 +--
 62 files changed, 297 insertions(+), 289 deletions(-)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/allocation_description.proto (98%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/api_def.proto (98%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/attr_value.proto (95%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/cost_graph.proto (97%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/dataset_options.proto (99%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/device_attributes.proto (98%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/function.proto (97%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/graph.proto (95%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/graph_transfer_info.proto (97%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/kernel_def.proto (97%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/log_memory.proto (97%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/meta_graph.proto (97%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/model.proto (99%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/node_def.proto (98%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/op_def.proto (98%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/reader_base.proto (98%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/remote_fused_graph_execute_info.proto (93%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/resource_handle.proto (95%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/saved_model.proto (95%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/saved_object_graph.proto (97%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/saved_tensor_slice.proto (94%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/saver.proto (99%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/step_stats.proto (96%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/struct.proto (93%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/summary.proto (99%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/tensor.proto (96%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/tensor_bundle.proto (93%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/tensor_description.proto (88%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/tensor_shape.proto (99%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/tensor_slice.proto (98%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/trackable_object_graph.proto (99%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/types.proto (99%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/variable.proto (99%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/versions.proto (98%)
 rename src/frontends/tensorflow/src/proto/{ => ov_tensorflow}/xla_data.proto (99%)

diff --git a/cmake/developer_package/frontends/frontends.cmake b/cmake/developer_package/frontends/frontends.cmake
index a86c57c6c87845..a20b1665fb7d29 100644
--- a/cmake/developer_package/frontends/frontends.cmake
+++ b/cmake/developer_package/frontends/frontends.cmake
@@ -125,17 +125,24 @@ macro(ov_add_frontend)
     source_group("public include" FILES ${LIBRARY_PUBLIC_HEADERS})
 
     # Generate protobuf file on build time for each '.proto' file in src/proto
-    file(GLOB proto_files ${frontend_root_dir}/src/proto/*.proto)
+    set(protofiles_root_dir "${frontend_root_dir}/src/proto")
+    file(GLOB_RECURSE proto_files ${protofiles_root_dir}/*.proto)
 
     foreach(proto_file IN LISTS proto_files)
+        # filter out standard google proto files
+        if(proto_file MATCHES ".*google.*")
+            continue()
+        endif()
+
         file(RELATIVE_PATH proto_file_relative "${CMAKE_SOURCE_DIR}" "${proto_file}")
-        get_filename_component(FILE_DIR ${proto_file} DIRECTORY)
         get_filename_component(FILE_WE ${proto_file} NAME_WE)
-        set(OUTPUT_PB_SRC ${CMAKE_CURRENT_BINARY_DIR}/${FILE_WE}.pb.cc)
-        set(OUTPUT_PB_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${FILE_WE}.pb.h)
+        file(RELATIVE_PATH relative_path ${protofiles_root_dir} ${proto_file})
+        get_filename_component(relative_path ${relative_path} DIRECTORY)
+        set(OUTPUT_PB_SRC ${CMAKE_CURRENT_BINARY_DIR}/${relative_path}/${FILE_WE}.pb.cc)
+        set(OUTPUT_PB_HEADER ${CMAKE_CURRENT_BINARY_DIR}/${relative_path}/${FILE_WE}.pb.h)
         add_custom_command(
             OUTPUT "${OUTPUT_PB_SRC}" "${OUTPUT_PB_HEADER}"
-            COMMAND ${PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${FILE_DIR} ${FILE_WE}.proto
+            COMMAND ${PROTOC_EXECUTABLE} ARGS --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${protofiles_root_dir} ${proto_file}
            DEPENDS ${PROTOC_DEPENDENCY} ${proto_file}
            COMMENT "Running C++ protocol buffer compiler (${PROTOC_EXECUTABLE}) on ${proto_file_relative}"
            VERBATIM
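The rewritten rule derives each output path from the .proto file's location relative to src/proto, so a file like ov_tensorflow/types.proto now generates ov_tensorflow/types.pb.cc instead of a flat types.pb.cc. The same path arithmetic, sketched in Python (directory names here are examples only, not the actual build tree):

    import os

    protofiles_root_dir = "src/frontends/tensorflow/src/proto"
    binary_dir = "build/src/frontends/tensorflow"

    def pb_outputs(proto_file):
        # Keep the sub-folder so generated sources mirror the proto layout.
        relative = os.path.relpath(proto_file, protofiles_root_dir)
        subdir = os.path.dirname(relative)
        stem = os.path.splitext(os.path.basename(relative))[0]
        return (os.path.join(binary_dir, subdir, stem + ".pb.cc"),
                os.path.join(binary_dir, subdir, stem + ".pb.h"))

    print(pb_outputs(protofiles_root_dir + "/ov_tensorflow/types.proto"))
    # ('build/src/frontends/tensorflow/ov_tensorflow/types.pb.cc',
    #  'build/src/frontends/tensorflow/ov_tensorflow/types.pb.h')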
diff --git a/src/frontends/paddle/src/decoder_proto.cpp b/src/frontends/paddle/src/decoder_proto.cpp
index e25437fcbf4a2e..f286bfcf1f81fc 100644
--- a/src/frontends/paddle/src/decoder_proto.cpp
+++ b/src/frontends/paddle/src/decoder_proto.cpp
@@ -19,9 +19,9 @@ namespace ov {
 namespace frontend {
 namespace paddle {
 
-using namespace ::ov_paddle::framework;
+using namespace ::paddle::framework;
 
-ov::element::Type get_ov_type(const ::ov_paddle::framework::proto::VarType_Type& type) {
+ov::element::Type get_ov_type(const ::paddle::framework::proto::VarType_Type& type) {
     static const std::map<proto::VarType_Type, ov::element::Type> type_map{
         {proto::VarType_Type::VarType_Type_BOOL, ov::element::boolean},
         {proto::VarType_Type::VarType_Type_INT16, ov::element::i16},
@@ -189,7 +189,7 @@ std::vector<proto::OpDesc_Attr> DecoderProto::decode_attribute_helper(const std:
 
 namespace {
 inline std::map<std::string, OutputVector> map_for_each_input_impl(
-    const google::protobuf::RepeatedPtrField<::ov_paddle::framework::proto::OpDesc_Var>& c,
+    const google::protobuf::RepeatedPtrField<::paddle::framework::proto::OpDesc_Var>& c,
     const std::function<Output<Node>(const std::string&, size_t)>& func) {
     size_t idx = 0;
     std::map<std::string, OutputVector> res;
diff --git a/src/frontends/paddle/src/decoder_proto.hpp b/src/frontends/paddle/src/decoder_proto.hpp
index 652b03fd3ea76b..11627c6fba6ab9 100644
--- a/src/frontends/paddle/src/decoder_proto.hpp
+++ b/src/frontends/paddle/src/decoder_proto.hpp
@@ -23,7 +23,7 @@ namespace ov {
 namespace frontend {
 namespace paddle {
 
-ov::element::Type get_ov_type(const ::ov_paddle::framework::proto::VarType_Type& type);
+ov::element::Type get_ov_type(const ::paddle::framework::proto::VarType_Type& type);
 
 class DecoderProto : public paddle::DecoderBase {
 public:
@@ -56,7 +56,7 @@ class DecoderProto : public paddle::DecoderBase {
         const std::function<Output<Node>(const std::string&, size_t)>& func) const;
 
 private:
-    std::vector<::ov_paddle::framework::proto::OpDesc_Attr> decode_attribute_helper(const std::string& name) const;
+    std::vector<::paddle::framework::proto::OpDesc_Attr> decode_attribute_helper(const std::string& name) const;
 
     std::weak_ptr<OpPlace> op_place;
 
     const std::shared_ptr<OpPlace> get_place() const {
diff --git a/src/frontends/paddle/src/frontend.cpp b/src/frontends/paddle/src/frontend.cpp
index 2bc0ba333bb241..9582fccf6c447f 100644
--- a/src/frontends/paddle/src/frontend.cpp
+++ b/src/frontends/paddle/src/frontend.cpp
@@ -393,7 +393,7 @@ bool FrontEnd::supported_impl(const std::vector<ov::Any>& variants) const {
     else if (variants[0].is<std::istream*>()) {
         // Validating first stream, it must contain a model
         auto p_model_stream = variants[0].as<std::istream*>();
-        ::ov_paddle::framework::proto::ProgramDesc fw;
+        ::paddle::framework::proto::ProgramDesc fw;
         return fw.ParseFromIstream(p_model_stream);
     }
     return false;
diff --git a/src/frontends/paddle/src/input_model.cpp b/src/frontends/paddle/src/input_model.cpp
index 1264d983965e5d..287fa5e54ad743 100644
--- a/src/frontends/paddle/src/input_model.cpp
+++ b/src/frontends/paddle/src/input_model.cpp
@@ -21,7 +21,7 @@ namespace ov {
 namespace frontend {
 namespace paddle {
 
-using namespace ::ov_paddle::framework::proto;
+using namespace ::paddle::framework::proto;
 
 class InputModel::InputModelImpl {
 public:
@@ -279,7 +279,7 @@ void InputModel::InputModelImpl::load_consts(const std::basic_string<T>& folder_
         if (!var_desc.persistable())
             continue;
 
-        FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::ov_paddle::framework::proto::VarType::LOD_TENSOR);
+        FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::paddle::framework::proto::VarType::LOD_TENSOR);
         const auto& tensor = var_desc.type().lod_tensor().tensor();
         Shape shape(tensor.dims().cbegin(), tensor.dims().cend());
         const auto& type = get_ov_type(tensor.data_type());
@@ -324,7 +324,7 @@ void InputModel::InputModelImpl::load_consts(std::istream* weight_stream) {
         if (!var_desc.persistable())
             continue;
 
-        FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::ov_paddle::framework::proto::VarType::LOD_TENSOR);
+        FRONT_END_GENERAL_CHECK(var_desc.type().type() == ::paddle::framework::proto::VarType::LOD_TENSOR);
         FRONT_END_GENERAL_CHECK(weight_stream != nullptr && weight_stream->peek() != EOF,
                                 "PaddlePaddle *.pdiparams format weight file doesn't exist!");
         /*
@@ -350,8 +350,8 @@ void InputModel::InputModelImpl::load_consts(std::istream* weight_stream) {
         std::unique_ptr<char[]> buf(new char[size]);
         weight_stream->read(reinterpret_cast<char*>(buf.get()), size);
 
-        std::unique_ptr<::ov_paddle::framework::proto::VarType_TensorDesc> tensor_desc(
-            new ::ov_paddle::framework::proto::VarType_TensorDesc());
+        std::unique_ptr<::paddle::framework::proto::VarType_TensorDesc> tensor_desc(
+            new ::paddle::framework::proto::VarType_TensorDesc());
         tensor_desc->ParseFromArray(buf.get(), size);
         Shape shape(tensor_desc->dims().cbegin(), tensor_desc->dims().cend());
         const auto& type = get_ov_type(tensor_desc->data_type());
diff --git a/src/frontends/paddle/src/place.cpp b/src/frontends/paddle/src/place.cpp
index ab5232018a6a9f..7af2bc07bbf5c5 100644
--- a/src/frontends/paddle/src/place.cpp
+++ b/src/frontends/paddle/src/place.cpp
@@ -29,12 +29,12 @@ bool Place::is_output() const {
 }
 
 OpPlace::OpPlace(const ov::frontend::InputModel& input_model,
-                 const ::ov_paddle::framework::proto::OpDesc& op_desc,
+                 const ::paddle::framework::proto::OpDesc& op_desc,
                  const std::vector<std::string>& names)
     : Place(input_model, names),
       m_op_desc(op_desc) {}
 
-OpPlace::OpPlace(const ov::frontend::InputModel& input_model, const ::ov_paddle::framework::proto::OpDesc& op_desc)
+OpPlace::OpPlace(const ov::frontend::InputModel& input_model, const ::paddle::framework::proto::OpDesc& op_desc)
     : OpPlace(input_model, op_desc, {}) {}
 
 const std::map<std::string, std::vector<std::shared_ptr<OutPortPlace>>>& OpPlace::get_output_ports() const {
@@ -58,7 +58,7 @@ std::shared_ptr<InPortPlace> OpPlace::get_input_port_paddle(const std::string& i
     return m_input_ports.at(inputName)[inputPortIndex];
 }
 
-const ::ov_paddle::framework::proto::OpDesc& OpPlace::get_desc() const {
+const ::paddle::framework::proto::OpDesc& OpPlace::get_desc() const {
     return m_op_desc;
 }
 
@@ -207,11 +207,11 @@ Place::Ptr OpPlace::get_target_tensor(int outputPortIndex) const {
 
 TensorPlace::TensorPlace(const ov::frontend::InputModel& input_model,
                          const std::vector<std::string>& names,
-                         const ::ov_paddle::framework::proto::VarDesc& var_desc)
+                         const ::paddle::framework::proto::VarDesc& var_desc)
     : Place(input_model, names),
       m_var_desc(var_desc) {
     const auto& var_type = var_desc.type();
-    if (var_type.type() == ::ov_paddle::framework::proto::VarType::LOD_TENSOR) {
+    if (var_type.type() == ::paddle::framework::proto::VarType::LOD_TENSOR) {
         const auto& tensor_desc = var_type.lod_tensor().tensor();
         m_type = get_ov_type(tensor_desc.data_type());
         m_pshape = PartialShape(std::vector<Dimension>(tensor_desc.dims().begin(), tensor_desc.dims().end()));
     }
 }
 
 TensorPlace::TensorPlace(const ov::frontend::InputModel& input_model,
-                         const ::ov_paddle::framework::proto::VarDesc& var_desc)
+                         const ::paddle::framework::proto::VarDesc& var_desc)
     : TensorPlace(input_model, {var_desc.name()}, var_desc) {}
 
 std::vector<Place::Ptr> TensorPlace::get_consuming_ports() const {
@@ -250,7 +250,7 @@ void TensorPlace::add_consuming_port(const std::shared_ptr<InPortPlace>& in_port
     m_consuming_ports.push_back(in_port);
 }
 
-const ::ov_paddle::framework::proto::VarDesc& TensorPlace::get_desc() const {
+const ::paddle::framework::proto::VarDesc& TensorPlace::get_desc() const {
     return m_var_desc;
 }
 
diff --git a/src/frontends/paddle/src/place.hpp b/src/frontends/paddle/src/place.hpp
index e09112dd42f295..fc2fe9eb29efe0 100644
--- a/src/frontends/paddle/src/place.hpp
+++ b/src/frontends/paddle/src/place.hpp
@@ -7,7 +7,7 @@
 #include "input_model.hpp"
 #include "openvino/frontend/manager.hpp"
 
-namespace ov_paddle {
+namespace paddle {
 namespace framework {
 namespace proto {
 class OpDesc;
@@ -15,7 +15,7 @@ class VarDesc;
 }  // namespace proto
 }  // namespace framework
-}  // namespace ov_paddle
+}  // namespace paddle
 
 namespace ov {
 namespace frontend {
@@ -101,10 +101,10 @@ class OutPortPlace : public Place {
 class OpPlace : public Place {
 public:
     OpPlace(const ov::frontend::InputModel& input_model,
-            const ::ov_paddle::framework::proto::OpDesc& op_desc,
+            const ::paddle::framework::proto::OpDesc& op_desc,
             const std::vector<std::string>& names);
 
-    OpPlace(const ov::frontend::InputModel& input_model, const ::ov_paddle::framework::proto::OpDesc& op_desc);
+    OpPlace(const ov::frontend::InputModel& input_model, const ::paddle::framework::proto::OpDesc& op_desc);
 
     void add_in_port(const std::shared_ptr<InPortPlace>& input, const std::string& name);
     void add_out_port(const std::shared_ptr<OutPortPlace>& output, const std::string& name);
@@ -114,7 +114,7 @@ class OpPlace : public Place {
     const std::map<std::string, std::vector<std::shared_ptr<InPortPlace>>>& get_input_ports() const;
     std::shared_ptr<OutPortPlace> get_output_port_paddle(const std::string& outputName, int outputPortIndex) const;
     std::shared_ptr<InPortPlace> get_input_port_paddle(const std::string& inputName, int inputPortIndex) const;
-    const ::ov_paddle::framework::proto::OpDesc& get_desc() const;
+    const ::paddle::framework::proto::OpDesc& get_desc() const;
 
     const std::shared_ptr<DecoderBase> get_decoder() const;
     void set_decoder(const std::shared_ptr<DecoderBase> op_decoder);
@@ -152,7 +152,7 @@ class OpPlace : public Place {
     Ptr get_target_tensor(const std::string& outputName, int outputPortIndex) const override;
 
 private:
-    const ::ov_paddle::framework::proto::OpDesc& m_op_desc;  // TODO: to conceal it behind decoder.
+    const ::paddle::framework::proto::OpDesc& m_op_desc;  // TODO: to conceal it behind decoder.
     std::shared_ptr<DecoderBase> m_op_decoder;
     std::map<std::string, std::vector<std::shared_ptr<InPortPlace>>> m_input_ports;
     std::map<std::string, std::vector<std::shared_ptr<OutPortPlace>>> m_output_ports;
@@ -162,9 +162,9 @@ class TensorPlace : public Place {
 public:
     TensorPlace(const ov::frontend::InputModel& input_model,
                 const std::vector<std::string>& names,
-                const ::ov_paddle::framework::proto::VarDesc& var_desc);
+                const ::paddle::framework::proto::VarDesc& var_desc);
 
-    TensorPlace(const ov::frontend::InputModel& input_model, const ::ov_paddle::framework::proto::VarDesc& var_desc);
+    TensorPlace(const ov::frontend::InputModel& input_model, const ::paddle::framework::proto::VarDesc& var_desc);
 
     void add_producing_port(const std::shared_ptr<OutPortPlace>& out_port);
     void add_consuming_port(const std::shared_ptr<InPortPlace>& in_port);
@@ -182,7 +182,7 @@ class TensorPlace : public Place {
     void set_element_type(const element::Type& type) {
         m_type = type;
     }
-    const ::ov_paddle::framework::proto::VarDesc& get_desc() const;
+    const ::paddle::framework::proto::VarDesc& get_desc() const;
 
     // External usage
     Ptr get_producing_operation() const override;
@@ -192,7 +192,7 @@ class TensorPlace : public Place {
     bool is_equal_data(const Ptr& another) const override;
 
 private:
-    const ::ov_paddle::framework::proto::VarDesc& m_var_desc;
+    const ::paddle::framework::proto::VarDesc& m_var_desc;
 
     PartialShape m_pshape;
     element::Type m_type;
diff --git a/src/frontends/paddle/src/proto/framework.proto b/src/frontends/paddle/src/proto/framework.proto
index 4fc9c26c47e9ca..22112cba29667d 100644
--- a/src/frontends/paddle/src/proto/framework.proto
+++ b/src/frontends/paddle/src/proto/framework.proto
@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 syntax = "proto2";
-package ov_paddle.framework.proto;
+package paddle.framework.proto;
 
 option optimize_for = LITE_RUNTIME;
 
 // Added by Intel Corporation 2021-2022
diff --git a/src/frontends/tensorflow/src/checkpoint_v1_reader.cpp b/src/frontends/tensorflow/src/checkpoint_v1_reader.cpp
index c74173af792d76..185b374e6bc42f 100644
--- a/src/frontends/tensorflow/src/checkpoint_v1_reader.cpp
+++ b/src/frontends/tensorflow/src/checkpoint_v1_reader.cpp
@@ -7,7 +7,7 @@
 #include "checkpoint_utils.hpp"
 #include "openvino/frontend/exception.hpp"
 #include "openvino/util/file_util.hpp"
-#include "saved_tensor_slice.pb.h"
+#include "ov_tensorflow/saved_tensor_slice.pb.h"
 #include "tf_utils.hpp"
 
 #ifdef ENABLE_SNAPPY_COMPRESSION
@@ -68,7 +68,7 @@ void CheckpointV1Reader::initialize() {
         // parse empty index block
         // This is only present at the first item of each checkpoint file and serves
         // as a table of contents, listing all the tensor slices saved in this file.
-        ::ov_tensorflow::SavedTensorSlices sts;
+        ::tensorflow::SavedTensorSlices sts;
         FRONT_END_GENERAL_CHECK(sts.ParseFromArray(value.data(), static_cast<int>(value.size())),
                                 "[TensorFlow Frontend] incorrect input checkpoint file or internal error: cannot parse "
                                 "SavedTensorSlices entry");
@@ -254,7 +254,7 @@ void CheckpointV1Reader::read_variable(const std::string& variable_name, ov::Any
 
     // This is only present at the first item of each checkpoint file and serves
     // as a table of contents, listing all the tensor slices saved in this file.
-    ::ov_tensorflow::SavedTensorSlices sts;
+    ::tensorflow::SavedTensorSlices sts;
     FRONT_END_GENERAL_CHECK(sts.ParseFromArray(raw_data.data(), static_cast<int>(raw_data.size())),
                             "[TensorFlow Frontend] incorrect input checkpoint file or internal error: cannot parse "
                             "SavedTensorSlices entry");
diff --git a/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp b/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp
index f088ed145f8ff3..1171fd6a682cb1 100644
--- a/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp
+++ b/src/frontends/tensorflow/src/checkpoint_v1_reader.hpp
@@ -12,17 +12,17 @@
 #include "checkpoint_utils.hpp"
 #include "openvino/core/any.hpp"
 #include "openvino/frontend/exception.hpp"
-#include "saved_tensor_slice.pb.h"
-#include "tensor_shape.pb.h"
-#include "types.pb.h"
+#include "ov_tensorflow/saved_tensor_slice.pb.h"
+#include "ov_tensorflow/tensor_shape.pb.h"
+#include "ov_tensorflow/types.pb.h"
 
 namespace ov {
 namespace frontend {
 namespace tensorflow {
 // stores information about shape, type, and shard id for Variable
 struct VariableInfo {
-    ::ov_tensorflow::TensorShapeProto variable_shape;
-    ::ov_tensorflow::DataType variable_type;
+    ::tensorflow::TensorShapeProto variable_shape;
+    ::tensorflow::DataType variable_type;
     int32_t shard_id;
     size_t offset;
     size_t size;
diff --git a/src/frontends/tensorflow/src/decoder_argdef.cpp b/src/frontends/tensorflow/src/decoder_argdef.cpp
index 3430bcbe5e37aa..af7c1a1cfccd01 100644
--- a/src/frontends/tensorflow/src/decoder_argdef.cpp
+++ b/src/frontends/tensorflow/src/decoder_argdef.cpp
@@ -5,11 +5,11 @@
 #include "decoder_argdef.hpp"
 
 #include "decoder_proto.hpp"
-#include "op_def.pb.h"
 #include "openvino/frontend/tensorflow/node_context.hpp"
 #include "openvino/frontend/tensorflow/special_types.hpp"
+#include "ov_tensorflow/op_def.pb.h"
+#include "ov_tensorflow/types.pb.h"
 #include "tf_utils.hpp"
-#include "types.pb.h"
 
 namespace ov {
 namespace frontend {
diff --git a/src/frontends/tensorflow/src/decoder_argdef.hpp b/src/frontends/tensorflow/src/decoder_argdef.hpp
index 69f05423f528d6..dfee9b21e1481c 100644
--- a/src/frontends/tensorflow/src/decoder_argdef.hpp
+++ b/src/frontends/tensorflow/src/decoder_argdef.hpp
@@ -9,11 +9,11 @@
 
 #include "openvino/frontend/tensorflow/decoder.hpp"
 
-namespace ov_tensorflow {
+namespace tensorflow {
 class GraphDef;
 class FunctionDef;
 class OpDef_ArgDef;
-}  // namespace ov_tensorflow
+}  // namespace tensorflow
 
 namespace ov {
 namespace frontend {
@@ -21,18 +21,18 @@ namespace tensorflow {
 
 class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase {
 public:
-    explicit DecoderArgDef(const ::ov_tensorflow::OpDef_ArgDef* arg_def,
-                           const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def,
-                           const std::shared_ptr<::ov_tensorflow::FunctionDef>& func_def,
+    explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def,
+                           const std::shared_ptr<::tensorflow::GraphDef>& graph_def,
+                           const std::shared_ptr<::tensorflow::FunctionDef>& func_def,
                            const std::string& op_type)
         : m_arg_def(arg_def),
           m_graph_def(graph_def),
           m_func_def(func_def),
          m_op_type(op_type) {}
 
-    explicit DecoderArgDef(const ::ov_tensorflow::OpDef_ArgDef* arg_def,
-                           const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def,
-                           const std::shared_ptr<::ov_tensorflow::FunctionDef>& func_def,
+    explicit DecoderArgDef(const ::tensorflow::OpDef_ArgDef* arg_def,
+                           const std::shared_ptr<::tensorflow::GraphDef>& graph_def,
+                           const std::shared_ptr<::tensorflow::FunctionDef>& func_def,
                            const std::string& op_type,
                            const std::string& producer_name)
         : m_arg_def(arg_def),
@@ -55,13 +55,13 @@ class DecoderArgDef : public ov::frontend::tensorflow::DecoderBase {
     const std::string& get_op_name() const override;
 
 private:
-    const ::ov_tensorflow::OpDef_ArgDef* m_arg_def;
+    const ::tensorflow::OpDef_ArgDef* m_arg_def;
     // For existence of OpDef_ArgDef object corresponding to the main graph node,
     // GraphDef object must live in the memory
-    const std::shared_ptr<::ov_tensorflow::GraphDef> m_graph_def;
+    const std::shared_ptr<::tensorflow::GraphDef> m_graph_def;
     // For existence of OpDef_ArgDef object corresponding to the body graph node,
     // both GraphDef and FunctionDef objects must be alive in the memory
-    const std::shared_ptr<::ov_tensorflow::FunctionDef> m_func_def;
+    const std::shared_ptr<::tensorflow::FunctionDef> m_func_def;
     const std::string m_op_type;
     const std::string m_producer_name;
 };
diff --git a/src/frontends/tensorflow/src/decoder_proto.cpp b/src/frontends/tensorflow/src/decoder_proto.cpp
index 26003336584d1e..9e0a53efb6d09f 100644
--- a/src/frontends/tensorflow/src/decoder_proto.cpp
+++ b/src/frontends/tensorflow/src/decoder_proto.cpp
@@ -4,12 +4,12 @@
 
 #include "decoder_proto.hpp"
 
-#include "attr_value.pb.h"
-#include "node_def.pb.h"
 #include "openvino/frontend/tensorflow/node_context.hpp"
 #include "openvino/frontend/tensorflow/special_types.hpp"
+#include "ov_tensorflow/attr_value.pb.h"
+#include "ov_tensorflow/node_def.pb.h"
+#include "ov_tensorflow/types.pb.h"
 #include "tf_utils.hpp"
-#include "types.pb.h"
 
 namespace ov {
 namespace frontend {
@@ -38,7 +38,7 @@ void extract_tensor_content(const std::string& tensor_content, ov::Tensor* value
 #    pragma warning(disable : 4267)  // possible loss of data
 #endif
 template <typename T>
-void extract_compressed_tensor_content(const ::ov_tensorflow::TensorProto& tensor_proto,
+void extract_compressed_tensor_content(const ::tensorflow::TensorProto& tensor_proto,
                                        int64_t val_size,
                                        ov::Tensor* values) {
     auto val_lastsaved = static_cast<T>(0);
@@ -90,15 +90,15 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const {
     }
 
     switch (attrs[0].value_case()) {
-    case ::ov_tensorflow::AttrValue::ValueCase::kB:
+    case ::tensorflow::AttrValue::ValueCase::kB:
         return attrs[0].b();
-    case ::ov_tensorflow::AttrValue::ValueCase::kF:
+    case ::tensorflow::AttrValue::ValueCase::kF:
         return attrs[0].f();
-    case ::ov_tensorflow::AttrValue::ValueCase::kS:
+    case ::tensorflow::AttrValue::ValueCase::kS:
         return attrs[0].s();
-    case ::ov_tensorflow::AttrValue::ValueCase::kI:
+    case ::tensorflow::AttrValue::ValueCase::kI:
         return attrs[0].i();
-    case ::ov_tensorflow::AttrValue::ValueCase::kShape: {
+    case ::tensorflow::AttrValue::ValueCase::kShape: {
         const auto& tf_shape = attrs[0].shape();
         if (tf_shape.unknown_rank()) {
             return ov::PartialShape::dynamic();
@@ -111,16 +111,16 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const {
         return ov::PartialShape(dims);
     }
 
-    case ::ov_tensorflow::AttrValue::ValueCase::kType: {
+    case ::tensorflow::AttrValue::ValueCase::kType: {
         auto atype = attrs[0].type();
-        if (atype != ::ov_tensorflow::DT_STRING) {
+        if (atype != ::tensorflow::DT_STRING) {
             return get_ov_type(attrs[0].type());
         } else {
             return ov::Any("DT_STRING");
         }
     }
 
-    case ::ov_tensorflow::AttrValue::ValueCase::kList: {
+    case ::tensorflow::AttrValue::ValueCase::kList: {
         const auto& list = attrs[0].list();
         if (list.i_size())
             return std::vector<int64_t>(list.i().begin(), list.i().end());
@@ -156,7 +156,7 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const {
         if (list.type_size()) {
             std::vector<ov::element::Type> res;
             for (int idx = 0; idx < list.type_size(); ++idx) {
-                if (list.type(idx) != ::ov_tensorflow::DataType::DT_STRING) {
+                if (list.type(idx) != ::tensorflow::DataType::DT_STRING) {
                     res.emplace_back(get_ov_type(list.type(idx)));
                 } else {
                     res.emplace_back(ov::element::dynamic);
@@ -176,15 +176,15 @@ ov::Any DecoderProto::get_attribute(const std::string& name) const {
         return EmptyList();
     }
 
-    case ::ov_tensorflow::AttrValue::ValueCase::kTensor: {
+    case ::tensorflow::AttrValue::ValueCase::kTensor: {
         return unpack_tensor_proto(attrs[0].tensor());
     }
-    case ::ov_tensorflow::AttrValue::ValueCase::kPlaceholder:
+    case ::tensorflow::AttrValue::ValueCase::kPlaceholder:
         FRONT_END_GENERAL_CHECK(false,
                                 "Conversion from Tensorflow to OpenVINO data type failed: Placeholder type for '",
                                 name,
                                 "' attribute is not supported.");
-    case ::ov_tensorflow::AttrValue::ValueCase::kFunc:
+    case ::tensorflow::AttrValue::ValueCase::kFunc:
         // attrs[0].func() returns NameAttrList object from which
         // we retrieve the function name
         // Further, InputModel object is created for FunctionDef with this name
@@ -251,7 +251,7 @@ const std::string& DecoderProto::get_op_name() const {
     return m_node_def->name();
 }
 
-std::vector<::ov_tensorflow::AttrValue> DecoderProto::decode_attribute_helper(const std::string& name) const {
+std::vector<::tensorflow::AttrValue> DecoderProto::decode_attribute_helper(const std::string& name) const {
     auto attr_map = m_node_def->attr();
     if (attr_map.contains(name)) {
         auto value = m_node_def->attr().at(name);
diff --git a/src/frontends/tensorflow/src/decoder_proto.hpp b/src/frontends/tensorflow/src/decoder_proto.hpp
index eab5e10c41c892..9d22e273e1e146 100644
--- a/src/frontends/tensorflow/src/decoder_proto.hpp
+++ b/src/frontends/tensorflow/src/decoder_proto.hpp
@@ -9,14 +9,14 @@
 
 #include "openvino/core/type/element_type.hpp"
 #include "openvino/frontend/tensorflow/decoder.hpp"
-#include "types.pb.h"
+#include "ov_tensorflow/types.pb.h"
 
-namespace ov_tensorflow {
+namespace tensorflow {
 class GraphDef;
 class FunctionDef;
 class NodeDef;
 class AttrValue;
-}  // namespace ov_tensorflow
+}  // namespace tensorflow
 
 namespace ov {
 namespace frontend {
@@ -29,15 +29,15 @@ void parse_producer_name(const std::string& producer_port_name,
 
 class DecoderProto : public ov::frontend::tensorflow::DecoderBase {
 public:
-    explicit DecoderProto(const ::ov_tensorflow::NodeDef* node_def,
-                          const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def)
+    explicit DecoderProto(const ::tensorflow::NodeDef* node_def,
+                          const std::shared_ptr<::tensorflow::GraphDef>& graph_def)
         : m_node_def(node_def),
           m_graph_def(graph_def),
           m_func_def(nullptr) {}
 
-    explicit DecoderProto(const ::ov_tensorflow::NodeDef* node_def,
-                          const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def,
-                          const std::shared_ptr<::ov_tensorflow::FunctionDef>& func_def)
+    explicit DecoderProto(const ::tensorflow::NodeDef* node_def,
+                          const std::shared_ptr<::tensorflow::GraphDef>& graph_def,
+                          const std::shared_ptr<::tensorflow::FunctionDef>& func_def)
         : m_node_def(node_def),
           m_graph_def(graph_def),
           m_func_def(func_def) {}
@@ -56,14 +56,14 @@ class DecoderProto : public ov::frontend::tensorflow::DecoderBase {
     const std::string& get_op_name() const override;
 
 private:
-    std::vector<::ov_tensorflow::AttrValue> decode_attribute_helper(const std::string& name) const;
-    const ::ov_tensorflow::NodeDef* m_node_def;
+    std::vector<::tensorflow::AttrValue> decode_attribute_helper(const std::string& name) const;
+    const ::tensorflow::NodeDef* m_node_def;
     // For existence of NodeDef object corresponding to the main graph node,
     // GraphDef object must live in the memory
-    const std::shared_ptr<::ov_tensorflow::GraphDef> m_graph_def;
+    const std::shared_ptr<::tensorflow::GraphDef> m_graph_def;
     // For existence of NodeDef object corresponding to the body graph node,
     // both GraphDef and FunctionDef objects must be alive in the memory
-    const std::shared_ptr<::ov_tensorflow::FunctionDef> m_func_def;
+    const std::shared_ptr<::tensorflow::FunctionDef> m_func_def;
 };
 }  // namespace tensorflow
 }  // namespace frontend
diff --git a/src/frontends/tensorflow/src/graph_iterator_meta.cpp b/src/frontends/tensorflow/src/graph_iterator_meta.cpp
index 81bd821aadec0e..06f2d31f389a27 100644
--- a/src/frontends/tensorflow/src/graph_iterator_meta.cpp
+++ b/src/frontends/tensorflow/src/graph_iterator_meta.cpp
@@ -10,26 +10,26 @@
 #include <string>
 
 #include "openvino/core/type/element_type.hpp"
-#include "tensor_bundle.pb.h"
-#include "trackable_object_graph.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
+#include "ov_tensorflow/trackable_object_graph.pb.h"
 
 namespace ov {
 namespace frontend {
 namespace tensorflow {
 
-bool GraphIteratorMeta::is_valid_signature(const ::ov_tensorflow::SignatureDef& signature) const {
-    const std::map<::ov_tensorflow::DataType, ov::element::Type> types{
-        {::ov_tensorflow::DataType::DT_BOOL, ov::element::boolean},
-        {::ov_tensorflow::DataType::DT_INT16, ov::element::i16},
-        {::ov_tensorflow::DataType::DT_INT32, ov::element::i32},
-        {::ov_tensorflow::DataType::DT_INT64, ov::element::i64},
-        {::ov_tensorflow::DataType::DT_HALF, ov::element::f16},
-        {::ov_tensorflow::DataType::DT_FLOAT, ov::element::f32},
-        {::ov_tensorflow::DataType::DT_DOUBLE, ov::element::f64},
-        {::ov_tensorflow::DataType::DT_UINT8, ov::element::u8},
-        {::ov_tensorflow::DataType::DT_INT8, ov::element::i8},
-        {::ov_tensorflow::DataType::DT_BFLOAT16, ov::element::bf16},
-        {::ov_tensorflow::DataType::DT_STRING, ov::element::dynamic}};
+bool GraphIteratorMeta::is_valid_signature(const ::tensorflow::SignatureDef& signature) const {
+    const std::map<::tensorflow::DataType, ov::element::Type> types{
+        {::tensorflow::DataType::DT_BOOL, ov::element::boolean},
+        {::tensorflow::DataType::DT_INT16, ov::element::i16},
+        {::tensorflow::DataType::DT_INT32, ov::element::i32},
+        {::tensorflow::DataType::DT_INT64, ov::element::i64},
+        {::tensorflow::DataType::DT_HALF, ov::element::f16},
+        {::tensorflow::DataType::DT_FLOAT, ov::element::f32},
+        {::tensorflow::DataType::DT_DOUBLE, ov::element::f64},
+        {::tensorflow::DataType::DT_UINT8, ov::element::u8},
+        {::tensorflow::DataType::DT_INT8, ov::element::i8},
+        {::tensorflow::DataType::DT_BFLOAT16, ov::element::bf16},
+        {::tensorflow::DataType::DT_STRING, ov::element::dynamic}};
 
     for (const auto& it : signature.inputs()) {
         if (it.second.name().empty() || types.find(it.second.dtype()) == types.end())
diff --git a/src/frontends/tensorflow/src/graph_iterator_meta.hpp b/src/frontends/tensorflow/src/graph_iterator_meta.hpp
index 6c14df8ba8fd6b..1e2789227260fb 100644
--- a/src/frontends/tensorflow/src/graph_iterator_meta.hpp
+++ b/src/frontends/tensorflow/src/graph_iterator_meta.hpp
@@ -27,7 +27,7 @@ std::basic_string<wchar_t> get_variables_index_name(const std::wstring
 
 // Loads graph from Tensorflow MetaGraph file (*.meta)
 class GraphIteratorMeta : public GraphIteratorProto {
-    std::shared_ptr<::ov_tensorflow::MetaGraphDef> m_metagraph_def;
+    std::shared_ptr<::tensorflow::MetaGraphDef> m_metagraph_def;
     std::shared_ptr<VariablesIndex> m_variables_index;
     std::shared_ptr<std::map<std::string, std::string>> m_inputs_map;
     std::shared_ptr<std::map<std::string, std::string>> m_outputs_map;
@@ -36,7 +36,7 @@ class GraphIteratorMeta : public GraphIteratorProto {
 public:
     template <typename T>
     GraphIteratorMeta(const std::basic_string<T>& path, const bool mmap_enabled)
-        : m_metagraph_def(std::make_shared<::ov_tensorflow::MetaGraphDef>()),
+        : m_metagraph_def(std::make_shared<::tensorflow::MetaGraphDef>()),
           m_mmap_enabled(mmap_enabled) {
         this->read_meta(path);
     }
@@ -45,7 +45,7 @@ class GraphIteratorMeta : public GraphIteratorProto {
     static bool is_supported(const std::basic_string<T>& path) {
         try {
             std::ifstream mg_stream(path.c_str(), std::ios::in | std::ifstream::binary);
-            auto metagraph_def = std::make_shared<::ov_tensorflow::MetaGraphDef>();
+            auto metagraph_def = std::make_shared<::tensorflow::MetaGraphDef>();
             return mg_stream && mg_stream.is_open() && metagraph_def->ParsePartialFromIstream(&mg_stream) &&
                    metagraph_def->has_graph_def() && metagraph_def->graph_def().node_size() > 0;
         } catch (...) {
@@ -66,7 +66,7 @@ class GraphIteratorMeta : public GraphIteratorProto {
     }
 
 private:
-    bool is_valid_signature(const ::ov_tensorflow::SignatureDef& signature) const;
+    bool is_valid_signature(const ::tensorflow::SignatureDef& signature) const;
 
     template <typename T>
     bool read_meta(const std::basic_string<T>& path) {
@@ -87,10 +87,10 @@ class GraphIteratorMeta : public GraphIteratorProto {
         bool res = m_metagraph_def->ParseFromIstream(&mg_stream);
         FRONT_END_GENERAL_CHECK(res && m_metagraph_def->has_graph_def(), "MetaGraph cannot be parsed");
 
-        std::map<std::string, const ::ov_tensorflow::SignatureDef*> validSignatures = {};
+        std::map<std::string, const ::tensorflow::SignatureDef*> validSignatures = {};
         for (const auto& sit : m_metagraph_def->signature_def()) {
             const std::string& key = sit.first;
-            const ::ov_tensorflow::SignatureDef& val = sit.second;
+            const ::tensorflow::SignatureDef& val = sit.second;
             if (is_valid_signature(val)) {
                 validSignatures[key] = &val;
             }
@@ -114,7 +114,7 @@ class GraphIteratorMeta : public GraphIteratorProto {
             }
         }
 
-        m_graph_def = std::make_shared<::ov_tensorflow::GraphDef>(m_metagraph_def->graph_def());
+        m_graph_def = std::make_shared<::tensorflow::GraphDef>(m_metagraph_def->graph_def());
 
         // Update variables map using information by resolving AssignVariableOp graph nodes
         std::map<std::string, std::string> var_map;
diff --git a/src/frontends/tensorflow/src/graph_iterator_proto.hpp b/src/frontends/tensorflow/src/graph_iterator_proto.hpp
index d01e1fec6b7a0c..5ef6d0a5954b41 100644
--- a/src/frontends/tensorflow/src/graph_iterator_proto.hpp
+++ b/src/frontends/tensorflow/src/graph_iterator_proto.hpp
@@ -10,10 +10,10 @@
 #include "checkpoint_v1_reader.hpp"
 #include "decoder_argdef.hpp"
 #include "decoder_proto.hpp"
-#include "graph.pb.h"
 #include "openvino/frontend/exception.hpp"
 #include "openvino/frontend/graph_iterator.hpp"
 #include "openvino/frontend/tensorflow/decoder.hpp"
+#include "ov_tensorflow/graph.pb.h"
 
 namespace ov {
 namespace frontend {
@@ -21,8 +21,8 @@ namespace tensorflow {
 
 class GraphIteratorProto : public GraphIterator {
 protected:
-    std::shared_ptr<::ov_tensorflow::GraphDef> m_graph_def;
-    std::shared_ptr<::ov_tensorflow::FunctionDef> m_func_def;
+    std::shared_ptr<::tensorflow::GraphDef> m_graph_def;
+    std::shared_ptr<::tensorflow::FunctionDef> m_func_def;
     std::shared_ptr<CheckpointV1Reader> m_checkpoint_v1_reader;
 
     size_t node_index = 0;
@@ -32,7 +32,7 @@ class GraphIteratorProto : public GraphIterator {
     std::vector<std::string> m_output_names;
 
     GraphIteratorProto()
-        : m_graph_def(std::make_shared<::ov_tensorflow::GraphDef>()),
+        : m_graph_def(std::make_shared<::tensorflow::GraphDef>()),
           m_func_def(nullptr),
           m_checkpoint_v1_reader(nullptr),
           m_library_map() {}
@@ -62,8 +62,8 @@ class GraphIteratorProto : public GraphIterator {
     }
 
 public:
-    GraphIteratorProto(const std::shared_ptr<::ov_tensorflow::GraphDef>& graph_def,
-                       const std::shared_ptr<::ov_tensorflow::FunctionDef>& func_def,
+    GraphIteratorProto(const std::shared_ptr<::tensorflow::GraphDef>& graph_def,
+                       const std::shared_ptr<::tensorflow::FunctionDef>& func_def,
                        const std::unordered_map<std::string, int>& library_map,
                        const std::shared_ptr<CheckpointV1Reader> checkpoint_v1_reader)
         : m_graph_def(graph_def),
@@ -105,7 +105,7 @@ class GraphIteratorProto : public GraphIterator {
     /// \brief Construct GraphIterator for the frozen model without v1 checkpoints
     template <typename T>
     GraphIteratorProto(const std::basic_string<T>& model_path)
-        : m_graph_def(std::make_shared<::ov_tensorflow::GraphDef>()),
+        : m_graph_def(std::make_shared<::tensorflow::GraphDef>()),
           m_func_def(nullptr),
           m_checkpoint_v1_reader(nullptr) {
         std::ifstream pb_stream(model_path, std::ios::in | std::ifstream::binary);
@@ -119,7 +119,7 @@ class GraphIteratorProto : public GraphIterator {
     /// \brief Construct GraphIterator for the frozen model with v1 checkpoints
     template <typename T>
     GraphIteratorProto(const std::basic_string<T>& model_path, const std::basic_string<T>& checkpoint_directory)
-        : m_graph_def(std::make_shared<::ov_tensorflow::GraphDef>()),
+        : m_graph_def(std::make_shared<::tensorflow::GraphDef>()),
           m_func_def(nullptr),
          m_checkpoint_v1_reader(nullptr) {
         std::ifstream pb_stream(model_path, std::ios::in | std::ifstream::binary);
@@ -136,7 +136,7 @@ class GraphIteratorProto : public GraphIterator {
     static bool is_supported(const std::basic_string<T>& path) {
         try {
             std::ifstream pb_stream(path, std::ios::in | std::ifstream::binary);
-            auto graph_def = std::make_shared<::ov_tensorflow::GraphDef>();
+            auto graph_def = std::make_shared<::tensorflow::GraphDef>();
             return pb_stream && pb_stream.is_open() && graph_def->ParsePartialFromIstream(&pb_stream) &&
                    graph_def->node_size() > 0;
         } catch (...) {
@@ -184,7 +184,7 @@ class GraphIteratorProto : public GraphIterator {
             "[TensorFlow Error] Internal Error: incorrect library map to cache function indices by names.");
 
         auto func = m_graph_def->library().function(func_ind);
-        auto func_ptr = std::make_shared<::ov_tensorflow::FunctionDef>(func);
+        auto func_ptr = std::make_shared<::tensorflow::FunctionDef>(func);
         return std::make_shared<GraphIteratorProto>(m_graph_def, func_ptr, m_library_map, m_checkpoint_v1_reader);
     }
 
diff --git a/src/frontends/tensorflow/src/graph_iterator_proto_txt.hpp b/src/frontends/tensorflow/src/graph_iterator_proto_txt.hpp
index 523d863dbb0bdd..6d5b6494f764c5 100644
--- a/src/frontends/tensorflow/src/graph_iterator_proto_txt.hpp
+++ b/src/frontends/tensorflow/src/graph_iterator_proto_txt.hpp
@@ -62,7 +62,7 @@ class GraphIteratorProtoTxt : public GraphIteratorProto {
         if (!input_stream) {
             return false;
         }
-        auto graph_def = std::make_shared<::ov_tensorflow::GraphDef>();
+        auto graph_def = std::make_shared<::tensorflow::GraphDef>();
         auto is_parsed = ::google::protobuf::TextFormat::Parse(input_stream.get(), graph_def.get()) && graph_def &&
                          graph_def->node_size() > 0;
         return is_parsed;
diff --git a/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp b/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
index 7c9af8216a910f..803e7d694bc69a 100644
--- a/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
+++ b/src/frontends/tensorflow/src/graph_iterator_saved_model.cpp
@@ -10,26 +10,26 @@
 #include <string>
 
 #include "openvino/core/type/element_type.hpp"
-#include "tensor_bundle.pb.h"
-#include "trackable_object_graph.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
+#include "ov_tensorflow/trackable_object_graph.pb.h"
 
 namespace ov {
 namespace frontend {
 namespace tensorflow {
 
-bool GraphIteratorSavedModel::is_valid_signature(const ::ov_tensorflow::SignatureDef& signature) const {
-    const std::map<::ov_tensorflow::DataType, ov::element::Type> types{
-        {::ov_tensorflow::DataType::DT_BOOL, ov::element::boolean},
-        {::ov_tensorflow::DataType::DT_INT16, ov::element::i16},
-        {::ov_tensorflow::DataType::DT_INT32, ov::element::i32},
-        {::ov_tensorflow::DataType::DT_INT64, ov::element::i64},
-        {::ov_tensorflow::DataType::DT_HALF, ov::element::f16},
-        {::ov_tensorflow::DataType::DT_FLOAT, ov::element::f32},
-        {::ov_tensorflow::DataType::DT_DOUBLE, ov::element::f64},
-        {::ov_tensorflow::DataType::DT_UINT8, ov::element::u8},
-        {::ov_tensorflow::DataType::DT_INT8, ov::element::i8},
-        {::ov_tensorflow::DataType::DT_BFLOAT16, ov::element::bf16},
-        {::ov_tensorflow::DataType::DT_STRING, ov::element::dynamic}};
+bool GraphIteratorSavedModel::is_valid_signature(const ::tensorflow::SignatureDef& signature) const {
+    const std::map<::tensorflow::DataType, ov::element::Type> types{
+        {::tensorflow::DataType::DT_BOOL, ov::element::boolean},
+        {::tensorflow::DataType::DT_INT16, ov::element::i16},
+        {::tensorflow::DataType::DT_INT32, ov::element::i32},
+        {::tensorflow::DataType::DT_INT64, ov::element::i64},
+        {::tensorflow::DataType::DT_HALF, ov::element::f16},
+        {::tensorflow::DataType::DT_FLOAT, ov::element::f32},
+        {::tensorflow::DataType::DT_DOUBLE, ov::element::f64},
+        {::tensorflow::DataType::DT_UINT8, ov::element::u8},
+        {::tensorflow::DataType::DT_INT8, ov::element::i8},
+        {::tensorflow::DataType::DT_BFLOAT16, ov::element::bf16},
+        {::tensorflow::DataType::DT_STRING, ov::element::dynamic}};
 
     for (const auto& it : signature.inputs()) {
         if (it.second.name().empty() || types.find(it.second.dtype()) == types.end())
diff --git a/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp b/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
index 52b0ba75137835..4cb385e66f744d 100644
--- a/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
+++ b/src/frontends/tensorflow/src/graph_iterator_saved_model.hpp
@@ -8,7 +8,7 @@
 
 #include "graph_iterator_proto.hpp"
 #include "openvino/util/file_util.hpp"
-#include "saved_model.pb.h"
+#include "ov_tensorflow/saved_model.pb.h"
 #include "variables_index.hpp"
 
 namespace ov {
@@ -34,7 +34,7 @@ std::basic_string<char> get_variables_index_name<char>();
 
 // Loads graph from Tensorflow Saved Model file (saved_model.pb)
 class GraphIteratorSavedModel : public GraphIteratorProto {
-    std::shared_ptr<::ov_tensorflow::SavedModel> m_saved_model;
+    std::shared_ptr<::tensorflow::SavedModel> m_saved_model;
     std::shared_ptr<VariablesIndex> m_variables_index;
     std::shared_ptr<std::map<std::string, std::string>> m_inputs_map;
     std::shared_ptr<std::map<std::string, std::string>> m_outputs_map;
@@ -43,7 +43,7 @@ class GraphIteratorSavedModel : public GraphIteratorProto {
 public:
     template <typename T>
     GraphIteratorSavedModel(const std::basic_string<T>& path, const std::string& tags, const bool mmap_enabled)
-        : m_saved_model(std::make_shared<::ov_tensorflow::SavedModel>()),
+        : m_saved_model(std::make_shared<::tensorflow::SavedModel>()),
          m_mmap_enabled(mmap_enabled) {
         this->read_saved_model(path, tags);
     }
@@ -66,7 +66,7 @@ class GraphIteratorSavedModel : public GraphIteratorProto {
     }
 
 private:
-    bool is_valid_signature(const ::ov_tensorflow::SignatureDef& signature) const;
+    bool is_valid_signature(const ::tensorflow::SignatureDef& signature) const;
 
     template <typename T>
     bool read_saved_model(const std::basic_string<T>& path, const std::string& tags) {
@@ -141,11 +141,11 @@ class GraphIteratorSavedModel : public GraphIteratorProto {
     }
 
     /// \brief Does a loading of exact meta-graph
-    bool load_meta_graph(const ::ov_tensorflow::MetaGraphDef& meta_graph) {
-        std::map<std::string, const ::ov_tensorflow::SignatureDef*> validSignatures = {};
+    bool load_meta_graph(const ::tensorflow::MetaGraphDef& meta_graph) {
+        std::map<std::string, const ::tensorflow::SignatureDef*> validSignatures = {};
         for (const auto& sit : meta_graph.signature_def()) {
             const std::string& key = sit.first;
-            const ::ov_tensorflow::SignatureDef& val = sit.second;
+            const ::tensorflow::SignatureDef& val = sit.second;
             if (is_valid_signature(val)) {
                 validSignatures[key] = &val;
             }
@@ -167,7 +167,7 @@ class GraphIteratorSavedModel : public GraphIteratorProto {
             }
         }
 
-        m_graph_def = std::make_shared<::ov_tensorflow::GraphDef>(meta_graph.graph_def());
+        m_graph_def = std::make_shared<::tensorflow::GraphDef>(meta_graph.graph_def());
 
         // Update variables map using information by resolving AssignVariableOp graph nodes
         std::map<std::string, std::string> var_map;
diff --git a/src/frontends/tensorflow/src/op/var_handle.cpp b/src/frontends/tensorflow/src/op/var_handle.cpp
index edca2d2bca8cb0..0c86041440a8ff 100644
--- a/src/frontends/tensorflow/src/op/var_handle.cpp
+++ b/src/frontends/tensorflow/src/op/var_handle.cpp
@@ -10,7 +10,7 @@
 #include "ngraph/runtime/shared_buffer.hpp"
 #include "openvino/opsets/opset8.hpp"
 #include "openvino/util/mmap_object.hpp"
-#include "tensor_bundle.pb.h"
+#include "ov_tensorflow/tensor_bundle.pb.h"
 
 using namespace std;
 using namespace ov::opset8;
@@ -26,7 +26,7 @@ template <typename T>
 static std::shared_ptr<ov::Node> read_variable(std::shared_ptr<VariablesIndex> var_index,
                                                const ov::element::Type ov_type,
                                                const ov::Shape shape,
-                                               const ::ov_tensorflow::BundleEntryProto& entry,
+                                               const ::tensorflow::BundleEntryProto& entry,
                                                const NodeContext& node) {
     google::protobuf::int64 size = 1;
     for (uint64_t i = 0; i < shape.size(); ++i) {
@@ -95,7 +95,7 @@ OutputVector translate_varhandle_op(const NodeContext& node) {
         TENSORFLOW_OP_VALIDATION(node, result, "[TensorFlow Frontend] Internal error: Cannot find requested variable.");
 
-        ::ov_tensorflow::BundleEntryProto entry;
+        ::tensorflow::BundleEntryProto entry;
         TENSORFLOW_OP_VALIDATION(node,
                                  entry.ParseFromArray(entry_data, static_cast<int>(entry_size)),
                                  "[TensorFlow Frontend] Internal error: Cannot get read bundle entry.");
"ov_tensorflow/attr_value.proto"; // Used to specify and override the default API & behavior in the // generated code for client languages, from what you would get from diff --git a/src/frontends/tensorflow/src/proto/attr_value.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto similarity index 95% rename from src/frontends/tensorflow/src/proto/attr_value.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto index c42f78ac45a42b..b903c30cf99276 100644 --- a/src/frontends/tensorflow/src/proto/attr_value.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/attr_value.proto @@ -12,11 +12,11 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "tensor.proto"; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/tensor.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option cc_enable_arenas = true; option java_outer_classname = "AttrValueProtos"; diff --git a/src/frontends/tensorflow/src/proto/cost_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/cost_graph.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto index db348eb8860847..8e4d9788f49595 100644 --- a/src/frontends/tensorflow/src/proto/cost_graph.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/cost_graph.proto @@ -12,10 +12,10 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option cc_enable_arenas = true; option java_outer_classname = "CostGraphProtos"; diff --git a/src/frontends/tensorflow/src/proto/dataset_options.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/dataset_options.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/dataset_options.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/dataset_options.proto index be7a0d8efd0c61..dc492a60fe0ebe 100644 --- a/src/frontends/tensorflow/src/proto/dataset_options.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/dataset_options.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow.data; +package tensorflow.data; // Represents the type of auto-sharding we enable. 
enum AutoShardPolicy { diff --git a/src/frontends/tensorflow/src/proto/device_attributes.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/device_attributes.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/device_attributes.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/device_attributes.proto index bec64f2744124b..92c8a6b2d191bc 100644 --- a/src/frontends/tensorflow/src/proto/device_attributes.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/device_attributes.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "DeviceAttributesProtos"; diff --git a/src/frontends/tensorflow/src/proto/function.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/function.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto index 271126ac0f4687..9e84731c983bb1 100644 --- a/src/frontends/tensorflow/src/proto/function.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/function.proto @@ -12,11 +12,11 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "attr_value.proto"; -import "node_def.proto"; -import "op_def.proto"; +import "ov_tensorflow/attr_value.proto"; +import "ov_tensorflow/node_def.proto"; +import "ov_tensorflow/op_def.proto"; option cc_enable_arenas = true; option java_outer_classname = "FunctionProtos"; diff --git a/src/frontends/tensorflow/src/proto/graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto similarity index 95% rename from src/frontends/tensorflow/src/proto/graph.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto index 97bf8002700d0a..e047abeafe18b1 100644 --- a/src/frontends/tensorflow/src/proto/graph.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph.proto @@ -12,11 +12,11 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "function.proto"; -import "node_def.proto"; -import "versions.proto"; +import "ov_tensorflow/function.proto"; +import "ov_tensorflow/node_def.proto"; +import "ov_tensorflow/versions.proto"; option cc_enable_arenas = true; option java_outer_classname = "GraphProtos"; diff --git a/src/frontends/tensorflow/src/proto/graph_transfer_info.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/graph_transfer_info.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto index 821e7619cc8488..9e7d598e34a5c1 100644 --- a/src/frontends/tensorflow/src/proto/graph_transfer_info.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/graph_transfer_info.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "types.proto"; +import "ov_tensorflow/types.proto"; option cc_enable_arenas = true; option java_outer_classname = "GraphTransferInfoProto"; diff --git a/src/frontends/tensorflow/src/proto/kernel_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/kernel_def.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto index a8d0daeaa9ef20..88142d3de9584d 100644 --- 
a/src/frontends/tensorflow/src/proto/kernel_def.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/kernel_def.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "attr_value.proto"; +import "ov_tensorflow/attr_value.proto"; option cc_enable_arenas = true; option java_outer_classname = "KernelDefProtos"; diff --git a/src/frontends/tensorflow/src/proto/log_memory.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/log_memory.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto index 39ea81659c7eda..62489f0e0b8df4 100644 --- a/src/frontends/tensorflow/src/proto/log_memory.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/log_memory.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "tensor_description.proto"; +import "ov_tensorflow/tensor_description.proto"; option cc_enable_arenas = true; option java_outer_classname = "LogMemoryProtos"; diff --git a/src/frontends/tensorflow/src/proto/meta_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/meta_graph.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto index 48f4c4b5e83860..255fb6efeb2f9e 100644 --- a/src/frontends/tensorflow/src/proto/meta_graph.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/meta_graph.proto @@ -12,16 +12,16 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; import "google/protobuf/any.proto"; -import "graph.proto"; -import "op_def.proto"; -import "tensor_shape.proto"; -import "types.proto"; -import "saved_object_graph.proto"; -import "saver.proto"; -import "struct.proto"; +import "ov_tensorflow/graph.proto"; +import "ov_tensorflow/op_def.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; +import "ov_tensorflow/saved_object_graph.proto"; +import "ov_tensorflow/saver.proto"; +import "ov_tensorflow/struct.proto"; option cc_enable_arenas = true; option java_outer_classname = "MetaGraphProtos"; diff --git a/src/frontends/tensorflow/src/proto/model.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/model.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/model.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/model.proto index 1614f284b7fd55..a6567d462b8772 100644 --- a/src/frontends/tensorflow/src/proto/model.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/model.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow.data.model; +package tensorflow.data.model; option cc_enable_arenas = true; diff --git a/src/frontends/tensorflow/src/proto/node_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/node_def.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto index 848c27d4c4c1f8..b8f3a017a30fc5 100644 --- a/src/frontends/tensorflow/src/proto/node_def.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/node_def.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "attr_value.proto"; +import 
"ov_tensorflow/attr_value.proto"; option cc_enable_arenas = true; option java_outer_classname = "NodeProto"; diff --git a/src/frontends/tensorflow/src/proto/op_def.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/op_def.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto index d44526f059c548..31493fed26ce55 100644 --- a/src/frontends/tensorflow/src/proto/op_def.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/op_def.proto @@ -12,15 +12,16 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "OpDefProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.framework"; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/op_def_go_proto"; -import "attr_value.proto"; -import "types.proto"; -import "resource_handle.proto"; + +import "ov_tensorflow/attr_value.proto"; +import "ov_tensorflow/types.proto"; +import "ov_tensorflow/resource_handle.proto"; // Defines an operation. A NodeDef in a GraphDef specifies an Op by // using the "op" field which should match the name of a OpDef. diff --git a/src/frontends/tensorflow/src/proto/reader_base.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/reader_base.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/reader_base.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/reader_base.proto index e51e3781ddc6d1..0c3536600e6f24 100644 --- a/src/frontends/tensorflow/src/proto/reader_base.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/reader_base.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "ReaderBaseProtos"; diff --git a/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto similarity index 93% rename from src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto index b94ee5e6f1b892..3b17878e127cf9 100644 --- a/src/frontends/tensorflow/src/proto/remote_fused_graph_execute_info.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/remote_fused_graph_execute_info.proto @@ -12,11 +12,11 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "graph.proto"; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/graph.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option cc_enable_arenas = true; option java_outer_classname = "RemoteFusedGraphExecuteInfoProto"; diff --git a/src/frontends/tensorflow/src/proto/resource_handle.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto similarity index 95% rename from src/frontends/tensorflow/src/proto/resource_handle.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto index 55345d0302a428..19b4dcc3b84ded 100644 --- a/src/frontends/tensorflow/src/proto/resource_handle.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/resource_handle.proto @@ -12,10 +12,10 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package 
tensorflow; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option cc_enable_arenas = true; option java_outer_classname = "ResourceHandle"; diff --git a/src/frontends/tensorflow/src/proto/saved_model.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto similarity index 95% rename from src/frontends/tensorflow/src/proto/saved_model.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto index 75a809070b59e0..f8660655229245 100644 --- a/src/frontends/tensorflow/src/proto/saved_model.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_model.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "meta_graph.proto"; +import "ov_tensorflow/meta_graph.proto"; option cc_enable_arenas = true; option java_outer_classname = "SavedModelProtos"; diff --git a/src/frontends/tensorflow/src/proto/saved_object_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto similarity index 97% rename from src/frontends/tensorflow/src/proto/saved_object_graph.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto index 9ce18710a14954..d0b2170044966c 100644 --- a/src/frontends/tensorflow/src/proto/saved_object_graph.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_object_graph.proto @@ -12,15 +12,15 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; import "google/protobuf/any.proto"; -import "tensor_shape.proto"; -import "types.proto"; -import "variable.proto"; -import "versions.proto"; -import "struct.proto"; -import "trackable_object_graph.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; +import "ov_tensorflow/variable.proto"; +import "ov_tensorflow/versions.proto"; +import "ov_tensorflow/struct.proto"; +import "ov_tensorflow/trackable_object_graph.proto"; option cc_enable_arenas = true; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; diff --git a/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto similarity index 94% rename from src/frontends/tensorflow/src/proto/saved_tensor_slice.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto index 6d13b1f27aa455..9e628752bb1f5c 100644 --- a/src/frontends/tensorflow/src/proto/saved_tensor_slice.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saved_tensor_slice.proto @@ -29,17 +29,17 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "SavedTensorSliceProtos"; option java_multiple_files = true; option java_package = "org.tensorflow.util"; -import "tensor_shape.proto"; -import "tensor_slice.proto"; -import "tensor.proto"; -import "types.proto"; -import "versions.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/tensor_slice.proto"; +import "ov_tensorflow/tensor.proto"; +import "ov_tensorflow/types.proto"; +import "ov_tensorflow/versions.proto"; // Metadata describing the set of slices of the same tensor saved in a // checkpoint file. 
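Taken together, the proto changes in this patch follow one mechanical pattern: every .proto file is relocated under src/frontends/tensorflow/src/proto/ov_tensorflow/, each import gains the ov_tensorflow/ directory prefix, and the package declaration returns to the upstream name "tensorflow", so the generated C++ symbols resolve in the ::tensorflow namespace again. A minimal sketch of consuming code after the relocation (count_graph_nodes is illustrative and not part of the patch; ::tensorflow::GraphDef and node_size() are the generated protobuf API already used elsewhere in this series):

    // Generated headers are now included via their directory-prefixed path.
    #include "ov_tensorflow/graph.pb.h"

    // Generated message types stay in ::tensorflow, matching "package tensorflow;".
    static int count_graph_nodes(const ::tensorflow::GraphDef& graph_def) {
        // node_size() is the protobuf accessor for the repeated NodeDef field.
        return graph_def.node_size();
    }
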
diff --git a/src/frontends/tensorflow/src/proto/saver.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/saver.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/saver.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/saver.proto index 634397b1ee04b3..7834f473e4ccdf 100644 --- a/src/frontends/tensorflow/src/proto/saver.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/saver.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "SaverProtos"; diff --git a/src/frontends/tensorflow/src/proto/step_stats.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto similarity index 96% rename from src/frontends/tensorflow/src/proto/step_stats.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto index b2524e28a807fa..027a1d79ee22e4 100644 --- a/src/frontends/tensorflow/src/proto/step_stats.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/step_stats.proto @@ -12,10 +12,10 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "allocation_description.proto"; -import "tensor_description.proto"; +import "ov_tensorflow/allocation_description.proto"; +import "ov_tensorflow/tensor_description.proto"; option cc_enable_arenas = true; option java_outer_classname = "StepStatsProtos"; diff --git a/src/frontends/tensorflow/src/proto/struct.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto similarity index 93% rename from src/frontends/tensorflow/src/proto/struct.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto index 7da836debd7e76..4126bd98c4a3d3 100644 --- a/src/frontends/tensorflow/src/proto/struct.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/struct.proto @@ -12,11 +12,11 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "tensor.proto"; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/tensor.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto"; @@ -66,9 +66,9 @@ message StructuredValue { bool bool_value = 14; // Represents a TensorShape. - ov_tensorflow.TensorShapeProto tensor_shape_value = 31; + tensorflow.TensorShapeProto tensor_shape_value = 31; // Represents an enum value for dtype. - ov_tensorflow.DataType tensor_dtype_value = 32; + tensorflow.DataType tensor_dtype_value = 32; // Represents a value for tf.TensorSpec. TensorSpecProto tensor_spec_value = 33; // Represents a value for tf.TypeSpec. @@ -121,17 +121,17 @@ message NamedTupleValue { // A protobuf to represent tf.TensorSpec. message TensorSpecProto { string name = 1; - ov_tensorflow.TensorShapeProto shape = 2; - ov_tensorflow.DataType dtype = 3; + tensorflow.TensorShapeProto shape = 2; + tensorflow.DataType dtype = 3; } // A protobuf to represent tf.BoundedTensorSpec. 
message BoundedTensorSpecProto { string name = 1; - ov_tensorflow.TensorShapeProto shape = 2; - ov_tensorflow.DataType dtype = 3; - ov_tensorflow.TensorProto minimum = 4; - ov_tensorflow.TensorProto maximum = 5; + tensorflow.TensorShapeProto shape = 2; + tensorflow.DataType dtype = 3; + tensorflow.TensorProto minimum = 4; + tensorflow.TensorProto maximum = 5; } // Represents a tf.TypeSpec diff --git a/src/frontends/tensorflow/src/proto/summary.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/summary.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto index 16bc6235bfb1b3..ce326176947dd4 100644 --- a/src/frontends/tensorflow/src/proto/summary.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/summary.proto @@ -12,9 +12,9 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "tensor.proto"; +import "ov_tensorflow/tensor.proto"; option cc_enable_arenas = true; option java_outer_classname = "SummaryProtos"; diff --git a/src/frontends/tensorflow/src/proto/tensor.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto similarity index 96% rename from src/frontends/tensorflow/src/proto/tensor.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto index 85fd170596eefe..42f063536e09e0 100644 --- a/src/frontends/tensorflow/src/proto/tensor.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor.proto @@ -12,11 +12,11 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "resource_handle.proto"; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/resource_handle.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option cc_enable_arenas = true; option java_outer_classname = "TensorProtos"; diff --git a/src/frontends/tensorflow/src/proto/tensor_bundle.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto similarity index 93% rename from src/frontends/tensorflow/src/proto/tensor_bundle.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto index 48bf6be520920b..21af38195c4e11 100644 --- a/src/frontends/tensorflow/src/proto/tensor_bundle.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_bundle.proto @@ -12,12 +12,12 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import "tensor_shape.proto"; -import "tensor_slice.proto"; -import "types.proto"; -import "versions.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/tensor_slice.proto"; +import "ov_tensorflow/types.proto"; +import "ov_tensorflow/versions.proto"; option cc_enable_arenas = true; option java_outer_classname = "TensorBundleProtos"; diff --git a/src/frontends/tensorflow/src/proto/tensor_description.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto similarity index 88% rename from src/frontends/tensorflow/src/proto/tensor_description.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto index 86ecbe2b3e4047..c03e1311c1f386 100644 --- a/src/frontends/tensorflow/src/proto/tensor_description.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_description.proto @@ -12,11 +12,11 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; -import 
"allocation_description.proto"; -import "tensor_shape.proto"; -import "types.proto"; +import "ov_tensorflow/allocation_description.proto"; +import "ov_tensorflow/tensor_shape.proto"; +import "ov_tensorflow/types.proto"; option cc_enable_arenas = true; option java_outer_classname = "TensorDescriptionProtos"; diff --git a/src/frontends/tensorflow/src/proto/tensor_shape.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_shape.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/tensor_shape.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_shape.proto index 48d821006c4989..0a7515def63931 100644 --- a/src/frontends/tensorflow/src/proto/tensor_shape.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_shape.proto @@ -19,7 +19,7 @@ option java_multiple_files = true; option java_package = "org.tensorflow.framework"; option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/tensor_shape_go_proto"; -package ov_tensorflow; +package tensorflow; // Dimensions of a tensor. message TensorShapeProto { diff --git a/src/frontends/tensorflow/src/proto/tensor_slice.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_slice.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/tensor_slice.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_slice.proto index b30c61eca33361..415012483056d3 100644 --- a/src/frontends/tensorflow/src/proto/tensor_slice.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/tensor_slice.proto @@ -14,7 +14,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "TensorSliceProtos"; diff --git a/src/frontends/tensorflow/src/proto/trackable_object_graph.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/trackable_object_graph.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto index 748be64410c002..f0a9617432f617 100644 --- a/src/frontends/tensorflow/src/proto/trackable_object_graph.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/trackable_object_graph.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; import "google/protobuf/wrappers.proto"; diff --git a/src/frontends/tensorflow/src/proto/types.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/types.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/types.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/types.proto index a50586760a7cdf..0a60332f662397 100644 --- a/src/frontends/tensorflow/src/proto/types.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/types.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "TypesProtos"; option java_multiple_files = true; diff --git a/src/frontends/tensorflow/src/proto/variable.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/variable.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/variable.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/variable.proto index 297638e7bc649a..6e9a05d1291b23 100644 --- a/src/frontends/tensorflow/src/proto/variable.proto +++ 
b/src/frontends/tensorflow/src/proto/ov_tensorflow/variable.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "VariableProtos"; diff --git a/src/frontends/tensorflow/src/proto/versions.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/versions.proto similarity index 98% rename from src/frontends/tensorflow/src/proto/versions.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/versions.proto index 0fc46788dc2078..31a6623cf71a90 100644 --- a/src/frontends/tensorflow/src/proto/versions.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/versions.proto @@ -12,7 +12,7 @@ limitations under the License.*/ syntax = "proto3"; -package ov_tensorflow; +package tensorflow; option cc_enable_arenas = true; option java_outer_classname = "VersionsProtos"; diff --git a/src/frontends/tensorflow/src/proto/xla_data.proto b/src/frontends/tensorflow/src/proto/ov_tensorflow/xla_data.proto similarity index 99% rename from src/frontends/tensorflow/src/proto/xla_data.proto rename to src/frontends/tensorflow/src/proto/ov_tensorflow/xla_data.proto index 42ff8065983f77..95695ba78a2974 100644 --- a/src/frontends/tensorflow/src/proto/xla_data.proto +++ b/src/frontends/tensorflow/src/proto/ov_tensorflow/xla_data.proto @@ -15,7 +15,7 @@ limitations under the License. syntax = "proto3"; -package ov_xla; +package xla; option cc_enable_arenas = true; diff --git a/src/frontends/tensorflow/src/tf_utils.cpp b/src/frontends/tensorflow/src/tf_utils.cpp index 1c7df199a851a6..c72e8e7bb9080a 100644 --- a/src/frontends/tensorflow/src/tf_utils.cpp +++ b/src/frontends/tensorflow/src/tf_utils.cpp @@ -83,7 +83,7 @@ void extract_tensor_content(const string& tensor_content, Tensor* values) { # pragma warning(disable : 4267) // possible loss of data #endif template -void extract_compressed_tensor_content(const ::ov_tensorflow::TensorProto& tensor_proto, +void extract_compressed_tensor_content(const ::tensorflow::TensorProto& tensor_proto, int64_t val_size, Tensor* values) { auto val_lastsaved = static_cast(0); @@ -149,30 +149,30 @@ bool CfMarkerType::is_copyable() const { return false; } -Type get_ov_type(const ::ov_tensorflow::DataType& type) { - static const map<::ov_tensorflow::DataType, Type> type_map{{::ov_tensorflow::DataType::DT_BOOL, boolean}, - {::ov_tensorflow::DataType::DT_INT16, i16}, - {::ov_tensorflow::DataType::DT_INT32, i32}, - {::ov_tensorflow::DataType::DT_INT64, i64}, - {::ov_tensorflow::DataType::DT_HALF, f16}, - {::ov_tensorflow::DataType::DT_FLOAT, f32}, - {::ov_tensorflow::DataType::DT_DOUBLE, f64}, - {::ov_tensorflow::DataType::DT_UINT8, u8}, - {::ov_tensorflow::DataType::DT_INT8, i8}, - {::ov_tensorflow::DataType::DT_BFLOAT16, bf16}}; +Type get_ov_type(const ::tensorflow::DataType& type) { + static const map<::tensorflow::DataType, Type> type_map{{::tensorflow::DataType::DT_BOOL, boolean}, + {::tensorflow::DataType::DT_INT16, i16}, + {::tensorflow::DataType::DT_INT32, i32}, + {::tensorflow::DataType::DT_INT64, i64}, + {::tensorflow::DataType::DT_HALF, f16}, + {::tensorflow::DataType::DT_FLOAT, f32}, + {::tensorflow::DataType::DT_DOUBLE, f64}, + {::tensorflow::DataType::DT_UINT8, u8}, + {::tensorflow::DataType::DT_INT8, i8}, + {::tensorflow::DataType::DT_BFLOAT16, bf16}}; auto it = type_map.find(type); // for all unsupported types return dynamic type return it == type_map.end() ? 
dynamic : it->second; } -Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto) { +Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto) { return unpack_tensor_proto(tensor_proto, tensor_proto.tensor_shape(), tensor_proto.dtype()); } -Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto, - const ::ov_tensorflow::TensorShapeProto& tensor_shape, - const ::ov_tensorflow::DataType& tensor_type) { +Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto, + const ::tensorflow::TensorShapeProto& tensor_shape, + const ::tensorflow::DataType& tensor_type) { PartialShape pshape; for (int i = 0; i < tensor_shape.dim_size(); i++) { pshape.push_back(tensor_shape.dim(i).size()); @@ -180,7 +180,7 @@ Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto, FRONT_END_GENERAL_CHECK(pshape.is_static(), "Dynamic shapes are not supported for Tensor attribute."); Type ov_type = get_ov_type(tensor_type); - if (tensor_type != ::ov_tensorflow::DataType::DT_STRING) { + if (tensor_type != ::tensorflow::DataType::DT_STRING) { FRONT_END_GENERAL_CHECK( ov_type.is_static(), "Encountered unknown element type " + DataType_Name(tensor_type) + " on an empty tensor_proto"); diff --git a/src/frontends/tensorflow/src/tf_utils.hpp b/src/frontends/tensorflow/src/tf_utils.hpp index 286ce1440bc638..861fb56f552685 100644 --- a/src/frontends/tensorflow/src/tf_utils.hpp +++ b/src/frontends/tensorflow/src/tf_utils.hpp @@ -4,8 +4,6 @@ #pragma once -#include "attr_value.pb.h" -#include "node_def.pb.h" #include "openvino/core/node.hpp" #include "openvino/core/partial_shape.hpp" #include "openvino/core/runtime_attribute.hpp" @@ -14,9 +12,11 @@ #include "openvino/frontend/node_context.hpp" #include "openvino/op/loop.hpp" #include "openvino/runtime/tensor.hpp" -#include "tensor.pb.h" -#include "tensor_shape.pb.h" -#include "types.pb.h" +#include "ov_tensorflow/attr_value.pb.h" +#include "ov_tensorflow/node_def.pb.h" +#include "ov_tensorflow/tensor.pb.h" +#include "ov_tensorflow/tensor_shape.pb.h" +#include "ov_tensorflow/types.pb.h" namespace ov { namespace frontend { @@ -24,13 +24,13 @@ namespace tensorflow { #define CF_MARKER_TAG "tf_cf_marker_tag" -ov::element::Type get_ov_type(const ::ov_tensorflow::DataType& type); +ov::element::Type get_ov_type(const ::tensorflow::DataType& type); -ov::Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto); +ov::Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto); -ov::Any unpack_tensor_proto(const ::ov_tensorflow::TensorProto& tensor_proto, - const ::ov_tensorflow::TensorShapeProto& tensor_shape, - const ::ov_tensorflow::DataType& tensor_type); +ov::Any unpack_tensor_proto(const ::tensorflow::TensorProto& tensor_proto, + const ::tensorflow::TensorShapeProto& tensor_shape, + const ::tensorflow::DataType& tensor_type); class Switch; using SetOfSwitchNodes = std::unordered_set>; diff --git a/src/frontends/tensorflow/src/variables_index.cpp b/src/frontends/tensorflow/src/variables_index.cpp index cda18ca3ca7c7f..2dcf3faf9e0b0c 100644 --- a/src/frontends/tensorflow/src/variables_index.cpp +++ b/src/frontends/tensorflow/src/variables_index.cpp @@ -11,8 +11,8 @@ #include "graph_iterator_saved_model.hpp" #include "openvino/core/type/element_type.hpp" #include "openvino/util/mmap_object.hpp" -#include "tensor_bundle.pb.h" -#include "trackable_object_graph.pb.h" +#include "ov_tensorflow/tensor_bundle.pb.h" +#include "ov_tensorflow/trackable_object_graph.pb.h" #ifdef 
ENABLE_SNAPPY_COMPRESSION # include "snappy.h" @@ -126,7 +126,7 @@ void VariablesIndex::read_bundle_header() { auto item = m_variables_index.find(""); FRONT_END_GENERAL_CHECK(item != m_variables_index.end(), "Bundle Header isn't found in index"); - ::ov_tensorflow::BundleHeaderProto bundleHeader; + ::tensorflow::BundleHeaderProto bundleHeader; FRONT_END_GENERAL_CHECK(bundleHeader.ParseFromArray(item->second.data(), static_cast(item->second.size())), "Bundle Header: Cannot parse Bundle Header"); FRONT_END_GENERAL_CHECK(bundleHeader.version().producer() == 1, "Bundle Header: Unsupported producer version"); @@ -145,7 +145,7 @@ void VariablesIndex::read_checkpointable_object_graph() { return; } - ::ov_tensorflow::BundleEntryProto entry; + ::tensorflow::BundleEntryProto entry; FRONT_END_GENERAL_CHECK(entry.ParseFromArray(item->second.data(), static_cast(item->second.size())), "CMO: Cannot parse Bundle Entry"); @@ -155,7 +155,7 @@ void VariablesIndex::read_checkpointable_object_graph() { FRONT_END_GENERAL_CHECK(shard != m_data_files.end(), "CMO: data files isn't found"); std::vector data(entry.size()); - ::ov_tensorflow::TrackableObjectGraph tog; + ::tensorflow::TrackableObjectGraph tog; // TODO: have to understand this offset // It looks like reinterpret_cast artifact @@ -244,13 +244,13 @@ bool VariablesIndex::read_variables(std::ifstream& vi_stream, const std::wstring struct PtrNode { using SharedPtrNode = std::shared_ptr; - const ::ov_tensorflow::NodeDef* node; + const ::tensorflow::NodeDef* node; std::vector inputs; std::vector outputs; PtrNode() : node(nullptr), inputs(), outputs() {} - PtrNode(const ::ov_tensorflow::NodeDef& src_node) { + PtrNode(const ::tensorflow::NodeDef& src_node) { node = &src_node; } @@ -308,14 +308,14 @@ struct PtrNode { } }; -static void read_stateful_partitioned_call(const std::shared_ptr<::ov_tensorflow::GraphDef> graph_def, - const ::ov_tensorflow::NodeDef& partCall, +static void read_stateful_partitioned_call(const std::shared_ptr<::tensorflow::GraphDef> graph_def, + const ::tensorflow::NodeDef& partCall, std::map& node_dictionary) { FRONT_END_GENERAL_CHECK(partCall.op() == "StatefulPartitionedCall", "Passed node isn't StatefulPartitionedCall"); std::string func_name = partCall.attr().at("f").func().name(); - const ::ov_tensorflow::FunctionDef* func_def = nullptr; + const ::tensorflow::FunctionDef* func_def = nullptr; for (const auto& func : graph_def->library().function()) { if (func.signature().name() == func_name) { func_def = &func; @@ -365,7 +365,7 @@ static void read_stateful_partitioned_call(const std::shared_ptr<::ov_tensorflow } } -void VariablesIndex::map_assignvariable(const std::shared_ptr<::ov_tensorflow::GraphDef> graph_def, +void VariablesIndex::map_assignvariable(const std::shared_ptr<::tensorflow::GraphDef> graph_def, std::map& variables_map) { std::map nodes; diff --git a/src/frontends/tensorflow/src/variables_index.hpp b/src/frontends/tensorflow/src/variables_index.hpp index 2fb517e8e9b2c8..aa805b264bc3d1 100644 --- a/src/frontends/tensorflow/src/variables_index.hpp +++ b/src/frontends/tensorflow/src/variables_index.hpp @@ -9,7 +9,7 @@ #include "graph_iterator_proto.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/mmap_object.hpp" -#include "saved_model.pb.h" +#include "ov_tensorflow/saved_model.pb.h" namespace ov { namespace frontend { @@ -139,7 +139,7 @@ class VariablesIndex { /// It needs to map VarHandleOp to right place in .index file. 
    /// \param[in] graph_def GraphDef object for analysis
    /// \param[out] variables_map Map of variables found in graph_def
-    static void map_assignvariable(const std::shared_ptr<::ov_tensorflow::GraphDef> graph_def,
+    static void map_assignvariable(const std::shared_ptr<::tensorflow::GraphDef> graph_def,
                                    std::map& variables_map);
 
 private:

From 94a63605d14513834da5905107567135ddf8b4b1 Mon Sep 17 00:00:00 2001
From: Evgenya Nugmanova
Date: Sun, 22 Oct 2023 01:03:42 +0400
Subject: [PATCH 37/39] Clear error message in the Constant::cast_vector (#20629)

---
 src/core/include/openvino/op/constant.hpp | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/src/core/include/openvino/op/constant.hpp b/src/core/include/openvino/op/constant.hpp
index e122d36a8223d1..a482fd12bb4c68 100644
--- a/src/core/include/openvino/op/constant.hpp
+++ b/src/core/include/openvino/op/constant.hpp
@@ -485,9 +485,19 @@ class OPENVINO_API Constant : public Op {
         if (!std::is_same::value) {
             OPENVINO_ASSERT(
                 !std::numeric_limits::is_signed || std::numeric_limits::lowest() <= c,
-                "Cannot cast vector from constant. Some values are outside the range.");
+                "Cannot cast vector from ",
+                Type,
+                " constant to ",
+                element::from(),
+                ". Some values are outside the range. Example: ",
+                c);
             OPENVINO_ASSERT(std::numeric_limits::max() >= c,
-                            "Cannot cast vector from constant. Some values are outside the range.");
+                            "Cannot cast vector from ",
+                            Type,
+                            " constant to ",
+                            element::from(),
+                            ". Some values are outside the range. Example: ",
+                            c);
         }
 #if defined(__clang__)
 # pragma clang diagnostic pop

From 5dafee4ac16e1cf757a1ba38008bdf5db0ae0e69 Mon Sep 17 00:00:00 2001
From: Karan Jakhar
Date: Sun, 22 Oct 2023 18:55:59 +0530
Subject: [PATCH 38/39] fixing typo, suppored -> supported (#20639)

---
 src/frontends/pytorch/src/op/bitwise.cpp                |  6 +++---
 src/inference/dev_api/ie_icore.hpp                      |  2 +-
 src/inference/dev_api/openvino/runtime/icore.hpp        |  2 +-
 .../fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp |  4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/frontends/pytorch/src/op/bitwise.cpp b/src/frontends/pytorch/src/op/bitwise.cpp
index 6e3b1fe5f49ee4..8cbae192ca6bef 100644
--- a/src/frontends/pytorch/src/op/bitwise.cpp
+++ b/src/frontends/pytorch/src/op/bitwise.cpp
@@ -17,7 +17,7 @@ OutputVector translate_bitwise_not(const NodeContext& context) {
     num_inputs_check(context, 1, 2);
     auto x = context.get_input(0);
     FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean),
-                                  "aten::bitwise_not suppored only for boolean input");
+                                  "aten::bitwise_not supported only for boolean input");
     auto not_x = context.mark_node(std::make_shared(x));
     if (!context.input_is_none(1)) {
         context.mutate_input(1, not_x);
     }
@@ -30,7 +30,7 @@ OutputVector translate_bitwise_and(const NodeContext& context) {
     auto x = context.get_input(0);
     auto y = context.get_input(1);
     FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean),
-                                  "aten::bitwise_not suppored only for boolean input");
+                                  "aten::bitwise_not supported only for boolean input");
     auto and_x = context.mark_node(std::make_shared(x, y));
     return {and_x};
 };
@@ -40,7 +40,7 @@ OutputVector translate_bitwise_or(const NodeContext& context) {
     auto x = context.get_input(0);
     auto y = context.get_input(1);
     FRONT_END_OP_CONVERSION_CHECK(x.get_element_type().compatible(element::boolean),
-                                  "aten::bitwise_not suppored only for boolean input");
+                                  "aten::bitwise_not supported only for boolean input");
     auto or_x = context.mark_node(std::make_shared(x, y));
     return {or_x};
 };
diff --git a/src/inference/dev_api/ie_icore.hpp b/src/inference/dev_api/ie_icore.hpp
index 8852c1f4ecd8c9..2210f26bbfc6ef 100644
--- a/src/inference/dev_api/ie_icore.hpp
+++ b/src/inference/dev_api/ie_icore.hpp
@@ -191,7 +191,7 @@ class ICore : public ov::ICore {
     virtual InferenceEngine::RemoteContext::Ptr CreateContext(const std::string& deviceName, const ov::AnyMap&) = 0;
 
     /**
-     * @brief Get only configs that are suppored by device
+     * @brief Get only configs that are supported by device
      * @param deviceName Name of a device
      * @param config Map of configs that can contains configs that are not supported by device
      * @return map of configs that are supported by device
diff --git a/src/inference/dev_api/openvino/runtime/icore.hpp b/src/inference/dev_api/openvino/runtime/icore.hpp
index e4d0a98f5be968..de2ca2ebf07c57 100644
--- a/src/inference/dev_api/openvino/runtime/icore.hpp
+++ b/src/inference/dev_api/openvino/runtime/icore.hpp
@@ -222,7 +222,7 @@ class OPENVINO_RUNTIME_API ICore {
     }
 
     /**
-     * @brief Get only properties that are suppored by specified device
+     * @brief Get only properties that are supported by specified device
      * @param full_device_name Name of a device (can be either virtual or hardware)
      * @param properties Properties that can contains configs that are not supported by device
      * @return map of properties that are supported by device
diff --git a/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp b/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp
index 5c4c6f7031e6d8..5a6f237b27cda1 100644
--- a/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp
+++ b/thirdparty/fluid/modules/gapi/include/opencv2/gapi/imgproc.hpp
@@ -1214,7 +1214,7 @@ or column if there are N channels, or have N columns if there is a single channe
 @param src Input set of 2D points stored in one of possible containers: Mat,
 std::vector, std::vector, std::vector.
 @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER
-and @ref DIST_C are not suppored.
+and @ref DIST_C are not supported.
 @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value
 is chosen.
 @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the
@@ -1286,7 +1286,7 @@ or column if there are N channels, or have N columns if there is a single channe
 @param src Input set of 3D points stored in one of possible containers: Mat,
 std::vector, std::vector, std::vector.
 @param distType Distance used by the M-estimator, see #DistanceTypes. @ref DIST_USER
-and @ref DIST_C are not suppored.
+and @ref DIST_C are not supported.
 @param param Numerical parameter ( C ) for some types of distances. If it is 0, an optimal value
 is chosen.
 @param reps Sufficient accuracy for the radius (distance between the coordinate origin and the

From 6e4ec88db8467ef5b853d233bc3e551451729687 Mon Sep 17 00:00:00 2001
From: Irina Efode
Date: Mon, 23 Oct 2023 12:47:15 +0400
Subject: [PATCH 39/39] [GHA][CONFORMANCE] Enable and conformance for Template in GHA (#20604)

* [GHA][CONFORMANCE] Enable and conformance for Template in GHA

* apply review

* Update linux.yml
---
 .github/workflows/linux.yml   | 12 ++++++++++++
 .github/workflows/mac.yml     | 12 ++++++++++++
 .github/workflows/windows.yml |  8 ++++++++
 3 files changed, 32 insertions(+)

diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml
index 733dfed4c09d14..639eca9957928d 100644
--- a/.github/workflows/linux.yml
+++ b/.github/workflows/linux.yml
@@ -760,6 +760,18 @@ jobs:
         ${INSTALL_TEST_DIR}/ov_cpu_unit_tests --gtest_print_time=1 \
             --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-CPUUnitTests.xml
 
+      - name: SubgraphsDumper tests
+        run: |
+          source ${INSTALL_DIR}/setupvars.sh
+          ${INSTALL_TEST_DIR}/subgraphsDumperTests --gtest_print_time=1 \
+              --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml
+
+      - name: Template OpImpl tests
+        run: |
+          source ${INSTALL_DIR}/setupvars.sh
+          ${INSTALL_TEST_DIR}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter=*OpImpl*\
+              --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-OpImplTests.xml
+
       - name: AUTO unit tests
         run: |
           source ${INSTALL_DIR}/setupvars.sh
diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml
index d5084d7a5d19c6..b8e48226a1ca53 100644
--- a/.github/workflows/mac.yml
+++ b/.github/workflows/mac.yml
@@ -384,6 +384,18 @@ jobs:
         ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 \
             --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml
 
+      - name: SubgraphsDumper tests
+        run: |
+          source ${{ env.INSTALL_DIR }}/setupvars.sh
+          {{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 \
+              --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-SubgraphsDumperTests.xml
+
+      - name: Template OpImpl tests
+        run: |
+          source ${{ env.INSTALL_DIR }}/setupvars.sh
+          {{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" \
+              --gtest_output=xml:${INSTALL_TEST_DIR}/TEST-TemplateOpImplTests.xml
+
       - name: AUTO unit tests
         run: |
           source ${{ env.INSTALL_DIR }}/setupvars.sh
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
index 6ce891e6767698..e6763d2a696377 100644
--- a/.github/workflows/windows.yml
+++ b/.github/workflows/windows.yml
@@ -564,6 +564,14 @@ jobs:
         run: |
           call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/ov_cpu_unit_tests --gtest_print_time=1 --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-CPUUnitTests.xml
 
+      - name: SubgraphsDumper tests
+        run: |
+          call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/subgraphsDumperTests --gtest_print_time=1 --gtest_print_time=1 --device=TEMPLATE --gtest_filter="*OpImpl*" --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-SubgraphsDumperTests.xml
+
+      - name: Template OpImpl tests
+        run: |
+          call "${{ env.INSTALL_DIR }}\\setupvars.bat" && ${{ env.INSTALL_TEST_DIR }}/conformanceTests --gtest_output=xml:${{ env.INSTALL_TEST_DIR }}/TEST-TemplateOpImplTests.xml
+
       - name: GNA plugin unit tests
         shell: cmd
         run: |
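The clearer Constant::cast_vector diagnostics introduced in PATCH 37 can be exercised with a small standalone check. The sketch below is illustrative rather than part of the series: it assumes the public ov::op::v0::Constant API (create, cast_vector) and that the failed range assert surfaces as an ov::Exception, with the message following the format strings added in the diff.

    #include <cstdint>
    #include <iostream>
    #include <vector>

    #include "openvino/core/except.hpp"
    #include "openvino/op/constant.hpp"

    int main() {
        // 300 is representable in i32 but lies outside the i8 range [-128, 127].
        auto c = ov::op::v0::Constant::create(ov::element::i32, ov::Shape{1}, std::vector<int32_t>{300});
        try {
            auto v = c->cast_vector<int8_t>();  // expected to trip the new range check
            (void)v;
        } catch (const ov::Exception& e) {
            // After PATCH 37 the message names both element types and an offending
            // value, e.g. "Cannot cast vector from i32 constant to i8. Some values
            // are outside the range. Example: 300".
            std::cerr << e.what() << std::endl;
        }
        return 0;
    }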