Merge branch 'master' into dm/npuw_unfold
dmatveev authored Nov 21, 2024
2 parents 9eb350e + 9f7ad50 commit 1bddf7e
Showing 28 changed files with 209 additions and 155 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -138,7 +138,7 @@ function(ov_developer_package_export_targets)
endforeach()
endif()
else()
message(FATAL_ERROR "Internal error: ${target_name} does not represent a cmake target")
message(FATAL_ERROR "Internal error: '${EXPORT_TARGET}' does not represent a cmake target")
endif()

list(REMOVE_DUPLICATES _OPENVINO_DEVELOPER_PACKAGE_TARGETS)
3 changes: 3 additions & 0 deletions cmake/features.cmake
@@ -200,6 +200,9 @@ ov_dependent_option (ENABLE_SYSTEM_PROTOBUF "Enables use of system Protobuf" OFF
# the option is turned off by default, because we don't want to have a dependency on libsnappy.so
ov_dependent_option (ENABLE_SYSTEM_SNAPPY "Enables use of system version of Snappy" OFF
"ENABLE_SNAPPY_COMPRESSION" OFF)
+# the option is turned off by default, because we are not sure that system version of ZE loader is fresh enough
+ov_dependent_option (ENABLE_SYSTEM_LEVEL_ZERO "Enables use of system version of Level Zero" OFF
+"ENABLE_INTEL_NPU" OFF)

ov_dependent_option(ENABLE_JS "Enables JS API building" ${ENABLE_JS_DEFAULT} "NOT ANDROID;NOT EMSCRIPTEN" OFF)

2 changes: 1 addition & 1 deletion src/cmake/ov_parallel.cmake
@@ -132,7 +132,7 @@ macro(ov_find_package_tbb)
IMPORTED_TARGET
# we need to set GLOBAL in order to create ALIAS later
# ALIAS creation for non-GLOBAL targets is available since cmake 3.18
-${OV_PkgConfig_VISILITY}
+${OV_PkgConfig_VISIBILITY}
tbb)
if(tbb_FOUND)
# parse version
8 changes: 0 additions & 8 deletions src/common/snippets/tests/src/pass/mha_tokenization.cpp
@@ -169,10 +169,6 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA3D_SplitM) {
}

TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA3D_SplitM_withMul) {
-#if defined(WIN32) && !defined(NDEBUG)
-test_skipped = true;
-GTEST_SKIP() << "Skipping on Windows in Debug mode due to Issue 155258.";
-#endif
const auto& f = MHASplitMFunction(std::vector<PartialShape>{{128, 12, 64}, {128, 12, 64}, {12, 128, 128}, {128, 12, 64}},
std::vector<ov::element::Type>({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}),
std::vector<Shape>{{2, 64, 12, 64}, {128, 12, 1, 64}, {12, 2, 64, 128}, {1, 128, 12, 64}, {128, 12, 64}},
@@ -195,10 +191,6 @@ TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA4D_SplitM) {
}

TEST_F(TokenizeMHASnippetsTests, smoke_Snippets_MHA4D_SplitM_withMul) {
-#if defined(WIN32) && !defined(NDEBUG)
-test_skipped = true;
-GTEST_SKIP() << "Skipping on Windows in Debug mode due to Issue 155258.";
-#endif
const auto& f = MHASplitMFunction(std::vector<PartialShape>{{1, 384, 16, 64}, {1, 384, 16, 64}, {1, 1, 1, 384}, {1, 384, 16, 64}},
std::vector<ov::element::Type>({ov::element::f32, ov::element::f32, ov::element::f32, ov::element::f32}),
std::vector<Shape>{{1, 6, 64, 16, 64}, {1, 384, 16, 1, 64}, {1, 1, 1, 1, 384}, {1, 1, 384, 16, 64}, {1, 384, 16, 64}},
ov_ops/swiglu.hpp
@@ -5,22 +5,19 @@
#pragma once

#include "openvino/op/op.hpp"
#include "transformations_visibility.hpp"

namespace ov {
-namespace intel_gpu {
namespace op {
+namespace internal {

/// \brief Operator performing Swish Gated Linear Unit Activation
/// This operation performs gated linear unit activation that combines swish or gelu activation function
-class SwiGLU : public ov::op::Op {
+class TRANSFORMATIONS_API SwiGLU : public ov::op::Op {
public:
OPENVINO_OP("SwiGLU", "gpu_opset");
OPENVINO_OP("SwiGLU", "ie_internal_opset");

-enum GluType {
-Swish = 0,
-Gelu,
-Gelu_Tanh
-};
+enum GluType { Swish = 0, Gelu, Gelu_Tanh };

SwiGLU() = default;
/// \brief Constructs an SwiGLU operation.
@@ -44,26 +41,44 @@ class SwiGLU : public ov::op::Op {

std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;

-int64_t get_axis() const { return m_axis; }
-int64_t get_split_lengths() const { return m_split_lengths; }
-GluType get_glu_type() const { return m_glu_type; }
-size_t get_split_to_glu_idx() const { return m_split_to_glu_idx; }
+int64_t get_axis() const {
+return m_axis;
+}
+int64_t get_split_lengths() const {
+return m_split_lengths;
+}
+GluType get_glu_type() const {
+return m_glu_type;
+}
+size_t get_split_to_glu_idx() const {
+return m_split_to_glu_idx;
+}

-void set_axis(int64_t axis) { m_axis = axis; }
-void set_split_lengths(int64_t split_lengths) { m_split_lengths = split_lengths; }
-void set_glu_type(GluType glu_type) { m_glu_type = glu_type; }
-void set_split_to_glu_idx(size_t split_to_glu_idx) { m_split_to_glu_idx = split_to_glu_idx; }
+void set_axis(int64_t axis) {
+m_axis = axis;
+}
+void set_split_lengths(int64_t split_lengths) {
+m_split_lengths = split_lengths;
+}
+void set_glu_type(GluType glu_type) {
+m_glu_type = glu_type;
+}
+void set_split_to_glu_idx(size_t split_to_glu_idx) {
+m_split_to_glu_idx = split_to_glu_idx;
+}

private:
int64_t m_axis = 0;
int64_t m_split_lengths = 0;
GluType m_glu_type = GluType::Swish;
size_t m_split_to_glu_idx = 0;
-ov::element::Type m_output_type;
+ov::element::Type m_output_type{};
};

-std::vector<ov::PartialShape> shape_infer(const SwiGLU* op, std::vector<ov::PartialShape> input_shapes);
+// TODO 157615: Move to shape_inference
+TRANSFORMATIONS_API std::vector<ov::PartialShape> shape_infer(const SwiGLU* op,
+std::vector<ov::PartialShape> input_shapes);

-} // namespace op
-} // namespace intel_gpu
-} // namespace ov
+} // namespace internal
+} // namespace op
+} // namespace ov
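
For orientation, a minimal construction sketch for the relocated op. The "ov_ops/swiglu.hpp" include path comes from the implementation file below; the shapes and attribute values are illustrative only, not part of this commit:

#include <memory>

#include "openvino/op/parameter.hpp"
#include "ov_ops/swiglu.hpp"

std::shared_ptr<ov::op::internal::SwiGLU> make_example_swiglu() {
    // Split the last axis of a {1, 4096} tensor in half and gate one 2048-wide
    // half with Swish applied to the other half (values illustrative).
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::Shape{1, 4096});
    return std::make_shared<ov::op::internal::SwiGLU>(data,
                                                      /*axis=*/-1,
                                                      /*split_lengths=*/2048,
                                                      ov::op::internal::SwiGLU::GluType::Swish,
                                                      /*split_to_glu_idx=*/0,
                                                      ov::element::undefined);  // undefined => inherit input type
}
// The resulting node reports output shape {1, 2048}.
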
transformations/common_optimizations/swiglu_fusion.hpp
@@ -0,0 +1,21 @@
+// Copyright (C) 2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/manager.hpp"
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov {
+namespace pass {
+
+class TRANSFORMATIONS_API SwiGLUFusion : public ov::pass::MatcherPass {
+public:
+OPENVINO_RTTI("SwiGLUFusion", "0");
+SwiGLUFusion();
+};
+
+} // namespace pass
+} // namespace ov
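
A sketch of how the relocated pass might be applied. ov::pass::Manager is standard OpenVINO API; the fuse_swiglu wrapper is hypothetical, and the header path matches the #include used by swiglu_fusion.cpp below:

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/pass/manager.hpp"
#include "transformations/common_optimizations/swiglu_fusion.hpp"

void fuse_swiglu(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;
    manager.register_pass<ov::pass::SwiGLUFusion>();  // rewrites matching subgraphs in place
    manager.run_passes(model);
}
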
ov_ops/swiglu.cpp
@@ -2,24 +2,29 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "intel_gpu/op/swiglu.hpp"
#include "ov_ops/swiglu.hpp"

#include "openvino/core/partial_shape.hpp"
#include "openvino/core/validation_util.hpp"
#include "openvino/op/variadic_split.hpp"
#include "variadic_split_shape_inference.hpp"

namespace ov {
-namespace intel_gpu {
namespace op {
+namespace internal {

SwiGLU::SwiGLU(const Output<Node>& data,
int64_t axis,
int64_t split_lengths,
const GluType glu_type,
const size_t split_to_glu_idx,
const ov::element::Type output_type)
-: Op({data}), m_axis(axis), m_split_lengths(split_lengths),
-m_glu_type(glu_type), m_split_to_glu_idx(split_to_glu_idx), m_output_type(output_type) {
+: Op({data}),
+m_axis(axis),
+m_split_lengths(split_lengths),
+m_glu_type(glu_type),
+m_split_to_glu_idx(split_to_glu_idx),
+m_output_type(output_type) {
validate_and_infer_types();
}

@@ -33,11 +38,9 @@ bool SwiGLU::visit_attributes(ov::AttributeVisitor& visitor) {
void SwiGLU::validate_and_infer_types() {
auto output_type = m_output_type == ov::element::undefined ? get_input_element_type(0) : m_output_type;

-std::vector<ov::PartialShape> input_shapes = {
-get_input_partial_shape(0),
-ov::PartialShape(ov::Shape{}),
-ov::PartialShape(ov::Shape{2})
-};
+std::vector<ov::PartialShape> input_shapes = {get_input_partial_shape(0),
+ov::PartialShape(ov::Shape{}),
+ov::PartialShape(ov::Shape{2})};

set_output_type(0, output_type, shape_infer(this, input_shapes)[0]);
}
@@ -54,16 +57,18 @@ std::shared_ptr<Node> SwiGLU::clone_with_new_inputs(const ov::OutputVector& new_

std::vector<ov::PartialShape> shape_infer(const SwiGLU* op, std::vector<ov::PartialShape> input_shapes) {
ov::op::v1::VariadicSplit variadic_split;
-std::vector<int64_t> axis = { op->get_axis() };
-std::vector<int64_t> split_lengths = { op->get_split_lengths(), -1 };
+std::vector<int64_t> axis = {op->get_axis()};
+std::vector<int64_t> split_lengths = {op->get_split_lengths(), -1};

std::unordered_map<size_t, ov::Tensor> const_data;
const_data.emplace(1, ov::Tensor(ov::element::i64, ov::Shape{}, static_cast<void*>(axis.data())));
-const_data.emplace(2, ov::Tensor(ov::element::i64, ov::Shape{split_lengths.size()}, static_cast<void*>(split_lengths.data())));
+const_data.emplace(
+2,
+ov::Tensor(ov::element::i64, ov::Shape{split_lengths.size()}, static_cast<void*>(split_lengths.data())));

return ov::op::v1::shape_infer(&variadic_split, input_shapes, ov::make_tensor_accessor(const_data));
}

+} // namespace internal
} // namespace op
-} // namespace intel_gpu
} // namespace ov
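
The shape_infer above delegates to VariadicSplit's shape inference: the op's axis and {split_lengths, -1} are wrapped in tensors, passed through a tensor accessor, and only the first of the two inferred outputs becomes the op's shape. A sketch of that contract, assuming an op built like the header example earlier (axis -1, split_lengths 2048); the helper and shapes are illustrative:

#include <vector>

#include "openvino/core/partial_shape.hpp"
#include "ov_ops/swiglu.hpp"

// `op` as produced by make_example_swiglu() in the header sketch above.
ov::PartialShape infer_swiglu_shape(const ov::op::internal::SwiGLU* op) {
    // Three input shapes are expected: data, scalar axis, and the {2} split
    // lengths, mirroring what validate_and_infer_types() builds internally.
    std::vector<ov::PartialShape> input_shapes = {ov::PartialShape{1, 7, 4096},   // data
                                                  ov::PartialShape(ov::Shape{}),  // axis (scalar)
                                                  ov::PartialShape(ov::Shape{2})};  // split lengths
    // shape_infer returns one shape per VariadicSplit output; the op keeps
    // only the first, here {1, 7, 2048}.
    return ov::op::internal::shape_infer(op, input_shapes)[0];
}
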
swiglu_fusion.cpp
@@ -2,22 +2,22 @@
// SPDX-License-Identifier: Apache-2.0
//

#include "swiglu_fusion.hpp"

#include "intel_gpu/op/swiglu.hpp"
#include "transformations/common_optimizations/swiglu_fusion.hpp"

#include "openvino/core/rt_info.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/gelu.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/swish.hpp"
#include "openvino/op/gelu.hpp"
#include "openvino/op/variadic_split.hpp"
#include "openvino/pass/manager.hpp"
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "ov_ops/swiglu.hpp"
#include "transformations/utils/utils.hpp"

namespace ov {
-namespace intel_gpu {
+namespace pass {

SwiGLUFusion::SwiGLUFusion() {
using namespace ov::pass::pattern;
@@ -60,20 +60,21 @@ SwiGLUFusion::SwiGLUFusion() {
auto isSwiGLU = pattern_map.count(swish_m);
auto isGeGLU = pattern_map.count(gelu_m);
size_t split_to_glu_idx = 0;
-ov::intel_gpu::op::SwiGLU::GluType glu_type = ov::intel_gpu::op::SwiGLU::GluType::Swish;
+ov::op::internal::SwiGLU::GluType glu_type = ov::op::internal::SwiGLU::GluType::Swish;

if (isSwiGLU) {
auto swish = std::dynamic_pointer_cast<ov::op::v4::Swish>(pattern_map.at(swish_m).get_node_shared_ptr());
-glu_type = ov::intel_gpu::op::SwiGLU::GluType::Swish;
+glu_type = ov::op::internal::SwiGLU::GluType::Swish;
split_to_glu_idx = swish->input_value(0).get_index();

size_t split_in_idx = ov::is_type<ov::op::v4::Swish>(mul->get_input_node_shared_ptr(0)) ? 1 : 0;
if (mul->input_value(split_in_idx).get_index() == split_to_glu_idx)
return false;
} else if (isGeGLU) {
auto gelu = std::dynamic_pointer_cast<ov::op::v7::Gelu>(pattern_map.at(gelu_m).get_node_shared_ptr());
-glu_type = (gelu->get_approximation_mode() == ov::op::GeluApproximationMode::ERF) ? ov::intel_gpu::op::SwiGLU::GluType::Gelu
-: ov::intel_gpu::op::SwiGLU::GluType::Gelu_Tanh;
+glu_type = (gelu->get_approximation_mode() == ov::op::GeluApproximationMode::ERF)
+? ov::op::internal::SwiGLU::GluType::Gelu
+: ov::op::internal::SwiGLU::GluType::Gelu_Tanh;
split_to_glu_idx = gelu->input_value(0).get_index();

size_t split_in_idx = ov::is_type<ov::op::v7::Gelu>(mul->get_input_node_shared_ptr(0)) ? 1 : 0;
@@ -83,7 +84,8 @@ SwiGLUFusion::SwiGLUFusion() {
OPENVINO_THROW("'glu_type' not initialized");
}

-auto variadic_split = std::dynamic_pointer_cast<ov::op::v1::VariadicSplit>(pattern_map.at(variadic_split_m).get_node_shared_ptr());
+auto variadic_split = std::dynamic_pointer_cast<ov::op::v1::VariadicSplit>(
+pattern_map.at(variadic_split_m).get_node_shared_ptr());
auto variadic_split_in_ps = variadic_split->get_input_partial_shape(0);
auto last_dim = variadic_split_in_ps.rank().get_length() - 1;

@@ -94,7 +96,8 @@ SwiGLUFusion::SwiGLUFusion() {
return false;
auto axis_value = axis->cast_vector<int64_t>()[0];

-auto split_lengths = std::dynamic_pointer_cast<ov::op::v0::Constant>(pattern_map.at(split_lengths_const_m).get_node_shared_ptr());
+auto split_lengths = std::dynamic_pointer_cast<ov::op::v0::Constant>(
+pattern_map.at(split_lengths_const_m).get_node_shared_ptr());
auto split_lengths_value = split_lengths->cast_vector<int64_t>()[0];
// Allow only case that exactly splits in half along the last dimension
auto split_length = variadic_split_in_ps[last_dim].get_length() / 2;
@@ -104,12 +107,12 @@ SwiGLUFusion::SwiGLUFusion() {
auto data = pattern_map.at(data_m);
auto output_type = m.get_match_root()->get_output_element_type(0);

-auto swiglu = std::make_shared<op::SwiGLU>(data,
-axis_value,
-split_lengths_value,
-glu_type,
-split_to_glu_idx,
-output_type);
+auto swiglu = std::make_shared<ov::op::internal::SwiGLU>(data,
+axis_value,
+split_lengths_value,
+glu_type,
+split_to_glu_idx,
+output_type);
swiglu->set_friendly_name(m.get_match_root()->get_friendly_name());
ov::copy_runtime_info(m.get_matched_nodes(), swiglu);
ov::replace_node(m.get_match_root(), swiglu);
@@ -121,5 +124,5 @@ SwiGLUFusion::SwiGLUFusion() {
this->register_matcher(m, callback);
}

-} // namespace intel_gpu
+} // namespace pass
} // namespace ov
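
For reference, a hypothetical subgraph this matcher would rewrite (the Swish variant); all names and shapes are illustrative, with an exact half split chosen to satisfy the check above:

#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/swish.hpp"
#include "openvino/op/variadic_split.hpp"

std::shared_ptr<ov::Model> make_swiglu_pattern_model() {
    // VariadicSplit halves the last axis; Swish gates one half, Multiply
    // combines it with the other half.
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f32, ov::PartialShape{-1, 4096});
    auto axis = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{}, {-1});
    auto split_lengths = ov::op::v0::Constant::create(ov::element::i64, ov::Shape{2}, {2048, 2048});
    auto split = std::make_shared<ov::op::v1::VariadicSplit>(data, axis, split_lengths);
    auto swish = std::make_shared<ov::op::v4::Swish>(split->output(0));
    auto mul = std::make_shared<ov::op::v1::Multiply>(swish, split->output(1));
    // Running fuse_swiglu() from the earlier sketch would replace VariadicSplit,
    // Swish and Multiply with a single ov::op::internal::SwiGLU node.
    return std::make_shared<ov::Model>(ov::OutputVector{mul->output(0)}, ov::ParameterVector{data});
}
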