Apply review comments 2

openvinotoolkit · Nov 26, 2024 · e80440f · e80440f
1 parent d2e0493
commit e80440f
Show file tree

Hide file tree

Showing 27 changed files with 322 additions and 308 deletions.
diff --git a/src/common/transformations/include/ov_ops/placeholder.hpp b/src/common/transformations/include/ov_ops/placeholder.hpp
diff --git a/src/common/transformations/src/ov_ops/fully_connected.cpp b/src/common/transformations/src/ov_ops/fully_connected.cpp
@@ -7,7 +7,6 @@
 #include <memory>
 
 #include "matmul_shape_inference.hpp"
-#include "ov_ops/placeholder.hpp"
 
 namespace ov {
 namespace op {
@@ -25,7 +24,7 @@ FullyConnected::FullyConnected(const ov::Output<Node>& A,
 FullyConnected::FullyConnected(const ov::Output<Node>& A,
                                const ov::Output<Node>& B,
                                const ov::element::Type output_type)
-    : FullyConnected(A, B, std::make_shared<Placeholder>(), output_type) {}
+    : FullyConnected(A, B, std::make_shared<v0::Constant>(element::undefined, Shape{0}), output_type) {}
 
 bool FullyConnected::visit_attributes(ov::AttributeVisitor& visitor) {
     visitor.on_attribute("output_type", m_output_type);

diff --git a/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp b/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp
@@ -7,8 +7,8 @@
 #include <memory>
 
 #include "openvino/core/type/element_type.hpp"
+#include "openvino/op/constant.hpp"
 #include "ov_ops/fully_connected.hpp"
-#include "ov_ops/placeholder.hpp"
 
 namespace ov {
 namespace op {
@@ -31,7 +31,12 @@ FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output<Node>& X,
                                                    const ov::Output<Node>& bias,
                                                    const ov::Output<Node>& weight_scales,
                                                    const ov::element::Type output_type)
-    : FullyConnectedCompressed(X, W, bias, weight_scales, std::make_shared<Placeholder>(), output_type) {}
+    : FullyConnectedCompressed(X,
+                               W,
+                               bias,
+                               weight_scales,
+                               std::make_shared<v0::Constant>(element::undefined, Shape{0}),
+                               output_type) {}
 
 std::shared_ptr<ov::Node> FullyConnectedCompressed::clone_with_new_inputs(const ov::OutputVector& new_args) const {
     check_new_args_count(this, new_args);
@@ -47,14 +52,8 @@ std::shared_ptr<ov::Node> FullyConnectedCompressed::clone_with_new_inputs(const
 // @todo finalize validate_and_infer_types
 void FullyConnectedCompressed::validate_and_infer_types() {
     const auto input_size = get_input_size();
-    const size_t expected_size = 5;
-    NODE_VALIDATION_CHECK(this,
-                          input_size == expected_size,
-                          "Number of inputs is incorrect. Current value is: ",
-                          input_size,
-                          ", expected at least ",
-                          expected_size,
-                          ".");
+
+    NODE_VALIDATION_CHECK(this, input_size == 5, "Number of inputs is incorrect. Current value is: ", input_size);
 
     FullyConnected::validate_and_infer_types();
 }

diff --git a/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp b/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp
@@ -49,14 +49,7 @@ std::shared_ptr<ov::Node> FullyConnectedQuantized::clone_with_new_inputs(const o
 // @todo finalize validate_and_infer_types
 void FullyConnectedQuantized::validate_and_infer_types() {
     const auto input_size = get_input_size();
-    const size_t expected_size = 9;
-    NODE_VALIDATION_CHECK(this,
-                          input_size == expected_size,
-                          "Number of inputs is incorrect. Current value is: ",
-                          input_size,
-                          ", expected at least ",
-                          expected_size,
-                          ".");
+    NODE_VALIDATION_CHECK(this, input_size == 9, "Number of inputs is incorrect. Current value is: ", input_size);
 
     FullyConnected::validate_and_infer_types();
 }

diff --git a/src/common/transformations/src/ov_ops/fully_connected_quantized_legacy.cpp b/src/common/transformations/src/ov_ops/fully_connected_quantized_legacy.cpp
@@ -4,12 +4,10 @@
 
 #include "ov_ops/fully_connected_quantized_legacy.hpp"
 
-#include <cstddef>
 #include <memory>
 
 #include "matmul_shape_inference.hpp"
 #include "openvino/core/type/element_type.hpp"
-#include "ov_ops/placeholder.hpp"
 
 namespace ov {
 namespace op {
@@ -32,7 +30,12 @@ FullyConnectedQuantizedLegacy::FullyConnectedQuantizedLegacy(const ov::Output<No
                                                              const ov::Output<Node>& bias,
                                                              const ov::Output<Node>& deq_scales,
                                                              const ov::element::Type output_type)
-    : FullyConnectedQuantizedLegacy(X, W, bias, deq_scales, std::make_shared<Placeholder>(), output_type) {}
+    : FullyConnectedQuantizedLegacy(X,
+                                    W,
+                                    bias,
+                                    deq_scales,
+                                    std::make_shared<v0::Constant>(element::undefined, Shape{0}),
+                                    output_type) {}
 
 std::shared_ptr<ov::Node> FullyConnectedQuantizedLegacy::clone_with_new_inputs(const ov::OutputVector& new_args) const {
     check_new_args_count(this, new_args);
@@ -48,14 +51,8 @@ std::shared_ptr<ov::Node> FullyConnectedQuantizedLegacy::clone_with_new_inputs(c
 // @todo finalize validate_and_infer_types
 void FullyConnectedQuantizedLegacy::validate_and_infer_types() {
     const auto input_size = get_input_size();
-    const size_t expected_size = 5;
-    NODE_VALIDATION_CHECK(this,
-                          input_size == expected_size,
-                          "Number of inputs is incorrect. Current value is: ",
-                          input_size,
-                          ", expected at least ",
-                          expected_size,
-                          ".");
+
+    NODE_VALIDATION_CHECK(this, input_size == 5, "Number of inputs is incorrect. Current value is: ", input_size);
 
     ov::op::v0::MatMul op;
     op.set_transpose_a(false);

diff --git a/src/common/transformations/src/ov_ops/placeholder.cpp b/src/common/transformations/src/ov_ops/placeholder.cpp
diff --git a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp
@@ -20,7 +20,6 @@
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "ov_ops/fully_connected.hpp"
 #include "ov_ops/fully_connected_compressed.hpp"
-#include "ov_ops/placeholder.hpp"
 #include "transformations/utils/utils.hpp"
 
 ov::pass::ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed(
@@ -157,7 +156,8 @@ ov::pass::ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnected
             }
         }
 
-        fc_input_zp = with_zero_point ? fc_input_zp : std::make_shared<ov::op::internal::Placeholder>();
+        fc_input_zp =
+            with_zero_point ? fc_input_zp : std::make_shared<ov::op::v0::Constant>(element::undefined, Shape{0});
         ov::disable_constant_folding(fc_input_zp);
         result_nodes.push_back(fc_input_zp);
 

diff --git a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp
@@ -15,7 +15,6 @@
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "ov_ops/fully_connected.hpp"
 #include "ov_ops/fully_connected_quantized_legacy.hpp"
-#include "ov_ops/placeholder.hpp"
 #include "transformations/utils/utils.hpp"
 
 ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() {
@@ -52,15 +51,15 @@ ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() {
             pattern_map.at(fully_connected_m).get_node_shared_ptr());
 
         ov::NodeVector new_ops;
-        auto zp_ph = std::make_shared<ov::op::internal::Placeholder>();
-        new_ops.push_back(zp_ph);
+        auto zp = std::make_shared<ov::op::v0::Constant>(element::undefined, Shape{0});
+        new_ops.push_back(zp);
 
         auto fc_quantized =
             std::make_shared<ov::op::internal::FullyConnectedQuantizedLegacy>(activations,
                                                                               weights,
                                                                               bias,
                                                                               dequantization_scales,
-                                                                              zp_ph,
+                                                                              zp,
                                                                               fc_node->get_output_type());
         new_ops.push_back(fc_quantized);
 

diff --git a/src/core/src/op/util/op_types.cpp b/src/core/src/op/util/op_types.cpp
@@ -24,7 +24,6 @@
 #include "openvino/op/util/binary_elementwise_logical.hpp"
 #include "openvino/op/util/unary_elementwise_arithmetic.hpp"
 #include "openvino/op/xor.hpp"
-#include "ov_ops/placeholder.hpp"
 
 bool ov::op::util::is_unary_elementwise_arithmetic(const ov::Node* node) {
     return dynamic_cast<const ov::op::util::UnaryElementwiseArithmetic*>(node) != nullptr;
@@ -67,8 +66,7 @@ bool ov::op::util::is_sink(const ov::Node* node) {
 }
 
 bool ov::op::util::is_constant(const ov::Node* node) {
-    return dynamic_cast<const ov::op::v0::Constant*>(node) != nullptr ||
-           dynamic_cast<const ov::op::internal::Placeholder*>(node) != nullptr;
+    return dynamic_cast<const ov::op::v0::Constant*>(node) != nullptr;
 }
 
 bool ov::op::util::is_commutative(const ov::Node* node) {

diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp
@@ -35,7 +35,6 @@ static const TypeToNameMap& get_type_to_name_tbl() {
     static const TypeToNameMap type_to_name_tbl = {
         {"Constant", Type::Input},
         {"Parameter", Type::Input},
-        {"PlaceHolder", Type::Input},
         {"Result", Type::Output},
         {"Eye", Type::Eye},
         {"Convolution", Type::Convolution},
@@ -473,6 +472,10 @@ std::string algToString(const Algorithm alg) {
         CASE(FQCommon);
         CASE(FQQuantization);
         CASE(FQBinarization);
+        CASE(FullyConnectedCommon);
+        CASE(FullyConnectedCompressed);
+        CASE(FullyConnectedQuantized);
+        CASE(FullyConnectedQuantizedLegacy);
         CASE(ROIPoolingMax);
         CASE(ROIPoolingBilinear);
         CASE(ROIAlignMax);

diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h
@@ -213,6 +213,12 @@ enum class Algorithm {
     EltwiseBitwiseLeftShift,
     EltwiseBitwiseRightShift,
 
+    // FullyConnected algorithms
+    FullyConnectedCommon,
+    FullyConnectedCompressed,
+    FullyConnectedQuantized,
+    FullyConnectedQuantizedLegacy,
+
     // FakeQuantize algorithms
     FQCommon,
     FQQuantization,

diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
@@ -81,7 +81,6 @@ DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps,
       idxOC(indexOfOutputChannelDim),
       isINT8(isInt8),
       weightScaleMaskPerChannel(weiScaleMaskPerChannel),
-      weightsWithBatch(memory.at(ARG_WEI)->getDescPtr()->getShape().getRank()),
       outDataType(outDataType) {
     OPENVINO_ASSERT(idxOC >= 0 && static_cast<size_t>(idxOC) < outputDims.size());
     OC = outputDims[idxOC];
@@ -645,8 +644,7 @@ void DnnlPostOpsComposer::appendClip(const std::vector<float>& low, const std::v
 static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
                                             bool needTranspose,
                                             ov::element::Type dstPrc,
-                                            const dnnl::engine& engine,
-                                            bool weightsWithBatch) {
+                                            const dnnl::engine& engine) {
     auto shape = paramsPtr->getShape().getStaticDims();
     if (shape.size() == 1 && shape[0] == 1) {
         shape.push_back(1);
@@ -655,8 +653,10 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
     if (shape.size() != 2 && shape.size() != 3)
         OPENVINO_THROW("DnnlPostOpsComposer cannot prepack decompression params with invalid shape");
 
-    const size_t OC = weightsWithBatch ? shape[shape.size() - 2] : shape.front();
-    const size_t G =  weightsWithBatch ? shape[shape.size() - 1] : shape[1];
+    // weights without batch: (OC, G)
+    // weights with batch: (B, OC, G)
+    const size_t OC = shape[shape.size() - 2];
+    const size_t G =  shape[shape.size() - 1];
 
     Shape dstShape = Shape({OC, G});
 
@@ -683,7 +683,7 @@ void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr
     if (scales_ptr == nullptr)
         return;
 
-    auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine, weightsWithBatch);
+    auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine);
     attr.set_scales_dims(DNNL_ARG_WEIGHTS,
                          DnnlExtensionUtils::convertToDnnlDims(scalesMem->getStaticDims()),
                          DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
@@ -699,7 +699,7 @@ void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_p
         return;
 
     auto zeroPointsMem =
-        prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine, weightsWithBatch);
+        prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine);
     attr.set_zero_points_dims(DNNL_ARG_WEIGHTS,
                               DnnlExtensionUtils::convertToDnnlDims(zeroPointsMem->getStaticDims()),
                               DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));

diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.h b/src/plugins/intel_cpu/src/dnnl_postops_composer.h
@@ -58,7 +58,6 @@ class DnnlPostOpsComposer {
     size_t idxOC;
     const bool isINT8;  // only INT8 primitive support scales
     const int weightScaleMaskPerChannel;
-    bool weightsWithBatch;
     bool weightScaleAvailable = false;
     const dnnl::memory::data_type outDataType;
 

diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp
@@ -16,7 +16,6 @@
 #include "ov_ops/nms_ie_internal.hpp"
 #include "ov_ops/nms_static_shape_ie.hpp"
 #include "ov_ops/rms.hpp"
-#include "ov_ops/placeholder.hpp"
 #include "ov_ops/rotary_positional_embeddings.hpp"
 #include "ov_ops/type_relaxed.hpp"
 #include "snippets/op/subgraph.hpp"
@@ -92,7 +91,6 @@ class TypeRelaxedExtension : public ov::OpExtension<ov::op::TypeRelaxed<Op>> {
     OP_EXTENSION(ov::op::internal::FullyConnectedCompressed)                \
     OP_EXTENSION(ov::op::internal::FullyConnectedQuantizedLegacy)           \
     OP_EXTENSION(ov::op::internal::FullyConnectedQuantized)                 \
-    OP_EXTENSION(ov::op::internal::Placeholder)                             \
     OP_EXTENSION_X64(ov::intel_cpu::MHANode)                                \
     OP_EXTENSION_X64(ov::intel_cpu::InteractionNode)                        \
     OP_EXTENSION_X64(ov::intel_cpu::LLMMLPNode)                             \

diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -728,7 +728,6 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) {
     auto isSuitablePattern = [](NodePtr parent) {
         bool res = true && parent->getType() == Type::Transpose
                         && parent->getChildEdges().size() == 1
-                        && one_of(parent->getChildEdgeAt(0)->getOutputNum(), 1, 3, 4)
                         && parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected
                         && parent->isConstant();
         return res;

diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp
@@ -6,7 +6,6 @@
 
 #include "post_ops.hpp"
 #include "memory_arguments.hpp"
-#include "printers.hpp"
 
 namespace ov {
 namespace intel_cpu {

diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp
@@ -20,7 +20,6 @@
 
 namespace ov {
 namespace intel_cpu {
-using namespace executor;
 
 template <typename Attrs, typename NodeT>
 static ExecutorPtr fallback(const executor::Config<Attrs>& config,