diff --git a/src/common/transformations/include/ov_ops/placeholder.hpp b/src/common/transformations/include/ov_ops/placeholder.hpp
deleted file mode 100644
index fe59355826ea11..00000000000000
--- a/src/common/transformations/include/ov_ops/placeholder.hpp
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright (C) 2024 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-
-#include "openvino/op/op.hpp"
-#include "transformations_visibility.hpp"
-
-namespace ov {
-namespace op {
-namespace internal {
-
-/// \brief GRUSequence operation.
-///
-/// Supposed to be used in place of an optional input of another operation
-
-class TRANSFORMATIONS_API Placeholder : public ov::op::Op {
-public:
-    OPENVINO_OP("Placeholder", "ie_internal_opset");
-
-    Placeholder();
-
-    void validate_and_infer_types() override;
-    std::shared_ptr<Node> clone_with_new_inputs(const ov::OutputVector& new_args) const override;
-};
-
-}  // namespace internal
-}  // namespace op
-}  // namespace ov
diff --git a/src/common/transformations/src/ov_ops/fully_connected.cpp b/src/common/transformations/src/ov_ops/fully_connected.cpp
index 6fcebcb1eb4fb2..3fa609362b999c 100644
--- a/src/common/transformations/src/ov_ops/fully_connected.cpp
+++ b/src/common/transformations/src/ov_ops/fully_connected.cpp
@@ -7,7 +7,6 @@
 #include <memory>
 
 #include "matmul_shape_inference.hpp"
-#include "ov_ops/placeholder.hpp"
 
 namespace ov {
 namespace op {
@@ -25,7 +24,7 @@ FullyConnected::FullyConnected(const ov::Output<Node>& A,
 FullyConnected::FullyConnected(const ov::Output<Node>& A,
                                const ov::Output<Node>& B,
                                const ov::element::Type output_type)
-    : FullyConnected(A, B, std::make_shared<Placeholder>(), output_type) {}
+    : FullyConnected(A, B, std::make_shared<v0::Constant>(element::undefined, Shape{0}), output_type) {}
 
 bool FullyConnected::visit_attributes(ov::AttributeVisitor& visitor) {
     visitor.on_attribute("output_type", m_output_type);
diff --git a/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp b/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp
index 4301dd6ed15d71..e0bb13042ea6ff 100644
--- a/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp
+++ b/src/common/transformations/src/ov_ops/fully_connected_compressed.cpp
@@ -7,8 +7,8 @@
 #include <memory>
 
 #include "openvino/core/type/element_type.hpp"
+#include "openvino/op/constant.hpp"
 #include "ov_ops/fully_connected.hpp"
-#include "ov_ops/placeholder.hpp"
 
 namespace ov {
 namespace op {
@@ -31,7 +31,12 @@ FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output<Node>& X,
                                                    const ov::Output<Node>& bias,
                                                    const ov::Output<Node>& weight_scales,
                                                    const ov::element::Type output_type)
-    : FullyConnectedCompressed(X, W, bias, weight_scales, std::make_shared<Placeholder>(), output_type) {}
+    : FullyConnectedCompressed(X,
+                               W,
+                               bias,
+                               weight_scales,
+                               std::make_shared<v0::Constant>(element::undefined, Shape{0}),
+                               output_type) {}
 
 std::shared_ptr<ov::Node> FullyConnectedCompressed::clone_with_new_inputs(const ov::OutputVector& new_args) const {
     check_new_args_count(this, new_args);
@@ -47,14 +52,8 @@ std::shared_ptr<ov::Node> FullyConnectedCompressed::clone_with_new_inputs(const
 // @todo finalize validate_and_infer_types
 void FullyConnectedCompressed::validate_and_infer_types() {
     const auto input_size = get_input_size();
-    const size_t expected_size = 5;
-    NODE_VALIDATION_CHECK(this,
-                          input_size == expected_size,
-                          "Number of inputs is incorrect. Current value is: ",
-                          input_size,
-                          ", expected at least ",
-                          expected_size,
-                          ".");
+
+    NODE_VALIDATION_CHECK(this, input_size == 5, "Number of inputs is incorrect. Current value is: ", input_size);
 
     FullyConnected::validate_and_infer_types();
 }
diff --git a/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp b/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp
index 9c417d2d1b48ee..3f06e14834f7d1 100644
--- a/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp
+++ b/src/common/transformations/src/ov_ops/fully_connected_quantized.cpp
@@ -49,14 +49,7 @@ std::shared_ptr<ov::Node> FullyConnectedQuantized::clone_with_new_inputs(const o
 // @todo finalize validate_and_infer_types
 void FullyConnectedQuantized::validate_and_infer_types() {
     const auto input_size = get_input_size();
-    const size_t expected_size = 9;
-    NODE_VALIDATION_CHECK(this,
-                          input_size == expected_size,
-                          "Number of inputs is incorrect. Current value is: ",
-                          input_size,
-                          ", expected at least ",
-                          expected_size,
-                          ".");
+    NODE_VALIDATION_CHECK(this, input_size == 9, "Number of inputs is incorrect. Current value is: ", input_size);
 
     FullyConnected::validate_and_infer_types();
 }
Current value is: ", input_size); ov::op::v0::MatMul op; op.set_transpose_a(false); diff --git a/src/common/transformations/src/ov_ops/placeholder.cpp b/src/common/transformations/src/ov_ops/placeholder.cpp deleted file mode 100644 index 61b78c32028d45..00000000000000 --- a/src/common/transformations/src/ov_ops/placeholder.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "ov_ops/placeholder.hpp" - -namespace ov { -namespace op { -namespace internal { - -Placeholder::Placeholder() : ov::op::Op() { - validate_and_infer_types(); -} - -void Placeholder::validate_and_infer_types() { - set_output_type(0, ov::element::undefined, ov::PartialShape{}); -} - -std::shared_ptr Placeholder::clone_with_new_inputs(const ov::OutputVector& new_args) const { - check_new_args_count(this, new_args); - return std::make_shared(); -} - -} // namespace internal -} // namespace op -} // namespace ov diff --git a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp index 809cdf7dcf30c2..87c3b669d98c6d 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_compressed.cpp @@ -20,7 +20,6 @@ #include "openvino/pass/pattern/op/wrap_type.hpp" #include "ov_ops/fully_connected.hpp" #include "ov_ops/fully_connected_compressed.hpp" -#include "ov_ops/placeholder.hpp" #include "transformations/utils/utils.hpp" ov::pass::ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed( @@ -157,7 +156,8 @@ ov::pass::ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnected } } - fc_input_zp = with_zero_point ? fc_input_zp : std::make_shared(); + fc_input_zp = + with_zero_point ? 
fc_input_zp : std::make_shared(element::undefined, Shape{0}); ov::disable_constant_folding(fc_input_zp); result_nodes.push_back(fc_input_zp); diff --git a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp index 36bea689a46f23..908e36a51a7eb9 100644 --- a/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp +++ b/src/common/transformations/src/transformations/op_conversions/convert_fc_to_quantized_legacy.cpp @@ -15,7 +15,6 @@ #include "openvino/pass/pattern/op/wrap_type.hpp" #include "ov_ops/fully_connected.hpp" #include "ov_ops/fully_connected_quantized_legacy.hpp" -#include "ov_ops/placeholder.hpp" #include "transformations/utils/utils.hpp" ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() { @@ -52,15 +51,15 @@ ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() { pattern_map.at(fully_connected_m).get_node_shared_ptr()); ov::NodeVector new_ops; - auto zp_ph = std::make_shared(); - new_ops.push_back(zp_ph); + auto zp = std::make_shared(element::undefined, Shape{0}); + new_ops.push_back(zp); auto fc_quantized = std::make_shared(activations, weights, bias, dequantization_scales, - zp_ph, + zp, fc_node->get_output_type()); new_ops.push_back(fc_quantized); diff --git a/src/core/src/op/util/op_types.cpp b/src/core/src/op/util/op_types.cpp index 5b2d261f1d86e8..053051535aef21 100644 --- a/src/core/src/op/util/op_types.cpp +++ b/src/core/src/op/util/op_types.cpp @@ -24,7 +24,6 @@ #include "openvino/op/util/binary_elementwise_logical.hpp" #include "openvino/op/util/unary_elementwise_arithmetic.hpp" #include "openvino/op/xor.hpp" -#include "ov_ops/placeholder.hpp" bool ov::op::util::is_unary_elementwise_arithmetic(const ov::Node* node) { return dynamic_cast(node) != nullptr; @@ -67,8 +66,7 @@ bool ov::op::util::is_sink(const ov::Node* node) { } bool ov::op::util::is_constant(const ov::Node* node) { - return dynamic_cast(node) != nullptr || - dynamic_cast(node) != nullptr; + return dynamic_cast(node) != nullptr; } bool ov::op::util::is_commutative(const ov::Node* node) { diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp index 70bbf363393e86..30884bbe649962 100644 --- a/src/plugins/intel_cpu/src/cpu_types.cpp +++ b/src/plugins/intel_cpu/src/cpu_types.cpp @@ -35,7 +35,6 @@ static const TypeToNameMap& get_type_to_name_tbl() { static const TypeToNameMap type_to_name_tbl = { {"Constant", Type::Input}, {"Parameter", Type::Input}, - {"PlaceHolder", Type::Input}, {"Result", Type::Output}, {"Eye", Type::Eye}, {"Convolution", Type::Convolution}, @@ -473,6 +472,10 @@ std::string algToString(const Algorithm alg) { CASE(FQCommon); CASE(FQQuantization); CASE(FQBinarization); + CASE(FullyConnectedCommon); + CASE(FullyConnectedCompressed); + CASE(FullyConnectedQuantized); + CASE(FullyConnectedQuantizedLegacy); CASE(ROIPoolingMax); CASE(ROIPoolingBilinear); CASE(ROIAlignMax); diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h index 9461526184b0bf..71088c22af8336 100644 --- a/src/plugins/intel_cpu/src/cpu_types.h +++ b/src/plugins/intel_cpu/src/cpu_types.h @@ -213,6 +213,12 @@ enum class Algorithm { EltwiseBitwiseLeftShift, EltwiseBitwiseRightShift, + // FullyConnected algorithms + FullyConnectedCommon, + FullyConnectedCompressed, + FullyConnectedQuantized, + FullyConnectedQuantizedLegacy, + // FakeQuantize 
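One subtlety in the zero-point handling above: the stub Constant must not be folded away before the CPU plugin sees it, hence the disable_constant_folding call kept in the pass. A sketch of that wiring in isolation, with illustrative variable names:

    #include <memory>

    #include "openvino/op/constant.hpp"
    #include "openvino/pass/constant_folding.hpp"

    std::shared_ptr<ov::Node> build_zero_point_input() {
        auto fc_input_zp = std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0});
        // Without this, ConstantFolding could absorb the input and the plugin would no
        // longer be able to map it to ARG_WEI | ARG_ATTR_ZERO_POINTS at compile time.
        ov::disable_constant_folding(fc_input_zp);
        return fc_input_zp;
    }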
diff --git a/src/plugins/intel_cpu/src/cpu_types.cpp b/src/plugins/intel_cpu/src/cpu_types.cpp
index 70bbf363393e86..30884bbe649962 100644
--- a/src/plugins/intel_cpu/src/cpu_types.cpp
+++ b/src/plugins/intel_cpu/src/cpu_types.cpp
@@ -35,7 +35,6 @@ static const TypeToNameMap& get_type_to_name_tbl() {
     static const TypeToNameMap type_to_name_tbl = {
         {"Constant", Type::Input},
         {"Parameter", Type::Input},
-        {"PlaceHolder", Type::Input},
         {"Result", Type::Output},
         {"Eye", Type::Eye},
         {"Convolution", Type::Convolution},
@@ -473,6 +472,10 @@ std::string algToString(const Algorithm alg) {
     CASE(FQCommon);
     CASE(FQQuantization);
     CASE(FQBinarization);
+    CASE(FullyConnectedCommon);
+    CASE(FullyConnectedCompressed);
+    CASE(FullyConnectedQuantized);
+    CASE(FullyConnectedQuantizedLegacy);
     CASE(ROIPoolingMax);
     CASE(ROIPoolingBilinear);
     CASE(ROIAlignMax);
diff --git a/src/plugins/intel_cpu/src/cpu_types.h b/src/plugins/intel_cpu/src/cpu_types.h
index 9461526184b0bf..71088c22af8336 100644
--- a/src/plugins/intel_cpu/src/cpu_types.h
+++ b/src/plugins/intel_cpu/src/cpu_types.h
@@ -213,6 +213,12 @@ enum class Algorithm {
     EltwiseBitwiseLeftShift,
     EltwiseBitwiseRightShift,
 
+    // FullyConnected algorithms
+    FullyConnectedCommon,
+    FullyConnectedCompressed,
+    FullyConnectedQuantized,
+    FullyConnectedQuantizedLegacy,
+
     // FakeQuantize algorithms
     FQCommon,
     FQQuantization,
diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
index adf2d1a2718619..70d28f1f4ac739 100644
--- a/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
+++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
@@ -81,7 +81,6 @@ DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps,
       idxOC(indexOfOutputChannelDim),
       isINT8(isInt8),
       weightScaleMaskPerChannel(weiScaleMaskPerChannel),
-      weightsWithBatch(memory.at(ARG_WEI)->getDescPtr()->getShape().getRank()),
       outDataType(outDataType) {
     OPENVINO_ASSERT(idxOC >= 0 && static_cast<size_t>(idxOC) < outputDims.size());
     OC = outputDims[idxOC];
@@ -645,8 +644,7 @@ void DnnlPostOpsComposer::appendClip(const std::vector<float>& low, const std::v
 static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
                                             bool needTranspose,
                                             ov::element::Type dstPrc,
-                                            const dnnl::engine& engine,
-                                            bool weightsWithBatch) {
+                                            const dnnl::engine& engine) {
     auto shape = paramsPtr->getShape().getStaticDims();
     if (shape.size() == 1 && shape[0] == 1) {
         shape.push_back(1);
@@ -655,8 +653,10 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
     if (shape.size() != 2 && shape.size() != 3)
         OPENVINO_THROW("DnnlPostOpsComposer cannot prepack decompression params with invalid shape");
 
-    const size_t OC = weightsWithBatch ? shape[shape.size() - 2] : shape.front();
-    const size_t G = weightsWithBatch ? shape[shape.size() - 1] : shape[1];
+    // weights without batch: (OC, G)
+    // weights with batch: (B, OC, G)
+    const size_t OC = shape[shape.size() - 2];
+    const size_t G = shape[shape.size() - 1];
 
     Shape dstShape = Shape({OC, G});
 
@@ -683,7 +683,7 @@ void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr
     if (scales_ptr == nullptr)
         return;
 
-    auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine, weightsWithBatch);
+    auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine);
     attr.set_scales_dims(DNNL_ARG_WEIGHTS,
                          DnnlExtensionUtils::convertToDnnlDims(scalesMem->getStaticDims()),
                          DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
@@ -699,7 +699,7 @@ void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_p
         return;
 
     auto zeroPointsMem =
-        prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine, weightsWithBatch);
+        prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine);
     attr.set_zero_points_dims(DNNL_ARG_WEIGHTS,
                               DnnlExtensionUtils::convertToDnnlDims(zeroPointsMem->getStaticDims()),
                               DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
diff --git a/src/plugins/intel_cpu/src/dnnl_postops_composer.h b/src/plugins/intel_cpu/src/dnnl_postops_composer.h
index 7d9f6b71d573a2..8c2718aaaed4d5 100644
--- a/src/plugins/intel_cpu/src/dnnl_postops_composer.h
+++ b/src/plugins/intel_cpu/src/dnnl_postops_composer.h
@@ -58,7 +58,6 @@ class DnnlPostOpsComposer {
     size_t idxOC;
     const bool isINT8;  // only INT8 primitive support scales
     const int weightScaleMaskPerChannel;
-    bool weightsWithBatch;
     bool weightScaleAvailable = false;
     const dnnl::memory::data_type outDataType;
 
diff --git a/src/plugins/intel_cpu/src/extension.cpp b/src/plugins/intel_cpu/src/extension.cpp
index 430e8872fd4964..e6dbc04b0ca6a4 100644
--- a/src/plugins/intel_cpu/src/extension.cpp
+++ b/src/plugins/intel_cpu/src/extension.cpp
@@ -16,7 +16,6 @@
 #include "ov_ops/nms_ie_internal.hpp"
 #include "ov_ops/nms_static_shape_ie.hpp"
 #include "ov_ops/rms.hpp"
-#include "ov_ops/placeholder.hpp"
 #include "ov_ops/rotary_positional_embeddings.hpp"
 #include "ov_ops/type_relaxed.hpp"
 #include "snippets/op/subgraph.hpp"
@@ -92,7 +91,6 @@ class TypeRelaxedExtension : public ov::OpExtension<ov::op::TypeRelaxed<Op>> {
     OP_EXTENSION(ov::op::internal::FullyConnectedCompressed)      \
     OP_EXTENSION(ov::op::internal::FullyConnectedQuantizedLegacy) \
     OP_EXTENSION(ov::op::internal::FullyConnectedQuantized)      \
-    OP_EXTENSION(ov::op::internal::Placeholder)                   \
     OP_EXTENSION_X64(ov::intel_cpu::MHANode)                      \
     OP_EXTENSION_X64(ov::intel_cpu::InteractionNode)              \
     OP_EXTENSION_X64(ov::intel_cpu::LLMMLPNode)                   \
diff --git a/src/plugins/intel_cpu/src/graph_optimizer.cpp b/src/plugins/intel_cpu/src/graph_optimizer.cpp
index 5f038f92cbe408..94f54fc4c59b55 100644
--- a/src/plugins/intel_cpu/src/graph_optimizer.cpp
+++ b/src/plugins/intel_cpu/src/graph_optimizer.cpp
@@ -728,7 +728,6 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) {
     auto isSuitablePattern = [](NodePtr parent) {
         bool res = true && parent->getType() == Type::Transpose
                    && parent->getChildEdges().size() == 1
-                   && one_of(parent->getChildEdgeAt(0)->getOutputNum(), 1, 3, 4)
                    && parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected
                    && parent->isConstant();
         return res;
diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp
index 09b3b33cfe6b2f..d08c4ad8127325 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/executor_config.hpp
@@ -6,7 +6,6 @@
 
 #include "post_ops.hpp"
 #include "memory_arguments.hpp"
-#include "printers.hpp"
 
 namespace ov {
 namespace intel_cpu {
diff --git a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp
index 419ab4abf52cd7..d1f0d1b4297a54 100644
--- a/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp
+++ b/src/plugins/intel_cpu/src/nodes/executors/executor_factory.hpp
@@ -20,7 +20,6 @@
 
 namespace ov {
 namespace intel_cpu {
-using namespace executor;
 
 template <typename Attrs, typename NodeT>
 static ExecutorPtr fallback(const executor::Config<Attrs>& config,
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
index 79a36c3d53f010..d169188ab14497 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp
@@ -10,6 +10,7 @@
 
 #include "common/cpu_convert.h"
 #include "common/cpu_memcpy.h"
+#include "cpu_types.h"
 #include "dnnl_extension_utils.h"
 #include "executors/memory_arguments.hpp"
 #include "graph_context.h"
@@ -19,14 +20,16 @@
 #include "memory_desc/cpu_memory_desc_utils.h"
 #include "nodes/executors/executor.hpp"
 #include "nodes/executors/fullyconnected_config.hpp"
+#include "openvino/core/type.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "openvino/runtime/threading/cpu_message.hpp"
 #include "ov_ops/fully_connected.hpp"
+#include "ov_ops/fully_connected_quantized.hpp"
 #include "ov_ops/fully_connected_quantized_legacy.hpp"
 #include "ov_ops/fully_connected_compressed.hpp"
-#include "ov_ops/placeholder.hpp"
 #include "post_ops.hpp"
 #include "shape_inference/custom/fullyconnected.hpp"
+#include "transformations/utils/utils.hpp"
 #include "utils/debug_capabilities.h"
 #include "utils/general_utils.h"
 
@@ -48,22 +51,70 @@ bool FullyConnected::isSupportedOperation(const std::shared_ptr<const ov::Node>&
         return false;
     }
 
-    const auto fc = std::dynamic_pointer_cast<const ov::op::internal::FullyConnected>(op);
-    if (!fc) {
-        errorMessage = "Only FullyConnected operation is supported";
+    if (ov::is_type<const ov::op::internal::FullyConnected>(op)) {
+        if (!ov::op::util::is_on_constant_path(op->input_value(BIAS))) {
+            errorMessage = "Only Constant operation on 'bias' input is supported";
+            return false;
+        }
+    }
+
+    if (ov::is_type<const ov::op::internal::FullyConnectedCompressed>(op)) {
+        if (!ov::op::util::is_on_constant_path(op->input_value(WEIGHT_SCALES)) ||
+            !ov::op::util::is_on_constant_path(op->input_value(WEIGHT_ZERO_POINTS))) {
+            errorMessage = "Only Constant operation on 'weight scales', and 'weight zero points' inputs is supported";
+            return false;
+        }
+    }
+    } catch (...) {
+        return false;
+    }
+
+    return true;
+}
+
+// @todo replace 'inferencePrecision' check with 'fc->get_input_element_type(0) == ov::element::bf16'
+// after bf16 pipeline is moved to ConvertPrecision
+bool FullyConnected::isSupportedCompressedOperation(const std::shared_ptr<const ov::Node>& op,
+                                                    size_t IC,
+                                                    size_t OC,
+                                                    size_t G,
+                                                    ov::element::Type inferencePrecision) noexcept {
+#if defined(OPENVINO_ARCH_X86_64)
+    try {
+        std::string errorMessage;
+        if (!isSupportedOperation(op, errorMessage))
             return false;
+
+        if (!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2))
+            return false;
+
+        if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) &&
+            inferencePrecision == ov::element::bf16) {
+            // OneDNN AMX IP implementation has limited shapes support due to performance considerations. As a
+            // current solution conditions below are copied from OneDNN to make sure correct IP impl will be
+            // used since fallback one doesn't support weights decompression feature.
+            size_t simdWidth = 16;
+            size_t vnniFactor = 2;
+            size_t maxSize = 512;
+            auto amxRow = vnniFactor * simdWidth;
+
+            if ((IC <= amxRow && OC <= amxRow) || (IC <= maxSize && OC <= maxSize && IC % amxRow != 0)) {
+                return false;
+            }
         }
 
-    if (fc->get_input_size() == 3 &&
-        (!ov::is_type<ov::op::v0::Constant>(fc->get_input_node_shared_ptr(BIAS)) &&
-         !ov::is_type<ov::op::internal::Placeholder>(fc->get_input_node_shared_ptr(BIAS)))) {
-        errorMessage = "Only Constant or Placeholder operation on 'bias' input is supported";
+        if (IC % G != 0 || IC / G < 4 || OC == 1) {
             return false;
         }
+
+        return true;
     } catch (...) {
         return false;
     }
 
     return true;
+#else
+    return false;
+#endif
 }
 
 void FullyConnected::initTensorParallelConfig(const GraphContext::CPtr context) {
@@ -97,13 +148,18 @@ FullyConnected::FullyConnected(const std::shared_ptr<ov::Node>& op, const GraphC
         }
     };
 
-    if (const auto fcC = std::dynamic_pointer_cast<const FullyConnectedCompressed>(op)) {
+    if (ov::is_type<const ov::op::internal::FullyConnectedCompressed>(op)) {
         mapArgToInput(m_atoi, ARG_WEI | ARG_ATTR_SCALES, WEIGHT_SCALES);
         mapArgToInput(m_atoi, ARG_WEI | ARG_ATTR_ZERO_POINTS, WEIGHT_ZERO_POINTS);
-    }
-
-    if (const auto fcQL = std::dynamic_pointer_cast<const FullyConnectedQuantizedLegacy>(op)) {
+        algorithm = Algorithm::FullyConnectedCompressed;
+    } else if (ov::is_type<const ov::op::internal::FullyConnectedQuantizedLegacy>(op)) {
         mapArgToInput(m_atoi, ARG_DST_DEQ_SCALE, 3);
+        algorithm = Algorithm::FullyConnectedQuantizedLegacy;
+    } else if (ov::is_type<const ov::op::internal::FullyConnectedQuantized>(op)) {
+        algorithm = Algorithm::FullyConnectedQuantized;
+        OPENVINO_THROW_NOT_IMPLEMENTED("FullyConnectedQuantized is not implemented yet");
+    } else {
+        algorithm = Algorithm::FullyConnectedCommon;
+    }
 }
 
diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.h b/src/plugins/intel_cpu/src/nodes/fullyconnected.h
index 4d56280351b496..ff01b22f1243b2 100644
--- a/src/plugins/intel_cpu/src/nodes/fullyconnected.h
+++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.h
@@ -68,6 +68,15 @@ class FullyConnected : public Node {
     bool canFuse(const NodePtr& node) const override;
 
     static bool isSupportedOperation(const std::shared_ptr<const ov::Node>& op, std::string& errorMessage) noexcept;
+    static bool isSupportedCompressedOperation(const std::shared_ptr<const ov::Node>& op,
+                                               size_t IC,
+                                               size_t OC,
+                                               size_t G,
+                                               ov::element::Type inferencePrecision) noexcept;
+
+    bool isExecutable() const override {
+        return !isInputTensorAtPortEmpty(0);
+    }
 
     void prepareParams() override;
     void executeDynamicImpl(dnnl::stream strm) override;
@@ -95,6 +104,8 @@ class FullyConnected : public Node {
         OUTPUT_ZERO_POINTS,
     };
 
+    static bool isConstantInput(const std::shared_ptr<const ov::Node>& op, InputId port);
+
     std::unordered_map<size_t, size_t> m_atoi;  // memory argument id to input id
 
     ExecutorPtr createExecutor();
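The AMX gate that moved into isSupportedCompressedOperation is easiest to see with numbers plugged in. A standalone sketch with illustrative values (simdWidth = 16, vnniFactor = 2, so amxRow = 32):

    #include <cstddef>

    bool amx_shape_rejected(size_t IC, size_t OC) {
        const size_t amxRow = 2 * 16;   // vnniFactor * simdWidth
        const size_t maxSize = 512;
        return (IC <= amxRow && OC <= amxRow) ||
               (IC <= maxSize && OC <= maxSize && IC % amxRow != 0);
    }
    // amx_shape_rejected(100, 256) == true: 100 % 32 == 4, unaligned IC, fall back.
    // amx_shape_rejected(512, 512) == false: 512 % 32 == 0, AMX IP stays eligible.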
diff --git a/src/plugins/intel_cpu/src/nodes/input.cpp b/src/plugins/intel_cpu/src/nodes/input.cpp
index fad9de6d1baef3..4ccdc87ada25f1 100644
--- a/src/plugins/intel_cpu/src/nodes/input.cpp
+++ b/src/plugins/intel_cpu/src/nodes/input.cpp
@@ -7,8 +7,9 @@
 #include "cpu/x64/jit_generator.hpp"
 #include "nodes/node_config.h"
 #include "openvino/core/parallel.hpp"
+#include "openvino/core/shape.hpp"
+#include "openvino/core/type/element_type.hpp"
 #include "shape_inference/shape_inference_pass_through.hpp"
-#include "ov_ops/placeholder.hpp"
 #include "memory_desc/cpu_memory_desc_utils.h"
 
 using namespace dnnl;
@@ -225,16 +226,12 @@ Input::Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr conte
                 op::v0::Constant::get_type_info_static(),
                 op::v0::Result::get_type_info_static(),
                 op::v3::ReadValue::get_type_info_static(),
-                op::v6::ReadValue::get_type_info_static(),
-                op::internal::Placeholder::get_type_info_static()))
+                op::v6::ReadValue::get_type_info_static()))
         OPENVINO_THROW_NOT_IMPLEMENTED("CPU Input node doesn't support ngraph operation ",
                                        op->get_type_name(),
                                        " with name ",
                                        op->get_friendly_name());
 
-    if (auto placeHolder = ov::as_type_ptr<op::internal::Placeholder>(op)) {
-        memoryPtr = MemoryDescUtils::makeEmptyMemory(context);
-        constant = ConstantType::Const;
-    } else if (auto constOp = ov::as_type_ptr<op::v0::Constant>(op)) {
+    if (auto constOp = ov::as_type_ptr<op::v0::Constant>(op)) {
         constant = ConstantType::Const;
         m_constOp = constOp;
         cloneBlobIfRequired();
@@ -244,8 +241,14 @@ Input::Input(const std::shared_ptr<ov::Node>& op, const GraphContext::CPtr conte
 }
 
 void Input::cloneBlobIfRequired() {
-    Shape shape(m_constOp->get_shape().empty() ? ov::Shape(1, 1) : m_constOp->get_shape());
     const auto prec = m_constOp->get_element_type();
+
+    if (prec == ov::element::undefined && shape_size(m_constOp->get_shape()) == 0) {
+        memoryPtr = MemoryDescUtils::makeEmptyMemory(context);
+        return;
+    }
+
+    Shape shape(m_constOp->get_shape().empty() ? ov::Shape(1, 1) : m_constOp->get_shape());
     const size_t size = shape.getElementsCount();
     CpuBlockedMemoryDesc memDesc(prec, shape);
 
diff --git a/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp b/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp
index e23f9c3776abf6..048b413b61a60b 100644
--- a/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp
+++ b/src/plugins/intel_cpu/src/shape_inference/custom/fullyconnected.cpp
@@ -15,7 +15,6 @@ Result FCShapeInfer::infer(
     const VectorDims& activationShape = input_shapes[0].get();
     const VectorDims& weightShape = input_shapes[1].get();
     size_t activationRank = activationShape.size();
-    // size_t channelRank = weightShape.size() - 1;
     size_t channelRank = 1;
 
     // activation   weight    output_shape
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp
index beb80e14171a48..ac9ccfa7c87ffa 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/convert_matmul_to_fc.cpp
@@ -9,7 +9,6 @@
 #include "openvino/op/transpose.hpp"
 #include "openvino/core/rt_info.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
-#include "ov_ops/placeholder.hpp"
 #include "transformations/utils/utils.hpp"
 #include "itt.hpp"
 
@@ -153,7 +152,7 @@ ov::intel_cpu::ConvertMatMulToFC::ConvertMatMulToFC() {
             fc_input_b = convert;
         }
 
-        auto bias_ph = std::make_shared<ov::op::internal::Placeholder>();
+        auto bias_ph = std::make_shared<ov::op::v0::Constant>(element::undefined, Shape{0});
         new_ops.push_back(bias_ph);
 
         auto fc = std::make_shared<ov::op::internal::FullyConnected>(fc_input_a,
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp
index 3d74ea10972c29..d92d2d3627b65b 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/common/pass/fc_bias_fusion.cpp
@@ -10,10 +10,10 @@
 #include "itt.hpp"
 #include "openvino/core/rt_info.hpp"
 #include "openvino/op/add.hpp"
+#include "openvino/op/constant.hpp"
 #include "openvino/op/reshape.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "ov_ops/fully_connected.hpp"
-#include "ov_ops/placeholder.hpp"
 #include "transformations/utils/utils.hpp"
 
 ov::intel_cpu::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
@@ -21,12 +21,10 @@ ov::intel_cpu::FullyConnectedBiasFusion::FullyConnectedBiasFusion() {
     auto input = ov::pass::pattern::any_input(ov::pass::pattern::has_static_rank());
     auto weights = ov::pass::pattern::any_input(ov::pass::pattern::has_static_shape());
-    auto ph = ov::pass::pattern::wrap_type<ov::op::internal::Placeholder>();
-
-    auto m_fc = ov::pass::pattern::wrap_type<ov::op::internal::FullyConnected>({input, weights, ph},
+    auto bias = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
+    auto m_fc = ov::pass::pattern::wrap_type<ov::op::internal::FullyConnected>({input, weights, bias},
                                                                                ov::pass::pattern::consumers_count(1));
-
-    auto m_bias = ov::pass::pattern::wrap_type<ov::op::v0::Constant>(ov::pass::pattern::has_static_shape());
+    auto m_bias = ov::pass::pattern::wrap_type<ov::op::v0::Constant>();
     auto m_add = ov::pass::pattern::wrap_type<ov::opset1::Add>({m_fc, m_bias});
 
     ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
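Since the fusion above now anchors on a plain Constant third input (real bias or the undefined stub) instead of a dedicated Placeholder op, the whole pattern can be sketched in one place. A hedged standalone version of that matcher, with a hypothetical wrapper function name:

    #include <memory>

    #include "openvino/op/add.hpp"
    #include "openvino/op/constant.hpp"
    #include "openvino/pass/pattern/op/wrap_type.hpp"
    #include "ov_ops/fully_connected.hpp"

    std::shared_ptr<ov::Node> make_fc_bias_pattern() {
        namespace p = ov::pass::pattern;
        auto bias = p::wrap_type<ov::op::v0::Constant>();
        auto fc = p::wrap_type<ov::op::internal::FullyConnected>({p::any_input(), p::any_input(), bias},
                                                                 p::consumers_count(1));
        return p::wrap_type<ov::op::v0::Add>({fc, p::any_input()});
    }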
diff --git a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
index 7256509ee2430a..d63c68e4b57789 100644
--- a/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
+++ b/src/plugins/intel_cpu/src/transformations/cpu_opset/convert_to_cpu_specific_opset.hpp
@@ -21,10 +21,7 @@
 #include "transformations/common_optimizations/reshape_sequence_fusion.hpp"
 #include "transformations/defs.hpp"
 #include "config.h"
-
-#if defined(OPENVINO_ARCH_X86_64)
-#include "cpu/x64/cpu_isa_traits.hpp"
-#endif
+#include "nodes/fullyconnected.h"
 
 #include "itt.hpp"
 
@@ -62,40 +59,15 @@ inline void ConvertToCPUSpecificOpset(std::shared_ptr<ov::Model> &model, const C
         ov::element::f4e2m1,
     };
 
-#if defined(OPENVINO_ARCH_X86_64)
-    // @todo introduce something like CPU_REGISTER_PASS_X64_AVX2
-    const bool isDecompressionSupported = dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2);
-    if (isDecompressionSupported) {
-        CPU_REGISTER_PASS_X64(
-            manager,
-            pass::ConvertFullyConnectedToFullyConnectedCompressed,
-            supported_activation_types,
-            supported_compressed_weights_types,
-            [&config](const std::shared_ptr<const ov::Node>& fc, size_t IC, size_t OC, size_t G) {
-                // @todo replace 'inferencePrecision' check with 'fc->get_input_element_type(0) == ov::element::bf16'
-                // after bf16 pipeline is moved to ConvertPrecision
-                if (dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_amx) &&
-                    config.inferencePrecision == ov::element::bf16) {
-                    // OneDNN AMX IP implementation has limited shapes support due to performance considerations. As a
-                    // current solution conditions below are copied from OneDNN to make sure correct IP impl will be
-                    // used since fallback one doesn't support weights decompression feature.
-                    size_t simdWidth = 16;
-                    size_t vnniFactor = 2;
-                    size_t maxSize = 512;
-                    auto amxRow = vnniFactor * simdWidth;
-
-                    if ((IC <= amxRow && OC <= amxRow) || (IC <= maxSize && OC <= maxSize && IC % amxRow != 0)) {
-                        return false;
-                    }
-                }
-
-                if (IC % G != 0 || IC / G < 4 || OC == 1) {
-                    return false;
-                }
-                return true;
-            });
-    }
-#endif  // OPENVINO_ARCH_X86_64
+    CPU_REGISTER_PASS_X64(
+        manager,
+        pass::ConvertFullyConnectedToFullyConnectedCompressed,
+        supported_activation_types,
+        supported_compressed_weights_types,
+        [&config](const std::shared_ptr<const ov::Node>& fc, size_t IC, size_t OC, size_t G) {
+            return ov::intel_cpu::node::FullyConnected::isSupportedCompressedOperation(
+                fc, IC, OC, G, config.inferencePrecision);
+        });
 
     CPU_REGISTER_PASS_X64(manager, pass::ConvertFCToFCQuantizedLegacy);
     if (std::getenv("EXTRA_DUMP")) {
diff --git a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/fullconnect.cpp b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/fullconnect.cpp
index 3c593ec773f710..90a2fc9d0b9768 100644
--- a/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/fullconnect.cpp
+++ b/src/plugins/intel_cpu/tests/unit/shape_inference_test/custom_shape_infer/fullconnect.cpp
@@ -4,10 +4,10 @@
 
 #include <gtest/gtest.h>
 
-#include "ov_ops/fully_connected.hpp"
-#include "ov_ops/placeholder.hpp"
-#include "openvino/op/parameter.hpp"
 #include "custom_shape_infer.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/op/parameter.hpp"
+#include "ov_ops/fully_connected.hpp"
 
 namespace ov {
 namespace intel_cpu {
@@ -18,52 +18,66 @@ using namespace ov;
 using namespace ov::intel_cpu;
 
 TEST(CpuShapeInfer, FC_InputSize_2) {
-    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1 });
+    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1});
     auto weight = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{5, 6});
-    auto op = std::make_shared<ov::op::internal::FullyConnected>(activate, weight, std::make_shared<ov::op::internal::Placeholder>());
+    auto op = std::make_shared<ov::op::internal::FullyConnected>(
+        activate,
+        weight,
+        std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
     std::vector<StaticShape> static_input_shapes = {StaticShape{720, 640}, {5, 6}};
     std::vector<StaticShape> static_output_shapes = {StaticShape{720, 5}};
     unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes);
 }
 
 TEST(CpuShapeInfer, FC_broadcastWeights1) {
-    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{1, -1, -1 });
+    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{1, -1, -1});
     auto weight = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{5, 6});
-    auto op = std::make_shared<ov::op::internal::FullyConnected>(activate, weight, std::make_shared<ov::op::internal::Placeholder>());
+    auto op = std::make_shared<ov::op::internal::FullyConnected>(
+        activate,
+        weight,
+        std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
    std::vector<StaticShape> static_input_shapes = {StaticShape{1, 720, 6}, {5, 6}};
     std::vector<StaticShape> static_output_shapes = {StaticShape{1, 720, 5}};
     unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes);
 }
 
 TEST(CpuShapeInfer, FC_broadcastWeights2) {
-    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1 });
+    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1, -1, -1});
     auto weight = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{5, 6});
-    auto op = std::make_shared<ov::op::internal::FullyConnected>(activate, weight, std::make_shared<ov::op::internal::Placeholder>());
+    auto op = std::make_shared<ov::op::internal::FullyConnected>(
+        activate,
+        weight,
+        std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
     std::vector<StaticShape> static_input_shapes = {StaticShape{2, 3, 720, 6}, {5, 6}};
     std::vector<StaticShape> static_output_shapes = {StaticShape{2, 3, 720, 5}};
     unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes);
 }
 
 TEST(CpuShapeInfer, FC_broadcastActivations1) {
-    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{720, -1 });
+    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{720, -1});
     auto weight = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{1, 5, 6});
-    auto op = std::make_shared<ov::op::internal::FullyConnected>(activate, weight, std::make_shared<ov::op::internal::Placeholder>());
+    auto op = std::make_shared<ov::op::internal::FullyConnected>(
+        activate,
+        weight,
+        std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
     std::vector<StaticShape> static_input_shapes = {StaticShape{720, 6}, {1, 5, 6}};
     std::vector<StaticShape> static_output_shapes = {StaticShape{1, 720, 5}};
     unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes);
 }
 
 TEST(CpuShapeInfer, FC_broadcastActivations2) {
-    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1 });
+    auto activate = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{-1, -1});
     auto weight = std::make_shared<ov::op::v0::Parameter>(element::f32, PartialShape{1, 1, 5, 6});
-    auto op = std::make_shared<ov::op::internal::FullyConnected>(activate, weight, std::make_shared<ov::op::internal::Placeholder>());
+    auto op = std::make_shared<ov::op::internal::FullyConnected>(
+        activate,
+        weight,
+        std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
     std::vector<StaticShape> static_input_shapes = {StaticShape{720, 6}, {1, 1, 5, 6}};
     std::vector<StaticShape> static_output_shapes = {StaticShape{1, 1, 720, 5}};
     unit_test::cpu_test_shape_infer(op.get(), static_input_shapes, static_output_shapes);
 }
 
-}  // namespace cpu_shape_infer
-}  // namespace unit_test
-}  // namespace intel_cpu
-}  // namespace ov
-
+}  // namespace cpu_shape_infer
+}  // namespace unit_test
+}  // namespace intel_cpu
+}  // namespace ov
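The tests above pin down the FC shape-inference rule with channelRank fixed to 1: take the activation shape, replace its last dimension with the weights' output-channel dimension, and left-pad with 1s when the weights rank exceeds the activation rank. A simplified model of that rule under those assumptions (not the plugin's actual implementation):

    #include <cstddef>
    #include <vector>

    std::vector<size_t> fc_output_shape(std::vector<size_t> activation, const std::vector<size_t>& weights) {
        while (activation.size() < weights.size()) {
            activation.insert(activation.begin(), 1);  // the broadcastActivations cases
        }
        activation.back() = weights[weights.size() - 2];  // OC
        return activation;
    }
    // fc_output_shape({720, 640}, {5, 6})     -> {720, 5}
    // fc_output_shape({2, 3, 720, 6}, {5, 6}) -> {2, 3, 720, 5}
    // fc_output_shape({720, 6}, {1, 1, 5, 6}) -> {1, 1, 720, 5}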
diff --git a/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp b/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp
index 2fa3554f60e17a..37df1fd6d27910 100644
--- a/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp
+++ b/src/plugins/intel_cpu/tests/unit/transformations/convert_matmul_test.cpp
@@ -4,22 +4,20 @@
 
 #include <gtest/gtest.h>
 
-#include
 #include
-
 #include
 #include
 #include
 #include
-#include "ov_ops/fully_connected.hpp"
+#include
+#include
 #include
 #include
 #include
-#include
-#include
 #include "common_test_utils/ov_test_utils.hpp"
-#include "ov_ops/placeholder.hpp"
+#include "openvino/op/constant.hpp"
+#include "ov_ops/fully_connected.hpp"
 #include "transformations/rt_info/decompression.hpp"
 
 using namespace testing;
@@ -27,27 +25,28 @@ using namespace ov::intel_cpu;
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest1) {
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 3, 2, 2 });
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 1, 2, 2 }, { 1 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2}, {1});
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, input2, true, false);
 
-        model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
         manager.register_pass<ConvertMatMulToFC>();
     }
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 3, 2, 2 });
-        auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
+        auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1});
         auto transpose1 = std::make_shared<ov::opset1::Transpose>(input1, transpose_constant1);
 
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2 }, { 1 });
-        auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, {0, 2, 1 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 2}, {1});
+        auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1});
         auto transpose2 = std::make_shared<ov::opset1::Transpose>(input2, transpose_constant2);
 
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(transpose1,
-                                                                         transpose2,
-                                                                         std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            transpose1,
+            transpose2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
-        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
 
@@ -81,7 +80,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest3) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1});
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
@@ -99,27 +101,30 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest4) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1});
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest5) {
-    auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{ -1, -1, 2 });
-    auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 3, 2, 2 }, { 1 });
+    auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 2});
+    auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2, 2}, {1});
     auto matmul = std::make_shared<ov::opset1::MatMul>(input1, input2, false, true);
 
-    model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+    model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     manager.register_pass<ConvertMatMulToFC>();
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest6) {
-    auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{ -1, -1, 2 });
-    auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 3, 1, 2 }, { 1 });
+    auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 2});
+    auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 1, 2}, {1});
     auto matmul = std::make_shared<ov::opset1::MatMul>(input1, input2, false, true);
 
-    model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+    model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     manager.register_pass<ConvertMatMulToFC>();
 }
 
@@ -135,7 +140,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest7) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2}, {1});
-        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
@@ -154,11 +162,14 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest8) {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{3, 2}, {1});
 
-        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
         auto a_shape = std::make_shared<ov::opset3::ShapeOf>(input1);
         auto I = ov::op::util::node_to_get_shape_value_of_indices_from_shape_node(a_shape, {0, 1});
-        auto O = ov::opset1::Constant::create(ov::element::i64, { 1 }, { 3 });
+        auto O = ov::opset1::Constant::create(ov::element::i64, {1}, {3});
         auto output_shape = std::make_shared<ov::opset1::Concat>(ov::OutputVector{I, O}, 0);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
@@ -177,7 +188,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest9) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1});
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
@@ -185,10 +199,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest9) {
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest10) {
     auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape::dynamic());
-    auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 2 }, { 1 });
+    auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 2}, {1});
     auto matmul = std::make_shared<ov::opset1::MatMul>(input1, input2, false, true);
 
-    model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+    model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     manager.register_pass<ConvertMatMulToFC>();
 }
 
@@ -222,7 +236,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest13) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 1});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 80, 1}, {1});
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1, input2, std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
@@ -247,10 +264,11 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest14) {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::u8, ov::PartialShape{-1, -1, 1});
         auto input2 = ov::opset1::Constant::create(ov::element::i8, ov::Shape{1, 80, 1}, {1});
 
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                         input2,
-                                                                         std::make_shared<ov::op::internal::Placeholder>(),
-                                                                         ov::element::f32);
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}),
+            ov::element::f32);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
 
TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_1) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 3, 4, 5});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 6, 5 }, { 1 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{6, 5}, {1});
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, input2, false, true);
 
         model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
@@ -267,12 +285,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_1) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 3, 4, 5});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 6, 5 }, { 1 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{6, 5}, {1});
 
-        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                     input2,
-                                                                     std::make_shared<ov::op::internal::Placeholder>(),
-                                                                     ov::element::f32);
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}),
+            ov::element::f32);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
@@ -290,9 +309,10 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_2) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::PartialShape{-1, -1, 1, 5});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 10, 5}, {1});
-        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                     input2,
-                                                                     std::make_shared<ov::op::internal::Placeholder>());
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_3) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1});
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, input2, false, true);
 
         model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
@@ -309,11 +329,12 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_3) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
-        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                     input2,
-                                                                     std::make_shared<ov::op::internal::Placeholder>(),
-                                                                     ov::element::f32);
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1});
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}),
+            ov::element::f32);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_4) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1});
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, input2, false, true);
 
         model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
@@ -330,11 +351,12 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_4) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
-        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                     input2,
-                                                                     std::make_shared<ov::op::internal::Placeholder>(),
-                                                                     ov::element::f32);
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1});
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}),
+            ov::element::f32);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_5) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 3, 2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1});
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, input2, false, true);
 
         model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
@@ -351,11 +373,12 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_4d_5) {
     }
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 3, 2, 4});
-        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, { 1 });
-        auto fc = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                     input2,
-                                                                     std::make_shared<ov::op::internal::Placeholder>(),
-                                                                     ov::element::f32);
+        auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 5, 4}, {1});
+        auto fc = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}),
+            ov::element::f32);
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{fc}, ov::ParameterVector{input1});
     }
 }
 
@@ -373,106 +396,111 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_1) {
     {
         auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{5, 2, 3});
         auto input2 = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 3}, {1});
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                         input2,
-                                                                         std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            input2,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
         model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_2) {
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 2, 3 });
-        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 3 }, { 1 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 3});
+        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 3}, {1});
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, weights, false, true);
-        model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
 
         manager.register_pass<ConvertMatMulToFC>();
     }
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 2, 3 });
-        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 2, 3 }, { 1 });
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                         weights,
-                                                                         std::make_shared<ov::op::internal::Placeholder>());
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{2, 3});
+        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{2, 3}, {1});
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            weights,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
-        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest_second_input_rank_adj_3) {
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 5, 2, 3 });
-        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 1, 2, 3 }, { 1 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{5, 2, 3});
+        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 3}, {1});
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, weights, false, true);
 
-        model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
         manager.register_pass<ConvertMatMulToFC>();
     }
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 5, 2, 3 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{5, 2, 3});
 
-        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{ 1, 2, 3 }, { 1 });
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                         weights,
-                                                                         std::make_shared<ov::op::internal::Placeholder>());
-        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        auto weights = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 2, 3}, {1});
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            weights,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
        model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_0) {
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 3, 2, 2 });
-        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 1, 2, 2 }, { 1 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
+        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1});
         auto convert = std::make_shared<ov::opset1::Convert>(input2, ov::element::f32);
         ov::mark_as_decompression(convert);
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, convert, false, false);
 
-        model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
 
         manager.register_pass<ConvertMatMulToFC>();
     }
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 3, 2, 2 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
 
-        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 1, 2, 2 }, { 1 });
-        auto transpose_constant = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1});
+        auto transpose_constant = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1});
         auto transpose = std::make_shared<ov::opset1::Transpose>(input2, transpose_constant);
         auto convert = std::make_shared<ov::opset1::Convert>(transpose, ov::element::f32);
 
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(input1,
-                                                                         convert,
-                                                                         std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            input1,
+            convert,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
-        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
 
 TEST_F(TransformationTestsF, ConvertMatMulToFCTest_decompress_convert_1) {
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 3, 2, 2 });
-        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 1, 2, 2 }, { 1 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
+        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1});
         auto convert = std::make_shared<ov::opset1::Convert>(input2, ov::element::f32);
         ov::mark_as_decompression(convert);
         auto matmul = std::make_shared<ov::opset1::MatMul>(input1, convert, true, false);
 
-        model = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
 
         manager.register_pass<ConvertMatMulToFC>();
     }
     {
-        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{ 3, 2, 2 });
-        auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 });
+        auto input1 = std::make_shared<ov::opset1::Parameter>(ov::element::f32, ov::Shape{3, 2, 2});
+        auto transpose_constant1 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1});
         auto transpose1 = std::make_shared<ov::opset1::Transpose>(input1, transpose_constant1);
 
-        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{ 1, 2, 2 }, { 1 });
-        auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{ 3 }, { 0, 2, 1 });
+        auto input2 = ov::opset1::Constant::create(ov::element::f16, ov::Shape{1, 2, 2}, {1});
+        auto transpose_constant2 = ov::opset1::Constant::create(ov::element::i32, ov::Shape{3}, {0, 2, 1});
         auto transpose2 = std::make_shared<ov::opset1::Transpose>(input2, transpose_constant2);
         auto convert = std::make_shared<ov::opset1::Convert>(transpose2, ov::element::f32);
 
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(transpose1,
-                                                                         convert,
-                                                                         std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            transpose1,
+            convert,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
-        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ input1 });
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{input1});
     }
 }
 
@@ -499,14 +527,13 @@ TEST_F(TransformationTestsF, ConvertMatMulToFCTest_compressed_u8_weights) {
         auto mul_const = ov::opset1::Constant::create(ov::element::f32, ov::Shape{1, 1, 2}, {1});
         auto mul = std::make_shared<ov::opset1::Multiply>(sub, mul_const);
 
-        // auto reshape_const = ov::opset1::Constant::create(ov::element::i32, {2}, {2, -1});
-        // auto reshape = std::make_shared<ov::opset1::Reshape>(mul, reshape_const, false);
         auto transpose_const = ov::opset1::Constant::create(ov::element::i32, {3}, {0, 2, 1});
         auto transpose = std::make_shared<ov::opset1::Transpose>(mul, transpose_const);
-        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(data,
-                                                                         transpose,
-                                                                         std::make_shared<ov::op::internal::Placeholder>());
+        auto matmul = std::make_shared<ov::op::internal::FullyConnected>(
+            data,
+            transpose,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
 
-        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ matmul }, ov::ParameterVector{ data });
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{matmul}, ov::ParameterVector{data});
     }
 }
diff --git a/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp b/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp
index 6b8268729d3457..b3d733aecba27b 100644
--- a/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp
+++ b/src/plugins/intel_cpu/tests/unit/transformations/move_fc_reshape_to_weights.cpp
@@ -16,7 +16,6 @@
 #include
 
 #include "common_test_utils/ov_test_utils.hpp"
-#include "ov_ops/placeholder.hpp"
 
 using namespace testing;
 using namespace ov::intel_cpu;
@@ -117,7 +116,11 @@ class MoveFCReshapeToWeightsTests : public TransformationTestsF, public WithPara
             weights_path = std::make_shared<ov::opset1::Transpose>(weights_path, transpose_const);
         }
 
-        auto fully_connected = std::make_shared<ov::op::internal::FullyConnected>(data, weights_path, std::make_shared<ov::op::internal::Placeholder>());
+        auto fully_connected = std::make_shared<ov::op::internal::FullyConnected>(
+            data,
+            weights_path,
+            std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0}));
+
         return std::make_shared<ov::Model>(ov::NodeVector{fully_connected}, ov::ParameterVector{data});
     }
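The stub-bias construction is repeated in every test touched above; a hypothetical helper (not part of the PR) shows how the reference models could be built more compactly:

    #include <memory>

    #include "openvino/op/constant.hpp"
    #include "ov_ops/fully_connected.hpp"

    // Builds a FullyConnected with the undefined-Constant bias stub used throughout this PR.
    static std::shared_ptr<ov::op::internal::FullyConnected> make_fc_with_stub_bias(
        const ov::Output<ov::Node>& activations,
        const ov::Output<ov::Node>& weights) {
        auto bias = std::make_shared<ov::op::v0::Constant>(ov::element::undefined, ov::Shape{0});
        return std::make_shared<ov::op::internal::FullyConnected>(activations, weights, bias);
    }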