Skip to content

Commit

Permalink
Apply review comments 2
Browse files: browse the repository at this point in the history
  • Loading branch information
EgorDuplensky committed Nov 26, 2024
1 parent d2e0493 commit e80440f
Show file tree
Hide file tree
Showing 27 changed files with 322 additions and 308 deletions.
30 changes: 0 additions & 30 deletions src/common/transformations/include/ov_ops/placeholder.hpp

This file was deleted.

3 changes: 1 addition & 2 deletions src/common/transformations/src/ov_ops/fully_connected.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include <memory>

#include "matmul_shape_inference.hpp"
#include "ov_ops/placeholder.hpp"

namespace ov {
namespace op {
Expand All @@ -25,7 +24,7 @@ FullyConnected::FullyConnected(const ov::Output<Node>& A,
FullyConnected::FullyConnected(const ov::Output<Node>& A,
const ov::Output<Node>& B,
const ov::element::Type output_type)
: FullyConnected(A, B, std::make_shared<Placeholder>(), output_type) {}
: FullyConnected(A, B, std::make_shared<v0::Constant>(element::undefined, Shape{0}), output_type) {}

bool FullyConnected::visit_attributes(ov::AttributeVisitor& visitor) {
visitor.on_attribute("output_type", m_output_type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#include <memory>

#include "openvino/core/type/element_type.hpp"
#include "openvino/op/constant.hpp"
#include "ov_ops/fully_connected.hpp"
#include "ov_ops/placeholder.hpp"

namespace ov {
namespace op {
Expand All @@ -31,7 +31,12 @@ FullyConnectedCompressed::FullyConnectedCompressed(const ov::Output<Node>& X,
const ov::Output<Node>& bias,
const ov::Output<Node>& weight_scales,
const ov::element::Type output_type)
: FullyConnectedCompressed(X, W, bias, weight_scales, std::make_shared<Placeholder>(), output_type) {}
: FullyConnectedCompressed(X,
W,
bias,
weight_scales,
std::make_shared<v0::Constant>(element::undefined, Shape{0}),
output_type) {}

std::shared_ptr<ov::Node> FullyConnectedCompressed::clone_with_new_inputs(const ov::OutputVector& new_args) const {
check_new_args_count(this, new_args);
Expand All @@ -47,14 +52,8 @@ std::shared_ptr<ov::Node> FullyConnectedCompressed::clone_with_new_inputs(const
// @todo finalize validate_and_infer_types
void FullyConnectedCompressed::validate_and_infer_types() {
const auto input_size = get_input_size();
const size_t expected_size = 5;
NODE_VALIDATION_CHECK(this,
input_size == expected_size,
"Number of inputs is incorrect. Current value is: ",
input_size,
", expected at least ",
expected_size,
".");

NODE_VALIDATION_CHECK(this, input_size == 5, "Number of inputs is incorrect. Current value is: ", input_size);

FullyConnected::validate_and_infer_types();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,7 @@ std::shared_ptr<ov::Node> FullyConnectedQuantized::clone_with_new_inputs(const o
// @todo finalize validate_and_infer_types
void FullyConnectedQuantized::validate_and_infer_types() {
const auto input_size = get_input_size();
const size_t expected_size = 9;
NODE_VALIDATION_CHECK(this,
input_size == expected_size,
"Number of inputs is incorrect. Current value is: ",
input_size,
", expected at least ",
expected_size,
".");
NODE_VALIDATION_CHECK(this, input_size == 9, "Number of inputs is incorrect. Current value is: ", input_size);

FullyConnected::validate_and_infer_types();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@

#include "ov_ops/fully_connected_quantized_legacy.hpp"

#include <cstddef>
#include <memory>

#include "matmul_shape_inference.hpp"
#include "openvino/core/type/element_type.hpp"
#include "ov_ops/placeholder.hpp"

namespace ov {
namespace op {
Expand All @@ -32,7 +30,12 @@ FullyConnectedQuantizedLegacy::FullyConnectedQuantizedLegacy(const ov::Output<No
const ov::Output<Node>& bias,
const ov::Output<Node>& deq_scales,
const ov::element::Type output_type)
: FullyConnectedQuantizedLegacy(X, W, bias, deq_scales, std::make_shared<Placeholder>(), output_type) {}
: FullyConnectedQuantizedLegacy(X,
W,
bias,
deq_scales,
std::make_shared<v0::Constant>(element::undefined, Shape{0}),
output_type) {}

std::shared_ptr<ov::Node> FullyConnectedQuantizedLegacy::clone_with_new_inputs(const ov::OutputVector& new_args) const {
check_new_args_count(this, new_args);
Expand All @@ -48,14 +51,8 @@ std::shared_ptr<ov::Node> FullyConnectedQuantizedLegacy::clone_with_new_inputs(c
// @todo finalize validate_and_infer_types
void FullyConnectedQuantizedLegacy::validate_and_infer_types() {
const auto input_size = get_input_size();
const size_t expected_size = 5;
NODE_VALIDATION_CHECK(this,
input_size == expected_size,
"Number of inputs is incorrect. Current value is: ",
input_size,
", expected at least ",
expected_size,
".");

NODE_VALIDATION_CHECK(this, input_size == 5, "Number of inputs is incorrect. Current value is: ", input_size);

ov::op::v0::MatMul op;
op.set_transpose_a(false);
Expand Down
26 changes: 0 additions & 26 deletions src/common/transformations/src/ov_ops/placeholder.cpp

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "ov_ops/fully_connected.hpp"
#include "ov_ops/fully_connected_compressed.hpp"
#include "ov_ops/placeholder.hpp"
#include "transformations/utils/utils.hpp"

ov::pass::ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnectedToFullyConnectedCompressed(
Expand Down Expand Up @@ -157,7 +156,8 @@ ov::pass::ConvertFullyConnectedToFullyConnectedCompressed::ConvertFullyConnected
}
}

fc_input_zp = with_zero_point ? fc_input_zp : std::make_shared<ov::op::internal::Placeholder>();
fc_input_zp =
with_zero_point ? fc_input_zp : std::make_shared<ov::op::v0::Constant>(element::undefined, Shape{0});
ov::disable_constant_folding(fc_input_zp);
result_nodes.push_back(fc_input_zp);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
#include "openvino/pass/pattern/op/wrap_type.hpp"
#include "ov_ops/fully_connected.hpp"
#include "ov_ops/fully_connected_quantized_legacy.hpp"
#include "ov_ops/placeholder.hpp"
#include "transformations/utils/utils.hpp"

ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() {
Expand Down Expand Up @@ -52,15 +51,15 @@ ov::pass::ConvertFCToFCQuantizedLegacy::ConvertFCToFCQuantizedLegacy() {
pattern_map.at(fully_connected_m).get_node_shared_ptr());

ov::NodeVector new_ops;
auto zp_ph = std::make_shared<ov::op::internal::Placeholder>();
new_ops.push_back(zp_ph);
auto zp = std::make_shared<ov::op::v0::Constant>(element::undefined, Shape{0});
new_ops.push_back(zp);

auto fc_quantized =
std::make_shared<ov::op::internal::FullyConnectedQuantizedLegacy>(activations,
weights,
bias,
dequantization_scales,
zp_ph,
zp,
fc_node->get_output_type());
new_ops.push_back(fc_quantized);

Expand Down
4 changes: 1 addition & 3 deletions src/core/src/op/util/op_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
#include "openvino/op/util/binary_elementwise_logical.hpp"
#include "openvino/op/util/unary_elementwise_arithmetic.hpp"
#include "openvino/op/xor.hpp"
#include "ov_ops/placeholder.hpp"

bool ov::op::util::is_unary_elementwise_arithmetic(const ov::Node* node) {
return dynamic_cast<const ov::op::util::UnaryElementwiseArithmetic*>(node) != nullptr;
Expand Down Expand Up @@ -67,8 +66,7 @@ bool ov::op::util::is_sink(const ov::Node* node) {
}

bool ov::op::util::is_constant(const ov::Node* node) {
return dynamic_cast<const ov::op::v0::Constant*>(node) != nullptr ||
dynamic_cast<const ov::op::internal::Placeholder*>(node) != nullptr;
return dynamic_cast<const ov::op::v0::Constant*>(node) != nullptr;
}

bool ov::op::util::is_commutative(const ov::Node* node) {
Expand Down
5 changes: 4 additions & 1 deletion src/plugins/intel_cpu/src/cpu_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ static const TypeToNameMap& get_type_to_name_tbl() {
static const TypeToNameMap type_to_name_tbl = {
{"Constant", Type::Input},
{"Parameter", Type::Input},
{"PlaceHolder", Type::Input},
{"Result", Type::Output},
{"Eye", Type::Eye},
{"Convolution", Type::Convolution},
Expand Down Expand Up @@ -473,6 +472,10 @@ std::string algToString(const Algorithm alg) {
CASE(FQCommon);
CASE(FQQuantization);
CASE(FQBinarization);
CASE(FullyConnectedCommon);
CASE(FullyConnectedCompressed);
CASE(FullyConnectedQuantized);
CASE(FullyConnectedQuantizedLegacy);
CASE(ROIPoolingMax);
CASE(ROIPoolingBilinear);
CASE(ROIAlignMax);
Expand Down
6 changes: 6 additions & 0 deletions src/plugins/intel_cpu/src/cpu_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,12 @@ enum class Algorithm {
EltwiseBitwiseLeftShift,
EltwiseBitwiseRightShift,

// FullyConnected algorithms
FullyConnectedCommon,
FullyConnectedCompressed,
FullyConnectedQuantized,
FullyConnectedQuantizedLegacy,

// FakeQuantize algorithms
FQCommon,
FQQuantization,
Expand Down
14 changes: 7 additions & 7 deletions src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,6 @@ DnnlPostOpsComposer::DnnlPostOpsComposer(const PostOps& postOps,
idxOC(indexOfOutputChannelDim),
isINT8(isInt8),
weightScaleMaskPerChannel(weiScaleMaskPerChannel),
weightsWithBatch(memory.at(ARG_WEI)->getDescPtr()->getShape().getRank()),
outDataType(outDataType) {
OPENVINO_ASSERT(idxOC >= 0 && static_cast<size_t>(idxOC) < outputDims.size());
OC = outputDims[idxOC];
Expand Down Expand Up @@ -645,8 +644,7 @@ void DnnlPostOpsComposer::appendClip(const std::vector<float>& low, const std::v
static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
bool needTranspose,
ov::element::Type dstPrc,
const dnnl::engine& engine,
bool weightsWithBatch) {
const dnnl::engine& engine) {
auto shape = paramsPtr->getShape().getStaticDims();
if (shape.size() == 1 && shape[0] == 1) {
shape.push_back(1);
Expand All @@ -655,8 +653,10 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
if (shape.size() != 2 && shape.size() != 3)
OPENVINO_THROW("DnnlPostOpsComposer cannot prepack decompression params with invalid shape");

const size_t OC = weightsWithBatch ? shape[shape.size() - 2] : shape.front();
const size_t G = weightsWithBatch ? shape[shape.size() - 1] : shape[1];
// weights without batch: (OC, G)
// weights with batch: (B, OC, G)
const size_t OC = shape[shape.size() - 2];
const size_t G = shape[shape.size() - 1];

Shape dstShape = Shape({OC, G});

Expand All @@ -683,7 +683,7 @@ void DnnlPostOpsComposer::appendDecompressionScales(const MemoryCPtr& scales_ptr
if (scales_ptr == nullptr)
return;

auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine, weightsWithBatch);
auto scalesMem = prepackDecompressionParams(scales_ptr, needTranspose, dstPrecision, engine);
attr.set_scales_dims(DNNL_ARG_WEIGHTS,
DnnlExtensionUtils::convertToDnnlDims(scalesMem->getStaticDims()),
DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
Expand All @@ -699,7 +699,7 @@ void DnnlPostOpsComposer::appendDecompressionZeroPoints(const MemoryCPtr& zero_p
return;

auto zeroPointsMem =
prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine, weightsWithBatch);
prepackDecompressionParams(zero_points_ptr, needTranspose, dstPrecision, engine);
attr.set_zero_points_dims(DNNL_ARG_WEIGHTS,
DnnlExtensionUtils::convertToDnnlDims(zeroPointsMem->getStaticDims()),
DnnlExtensionUtils::ElementTypeToDataType(dstPrecision));
Expand Down
1 change: 0 additions & 1 deletion src/plugins/intel_cpu/src/dnnl_postops_composer.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ class DnnlPostOpsComposer {
size_t idxOC;
const bool isINT8; // only INT8 primitive support scales
const int weightScaleMaskPerChannel;
bool weightsWithBatch;
bool weightScaleAvailable = false;
const dnnl::memory::data_type outDataType;

Expand Down
2 changes: 0 additions & 2 deletions src/plugins/intel_cpu/src/extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#include "ov_ops/nms_ie_internal.hpp"
#include "ov_ops/nms_static_shape_ie.hpp"
#include "ov_ops/rms.hpp"
#include "ov_ops/placeholder.hpp"
#include "ov_ops/rotary_positional_embeddings.hpp"
#include "ov_ops/type_relaxed.hpp"
#include "snippets/op/subgraph.hpp"
Expand Down Expand Up @@ -92,7 +91,6 @@ class TypeRelaxedExtension : public ov::OpExtension<ov::op::TypeRelaxed<Op>> {
OP_EXTENSION(ov::op::internal::FullyConnectedCompressed) \
OP_EXTENSION(ov::op::internal::FullyConnectedQuantizedLegacy) \
OP_EXTENSION(ov::op::internal::FullyConnectedQuantized) \
OP_EXTENSION(ov::op::internal::Placeholder) \
OP_EXTENSION_X64(ov::intel_cpu::MHANode) \
OP_EXTENSION_X64(ov::intel_cpu::InteractionNode) \
OP_EXTENSION_X64(ov::intel_cpu::LLMMLPNode) \
Expand Down
1 change: 0 additions & 1 deletion src/plugins/intel_cpu/src/graph_optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,6 @@ void GraphOptimizer::FuseFCAndTransposeOnWeights(Graph& graph) {
auto isSuitablePattern = [](NodePtr parent) {
bool res = true && parent->getType() == Type::Transpose
&& parent->getChildEdges().size() == 1
&& one_of(parent->getChildEdgeAt(0)->getOutputNum(), 1, 3, 4)
&& parent->getChildEdgeAt(0)->getChild()->getType() == Type::FullyConnected
&& parent->isConstant();
return res;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

#include "post_ops.hpp"
#include "memory_arguments.hpp"
#include "printers.hpp"

namespace ov {
namespace intel_cpu {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

namespace ov {
namespace intel_cpu {
using namespace executor;

template <typename Attrs, typename NodeT>
static ExecutorPtr fallback(const executor::Config<Attrs>& config,
Expand Down
Loading

0 comments on commit e80440f

Please sign in to comment.